mirror of
https://github.com/esphome/esphome.git
synced 2026-02-24 20:35:30 -07:00
[core] Avoid expensive modulo in LockFreeQueue for non-power-of-2 sizes
Replace modulo-based ring-buffer index advancement with a constexpr-dispatched approach: power-of-2 sizes keep the modulo (the compiler emits a single mask instruction), while non-power-of-2 sizes use a comparison+branch instead of expensive multiply-shift sequences.

Benchmarked on real ESP32-C3 (RISC-V) hardware (100k iterations):
- SIZE=88 (BLE):  24,120 us -> 6,493 us (3.7x faster)
- SIZE=30 (MQTT): 22,816 us -> 6,879 us (3.3x faster)
- SIZE=32 (pow2): identical (both use mask)

Flash savings on ESP32 Xtensa (BLE proxy build):
- push(): 149 -> 133 bytes (-16 B)
- pop():  78 -> 67 bytes (-11 B)
This commit is contained in:
@@ -38,13 +38,27 @@ template<class T, uint8_t SIZE> class LockFreeQueue {
|
||||
}
|
||||
|
||||
protected:
|
||||
// Advance ring buffer index by one, wrapping at SIZE.
|
||||
// Power-of-2 sizes use modulo (compiler emits single mask instruction).
|
||||
// Non-power-of-2 sizes use comparison to avoid expensive multiply-shift sequences.
|
||||
static constexpr uint8_t next_index_(uint8_t index) {
|
||||
if constexpr ((SIZE & (SIZE - 1)) == 0) {
|
||||
return (index + 1) % SIZE;
|
||||
} else {
|
||||
uint8_t next = index + 1;
|
||||
if (next >= SIZE) [[unlikely]]
|
||||
next = 0;
|
||||
return next;
|
||||
}
|
||||
}
|
||||
|
||||
// Internal push that reports queue state - for use by derived classes
|
||||
bool push_internal_(T *element, bool &was_empty, uint8_t &old_tail) {
|
||||
if (element == nullptr)
|
||||
return false;
|
||||
|
||||
uint8_t current_tail = tail_.load(std::memory_order_relaxed);
|
||||
uint8_t next_tail = (current_tail + 1) % SIZE;
|
||||
uint8_t next_tail = next_index_(current_tail);
|
||||
|
||||
// Read head before incrementing tail
|
||||
uint8_t head_before = head_.load(std::memory_order_acquire);
|
||||
@@ -73,14 +87,21 @@ template<class T, uint8_t SIZE> class LockFreeQueue {
|
||||
}
|
||||
|
||||
T *element = buffer_[current_head];
|
||||
head_.store((current_head + 1) % SIZE, std::memory_order_release);
|
||||
head_.store(next_index_(current_head), std::memory_order_release);
|
||||
return element;
|
||||
}
|
||||
|
||||
size_t size() const {
|
||||
uint8_t tail = tail_.load(std::memory_order_acquire);
|
||||
uint8_t head = head_.load(std::memory_order_acquire);
|
||||
return (tail - head + SIZE) % SIZE;
|
||||
if constexpr ((SIZE & (SIZE - 1)) == 0) {
|
||||
return (tail - head + SIZE) % SIZE;
|
||||
} else {
|
||||
int diff = static_cast<int>(tail) - static_cast<int>(head);
|
||||
if (diff < 0)
|
||||
diff += SIZE;
|
||||
return static_cast<size_t>(diff);
|
||||
}
|
||||
}
|
||||
|
||||
// Atomically fetch the count of elements dropped due to a full queue and
// reset it to zero. Relaxed ordering: the counter is a statistic, not a
// synchronization point.
uint16_t get_and_reset_dropped_count() {
  return dropped_count_.exchange(0, std::memory_order_relaxed);
}
|
||||
@@ -90,7 +111,7 @@ template<class T, uint8_t SIZE> class LockFreeQueue {
|
||||
bool empty() const { return head_.load(std::memory_order_acquire) == tail_.load(std::memory_order_acquire); }
|
||||
|
||||
bool full() const {
|
||||
uint8_t next_tail = (tail_.load(std::memory_order_relaxed) + 1) % SIZE;
|
||||
uint8_t next_tail = next_index_(tail_.load(std::memory_order_relaxed));
|
||||
return next_tail == head_.load(std::memory_order_acquire);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user