fix scheduler heap churn with rapid timeouts

J. Nick Koston
2025-11-25 14:57:02 -06:00
parent ae60b5e6a1
commit 85e5119ba2
3 changed files with 38 additions and 12 deletions


@@ -15,17 +15,20 @@ namespace esphome {
 static const char *const TAG = "scheduler";
 // Memory pool configuration constants
-// Pool size of 5 matches typical usage patterns (2-4 active timers)
-// - Minimal memory overhead (~250 bytes on ESP32)
-// - Sufficient for most configs with a couple sensors/components
-// - Still prevents heap fragmentation and allocation stalls
-// - Complex setups with many timers will just allocate beyond the pool
+// Pool can grow up to MAX_POOL_SIZE to handle burst scenarios (e.g., many sensors
+// with timeout filters receiving rapid updates). The pool periodically shrinks
+// back toward MIN_POOL_SIZE when usage is low to reclaim memory.
+// - MAX of 16 handles configs with many timeout-based filters without allocation stalls
+// - MIN of 4 keeps a small reserve for typical usage patterns
+// - Shrinking every 5 minutes prevents memory waste on simple configs
+// See https://github.com/esphome/backlog/issues/52
-static constexpr size_t MAX_POOL_SIZE = 5;
+static constexpr size_t MAX_POOL_SIZE = 16;
+static constexpr size_t MIN_POOL_SIZE = 4;
+// Shrink interval in milliseconds (5 minutes)
+static constexpr uint32_t POOL_SHRINK_INTERVAL_MS = 5 * 60 * 1000;
 // Maximum number of logically deleted (cancelled) items before forcing cleanup.
 // Set to 5 to match the pool size - when we have as many cancelled items as our
 // pool can hold, it's time to clean up and recycle them.
 // Value chosen based on testing to balance cleanup frequency vs overhead.
 static constexpr uint32_t MAX_LOGICALLY_DELETED_ITEMS = 5;
 // Half the 32-bit range - used to detect rollovers vs normal time progression
 static constexpr uint32_t HALF_MAX_UINT32 = std::numeric_limits<uint32_t>::max() / 2;
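HALF_MAX_UINT32 is the standard threshold for telling a genuine counter rollover apart from a slightly out-of-order timestamp: a backward jump larger than half the 32-bit range can only mean the counter wrapped. A self-contained sketch of the general technique (illustrative only, not necessarily how millis_64_ implements it):

```cpp
#include <cstdint>
#include <limits>

static constexpr uint32_t HALF_MAX_UINT32 = std::numeric_limits<uint32_t>::max() / 2;

// Sketch: extend a 32-bit millisecond tick to 64 bits by counting rollovers.
// A backward jump larger than half the range means the counter wrapped past
// zero (~every 49.7 days); a smaller one is just an out-of-order timestamp.
uint64_t extend_millis(uint32_t now, uint32_t &last, uint32_t &rollovers) {
  if (now < last && last - now > HALF_MAX_UINT32)
    rollovers++;
  last = now;
  return (static_cast<uint64_t>(rollovers) << 32) | now;
}
```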
@@ -331,6 +334,21 @@ void HOT Scheduler::call(uint32_t now) {
   this->process_defer_queue_(now);
 #endif /* not ESPHOME_THREAD_SINGLE */
+  // Periodically shrink the pool if it's larger than needed
+  // Check uses subtraction to handle uint32_t wraparound correctly
+  if (now - this->last_pool_shrink_ >= POOL_SHRINK_INTERVAL_MS) {
+    this->last_pool_shrink_ = now;
+    // Shrink pool to max(high_watermark, MIN_POOL_SIZE)
+    size_t target_size = this->pool_high_watermark_ > MIN_POOL_SIZE ? this->pool_high_watermark_ : MIN_POOL_SIZE;
+    while (this->scheduler_item_pool_.size() > target_size) {
+      this->scheduler_item_pool_.pop_back();
+    }
+    // Actually release the memory
+    this->scheduler_item_pool_.shrink_to_fit();
+    // Reset watermark for next period
+    this->pool_high_watermark_ = static_cast<uint8_t>(this->scheduler_item_pool_.size());
+  }
   // Convert the fresh timestamp from main loop to 64-bit for scheduler operations
   const auto now_64 = this->millis_64_(now);  // 'now' from parameter - fresh from Application::loop()
   this->process_to_add();
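The shrink check above leans on unsigned arithmetic: subtraction on uint32_t wraps modulo 2^32, so now - last yields the true elapsed time even when now has rolled over past zero, with no special casing. A standalone illustration with hypothetical values:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // `last` sampled just before the 32-bit counter wraps, `now` just after.
  // A plain `now >= last` comparison would say time went backwards;
  // wrapping subtraction recovers the real 0x300 = 768 ms elapsed.
  uint32_t last = 0xFFFFFF00u;
  uint32_t now = 0x00000200u;
  assert(now - last == 0x300u);
  assert(now - last >= 768u);  // an interval check still fires correctly
  return 0;
}
```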
@@ -759,6 +777,11 @@ void Scheduler::recycle_item_(std::unique_ptr<SchedulerItem> item) {
   // Clear dynamic name if any
   item->clear_dynamic_name();
   this->scheduler_item_pool_.push_back(std::move(item));
+  // Track high watermark for adaptive pool shrinking
+  uint8_t current_size = static_cast<uint8_t>(this->scheduler_item_pool_.size());
+  if (current_size > this->pool_high_watermark_) {
+    this->pool_high_watermark_ = current_size;
+  }
 #ifdef ESPHOME_DEBUG_SCHEDULER
   ESP_LOGD(TAG, "Recycled item to pool (pool size now: %zu)", this->scheduler_item_pool_.size());
 #endif
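The recycle path above is one half of the pool; the allocation site would pop a pooled item before touching the heap. That side is not part of this diff, so the following is a hypothetical sketch of the pattern (the stub type and free function are assumptions, not the scheduler's actual API):

```cpp
#include <memory>
#include <vector>

// Stub stand-in; the real SchedulerItem is declared in the scheduler header.
struct SchedulerItem {};

// Hypothetical acquire-side counterpart to recycle_item_(): prefer a pooled
// item and fall back to the heap only when the pool is empty.
std::unique_ptr<SchedulerItem> acquire_item(std::vector<std::unique_ptr<SchedulerItem>> &pool) {
  if (!pool.empty()) {
    auto item = std::move(pool.back());
    pool.pop_back();  // reuse: no allocator traffic, no new fragmentation
    return item;
  }
  return std::make_unique<SchedulerItem>();  // pool exhausted: allocate
}
```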


@@ -460,12 +460,15 @@ class Scheduler {
   // Memory pool for recycling SchedulerItem objects to reduce heap churn.
   // Design decisions:
   // - std::vector is used instead of a fixed array because many systems only need 1-2 scheduler items
-  // - The vector grows dynamically up to MAX_POOL_SIZE (5) only when needed, saving memory on simple setups
-  // - Pool size of 5 matches typical usage (2-4 timers) while keeping memory overhead low (~250 bytes on ESP32)
+  // - The vector grows dynamically up to MAX_POOL_SIZE only when needed, saving memory on simple setups
+  // - Pool periodically shrinks toward MIN_POOL_SIZE to reclaim memory when usage is low
   // - The pool significantly reduces heap fragmentation which is critical because heap allocation/deallocation
   //   can stall the entire system, causing timing issues and dropped events for any components that need
   //   to synchronize between tasks (see https://github.com/esphome/backlog/issues/52)
   std::vector<std::unique_ptr<SchedulerItem>> scheduler_item_pool_;
+  // Tracks peak pool usage for adaptive shrinking
+  uint8_t pool_high_watermark_{0};
+  uint32_t last_pool_shrink_{0};
 #ifdef ESPHOME_THREAD_MULTI_ATOMICS
   /*

@@ -188,8 +188,8 @@ async def test_scheduler_pool(
             size = int(match.group(1))
             max_pool_size = max(max_pool_size, size)
-    # Pool can grow up to its maximum of 5
-    assert max_pool_size <= 5, f"Pool grew beyond maximum ({max_pool_size})"
+    # Pool can grow up to its maximum of 16
+    assert max_pool_size <= 16, f"Pool grew beyond maximum ({max_pool_size})"
     # Log summary for debugging
     print("\nScheduler Pool Test Summary (Python Orchestrated):")
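For reference, match.group(1) presumably comes from scanning the ESP_LOGD output of recycle_item_() ("Recycled item to pool (pool size now: N)"). A hypothetical reconstruction of that parsing step; the regex and helper are assumptions, not the harness's actual code:

```python
import re

# Matches the debug log emitted by recycle_item_():
#   "Recycled item to pool (pool size now: N)"
POOL_SIZE_RE = re.compile(r"pool size now: (\d+)")

def max_pool_size_from_logs(log_lines: list[str]) -> int:
    """Return the peak pool size observed in captured device logs."""
    max_pool_size = 0
    for line in log_lines:
        match = POOL_SIZE_RE.search(line)
        if match:
            max_pool_size = max(max_pool_size, int(match.group(1)))
    return max_pool_size
```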