Compare commits

...

3 Commits

Author SHA1 Message Date
J. Nick Koston
2f69399e87 naming 2026-01-21 18:20:49 -10:00
J. Nick Koston
1d9ca60c20 Update esphome/components/api/api_frame_helper.h
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2026-01-21 18:10:23 -10:00
J. Nick Koston
900f875816 [api] Limit Nagle batching for log messages to reduce LWIP buffer pressure 2026-01-21 17:59:21 -10:00
2 changed files with 46 additions and 40 deletions

View File

@@ -1844,23 +1844,8 @@ bool APIConnection::send_buffer(ProtoWriteBuffer buffer, uint8_t message_type) {
return false;
}
// Toggle Nagle's algorithm based on message type to prevent log messages from
// filling the TCP send buffer and crowding out important state updates.
//
// This honors the `no_delay` proto option - SubscribeLogsResponse is the only
// message with `option (no_delay) = false;` in api.proto, indicating it should
// allow Nagle coalescing. This option existed since 2019 but was never implemented.
//
// - Log messages: Enable Nagle (NODELAY=false) so small log packets coalesce
// into fewer, larger packets. They flush naturally via TCP delayed ACK timer
// (~200ms), buffer filling, or when a state update triggers a flush.
//
// - All other messages (state updates, responses): Disable Nagle (NODELAY=true)
// for immediate delivery. These are time-sensitive and should not be delayed.
//
// This must be done proactively BEFORE the buffer fills up - checking buffer
// state here would be too late since we'd already be in a degraded state.
this->helper_->set_nodelay(!is_log_message);
// Set TCP_NODELAY based on message type - see set_nodelay_for_message() for details
this->helper_->set_nodelay_for_message(is_log_message);
APIError err = this->helper_->write_protobuf_packet(message_type, buffer);
if (err == APIError::WOULD_BLOCK)

View File

@@ -120,26 +120,39 @@ class APIFrameHelper {
}
return APIError::OK;
}
/// Toggle TCP_NODELAY socket option to control Nagle's algorithm.
///
/// This is used to allow log messages to coalesce (Nagle enabled) while keeping
/// state updates low-latency (NODELAY enabled). Without this, many small log
/// packets fill the TCP send buffer, crowding out important state updates.
///
/// State is tracked to minimize setsockopt() overhead - on lwip_raw (ESP8266/RP2040)
/// this is just a boolean assignment; on other platforms it's a lightweight syscall.
///
/// @param enable true to enable NODELAY (disable Nagle), false to enable Nagle
/// @return true if successful or already in desired state
bool set_nodelay(bool enable) {
if (this->nodelay_enabled_ == enable)
return true;
int val = enable ? 1 : 0;
int err = this->socket_->setsockopt(IPPROTO_TCP, TCP_NODELAY, &val, sizeof(int));
if (err == 0) {
this->nodelay_enabled_ = enable;
// Manage TCP_NODELAY (Nagle's algorithm) based on message type.
//
// For non-log messages (sensor data, state updates): Always disable Nagle
// (NODELAY on) for immediate delivery - these are time-sensitive.
//
// For log messages: Use Nagle to coalesce multiple small log packets into
// fewer larger packets, reducing WiFi overhead. However, we limit batching
// to 3 messages to avoid excessive LWIP buffer pressure on memory-constrained
// devices like ESP8266. LWIP's TCP_OVERSIZE option coalesces the data into
// shared pbufs, but holding data too long waiting for Nagle's timer causes
// buffer exhaustion and dropped messages.
//
// Flow: Log 1 (Nagle on) -> Log 2 (Nagle on) -> Log 3 (NODELAY, flush all)
//
void set_nodelay_for_message(bool is_log_message) {
if (!is_log_message) {
if (this->nodelay_state_ != NODELAY_ON) {
this->set_nodelay_raw_(true);
this->nodelay_state_ = NODELAY_ON;
}
return;
}
// Log messages 1-3: state transitions -1 -> 1 -> 2 -> -1 (flush on 3rd)
if (this->nodelay_state_ == NODELAY_ON) {
this->set_nodelay_raw_(false);
this->nodelay_state_ = 1;
} else if (this->nodelay_state_ >= LOG_NAGLE_COUNT) {
this->set_nodelay_raw_(true);
this->nodelay_state_ = NODELAY_ON;
} else {
this->nodelay_state_++;
}
return err == 0;
}
virtual APIError write_protobuf_packet(uint8_t type, ProtoWriteBuffer buffer) = 0;
// Write multiple protobuf messages in a single operation
@@ -229,10 +242,18 @@ class APIFrameHelper {
uint8_t tx_buf_head_{0};
uint8_t tx_buf_tail_{0};
uint8_t tx_buf_count_{0};
// Tracks TCP_NODELAY state to minimize setsockopt() calls. Initialized to true
// since init_common_() enables NODELAY. Used by set_nodelay() to allow log
// messages to coalesce while keeping state updates low-latency.
bool nodelay_enabled_{true};
// Nagle batching state for log messages. NODELAY_ON (-1) means NODELAY is enabled
// (immediate send). Values 1-2 count log messages in the current Nagle batch.
// After LOG_NAGLE_COUNT logs, we switch to NODELAY to flush and reset.
static constexpr int8_t NODELAY_ON = -1;
static constexpr int8_t LOG_NAGLE_COUNT = 2;
int8_t nodelay_state_{NODELAY_ON};
// Internal helper to set TCP_NODELAY socket option
void set_nodelay_raw_(bool enable) {
int val = enable ? 1 : 0;
this->socket_->setsockopt(IPPROTO_TCP, TCP_NODELAY, &val, sizeof(int));
}
// Common initialization for both plaintext and noise protocols
APIError init_common_();