Compare commits

..

3 Commits

Author SHA1 Message Date
J. Nick Koston
88df48a52d Add TODO for std::trivially_relocatable when available 2026-02-12 12:42:09 -06:00
J. Nick Koston
282ba90f62 Address review feedback: add explicit includes and clarify comments
- Add #include <type_traits> to both .cpp and .h for static_asserts
- Clarify CompactString comment: explicitly note it is not trivially
  copyable, and that memcpy safety relies on validated layout property
- Use memcpy_fn indirection to suppress both GCC -Wclass-memaccess
  and clang-tidy bugprone-undefined-memory-manipulation without
  platform-specific pragma guards
2026-02-12 12:32:06 -06:00
J. Nick Koston
58f8029264 [wifi] Use memcpy-based insertion sort for scan results
Replace copy-assignment with raw memcpy in the WiFi scan result
insertion sort. Copy assignment on WiFiScanResult calls
CompactString's destructor then placement-new for every shift,
which means delete[]/new[] per shift for heap-allocated SSIDs.

With 70+ networks visible (e.g., during captive portal transition
showing full scan results), this caused event loop blocking from
hundreds of heap allocations in a tight loop on an 80MHz ESP8266.

This optimization is safe because we're permuting elements within
the same array - each slot is overwritten exactly once, so no
ownership duplication occurs. CompactString stores either inline
data or a heap pointer, never a self-referential pointer (unlike
libstdc++ std::string SSO). This was made possible by PR#13472
which replaced std::string with CompactString.

Static asserts guard the memcpy safety assumptions at compile time.

Confirmed on real device: event loop blocking during captive portal
transition is eliminated and WiFi connection is slightly faster.
2026-02-12 12:29:19 -06:00
10 changed files with 85 additions and 63 deletions

View File

@@ -47,7 +47,7 @@ runs:
- name: Build and push to ghcr by digest
id: build-ghcr
uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6.19.2
uses: docker/build-push-action@601a80b39c9405e50806ae38af30926f9d957c47 # v6.19.1
env:
DOCKER_BUILD_SUMMARY: false
DOCKER_BUILD_RECORD_UPLOAD: false
@@ -73,7 +73,7 @@ runs:
- name: Build and push to dockerhub by digest
id: build-dockerhub
uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6.19.2
uses: docker/build-push-action@601a80b39c9405e50806ae38af30926f9d957c47 # v6.19.1
env:
DOCKER_BUILD_SUMMARY: false
DOCKER_BUILD_RECORD_UPLOAD: false

View File

@@ -9,8 +9,7 @@ FROM ghcr.io/esphome/docker-base:${BUILD_OS}-ha-addon-${BUILD_BASE_VERSION} AS b
ARG BUILD_TYPE
FROM base-source-${BUILD_TYPE} AS base
RUN git config --system --add safe.directory "*" \
&& git config --system advice.detachedHead false
RUN git config --system --add safe.directory "*"
# Install build tools for Python packages that require compilation
# (e.g., ruamel.yaml.clibz used by ESP-IDF's idf-component-manager)

View File

@@ -1864,8 +1864,6 @@ void APIConnection::on_fatal_error() {
this->flags_.remove = true;
}
// Appends `item` to the back of `items`.
void __attribute__((flatten)) APIConnection::DeferredBatch::push_item(const BatchItem &item) { items.push_back(item); }
void APIConnection::DeferredBatch::add_item(EntityBase *entity, uint8_t message_type, uint8_t estimated_size,
uint8_t aux_data_index) {
// Check if we already have a message of this type for this entity
@@ -1882,7 +1880,7 @@ void APIConnection::DeferredBatch::add_item(EntityBase *entity, uint8_t message_
}
}
// No existing item found (or event), add new one
this->push_item({entity, message_type, estimated_size, aux_data_index});
items.push_back({entity, message_type, estimated_size, aux_data_index});
}
void APIConnection::DeferredBatch::add_item_front(EntityBase *entity, uint8_t message_type, uint8_t estimated_size) {
@@ -1890,7 +1888,7 @@ void APIConnection::DeferredBatch::add_item_front(EntityBase *entity, uint8_t me
// This avoids expensive vector::insert which shifts all elements
// Note: We only ever have one high-priority message at a time (ping OR disconnect)
// If we're disconnecting, pings are blocked, so this simple swap is sufficient
this->push_item({entity, message_type, estimated_size, AUX_DATA_UNUSED});
items.push_back({entity, message_type, estimated_size, AUX_DATA_UNUSED});
if (items.size() > 1) {
// Swap the new high-priority item to the front
std::swap(items.front(), items.back());

View File

@@ -541,8 +541,6 @@ class APIConnection final : public APIServerConnectionBase {
uint8_t aux_data_index = AUX_DATA_UNUSED);
// Add item to the front of the batch (for high priority messages like ping)
void add_item_front(EntityBase *entity, uint8_t message_type, uint8_t estimated_size);
// Single push_back site to avoid duplicate _M_realloc_insert instantiation
void push_item(const BatchItem &item);
// Clear all items
void clear() {

View File

@@ -138,12 +138,10 @@ APIError APINoiseFrameHelper::handle_noise_error_(int err, const LogString *func
/// Run through handshake messages (if in that phase)
APIError APINoiseFrameHelper::loop() {
// Cache ready() outside the loop. On ESP8266 LWIP raw TCP, ready() returns false once
// the rx buffer is consumed. Re-checking each iteration would block handshake writes
// that must follow reads, deadlocking the handshake. state_action() will return
// WOULD_BLOCK when no more data is available to read.
bool socket_ready = this->socket_->ready();
while (state_ != State::DATA && socket_ready) {
// During handshake phase, process as many actions as possible until we can't progress
// socket_->ready() stays true until next main loop, but state_action() will return
// WOULD_BLOCK when no more data is available to read
while (state_ != State::DATA && this->socket_->ready()) {
APIError err = state_action_();
if (err == APIError::WOULD_BLOCK) {
break;

View File

@@ -148,16 +148,12 @@ void APIServer::loop() {
while (client_index < this->clients_.size()) {
auto &client = this->clients_[client_index];
// Common case: process active client
if (!client->flags_.remove) {
client->loop();
}
// Handle disconnection promptly - close socket to free LWIP PCB
// resources and prevent retransmit crashes on ESP8266.
if (client->flags_.remove) {
// Rare case: handle disconnection (don't increment - swapped element needs processing)
this->remove_client_(client_index);
} else {
// Common case: process active client
client->loop();
client_index++;
}
}
@@ -199,7 +195,7 @@ void APIServer::remove_client_(size_t client_index) {
#endif
}
void __attribute__((flatten)) APIServer::accept_new_connections_() {
void APIServer::accept_new_connections_() {
while (true) {
struct sockaddr_storage source_addr;
socklen_t addr_len = sizeof(source_addr);

View File

@@ -198,8 +198,7 @@ EntityMatchResult UrlMatch::match_entity(EntityBase *entity) const {
#if !defined(USE_ESP32) && defined(USE_ARDUINO)
// helper for allowing only unique entries in the queue
void __attribute__((flatten))
DeferredUpdateEventSource::deq_push_back_with_dedup_(void *source, message_generator_t *message_generator) {
void DeferredUpdateEventSource::deq_push_back_with_dedup_(void *source, message_generator_t *message_generator) {
DeferredEvent item(source, message_generator);
// Use range-based for loop instead of std::find_if to reduce template instantiation overhead and binary size

View File

@@ -3,6 +3,7 @@
#include <cassert>
#include <cinttypes>
#include <cmath>
#include <type_traits>
#ifdef USE_ESP32
#if (ESP_IDF_VERSION_MAJOR >= 5 && ESP_IDF_VERSION_MINOR >= 1)
@@ -487,19 +488,6 @@ bool WiFiComponent::matches_configured_network_(const char *ssid, const uint8_t
return false;
}
// Records `priority` for `bssid` in sta_priorities_: overwrites the priority of the
// first entry whose bssid matches, otherwise appends a new WiFiSTAPriority entry.
void __attribute__((flatten)) WiFiComponent::set_sta_priority(bssid_t bssid, int8_t priority) {
for (auto &it : this->sta_priorities_) {
if (it.bssid == bssid) {
// Existing entry for this BSSID - update it in place and stop
it.priority = priority;
return;
}
}
// No entry matched - add one for this BSSID
this->sta_priorities_.push_back(WiFiSTAPriority{
.bssid = bssid,
.priority = priority,
});
}
void WiFiComponent::log_discarded_scan_result_(const char *ssid, const uint8_t *bssid, int8_t rssi, uint8_t channel) {
#if ESPHOME_LOG_LEVEL >= ESPHOME_LOG_LEVEL_VERBOSE
// Skip logging during roaming scans to avoid log buffer overflow
@@ -1332,20 +1320,61 @@ void WiFiComponent::start_scanning() {
// Using insertion sort instead of std::stable_sort saves flash memory
// by avoiding template instantiations (std::rotate, std::stable_sort, lambdas)
// IMPORTANT: This sort is stable (preserves relative order of equal elements)
//
// Uses raw memcpy instead of copy assignment to avoid CompactString's
// destructor/constructor overhead (heap delete[]/new[] for long SSIDs).
// Copy assignment calls ~CompactString() then placement-new for every shift,
// which means delete[]/new[] per shift for heap-allocated SSIDs. With 70+
// networks (e.g., captive portal showing full scan results), this caused
// event loop blocking from hundreds of heap operations in a tight loop.
//
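// This is safe because we're permuting elements within the same array —
// a shift briefly leaves the same bytes in two adjacent slots, but the stale
// slot is always overwritten (by the next shift or by the key) before the pass
// ends, so every element ends up in exactly one slot and no ownership
// duplication occurs.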
// All members of WiFiScanResult are either trivially copyable (bssid, channel,
// rssi, priority, flags) or CompactString, which stores either inline data or
// a heap pointer — never a self-referential pointer (unlike std::string's SSO
// on some implementations). This was not possible before PR#13472 replaced
// std::string with CompactString, since std::string's internal layout is
// implementation-defined and may use self-referential pointers.
//
// TODO: If C++ standardizes std::trivially_relocatable, add the assertion for
// WiFiScanResult/CompactString here to formally express the memcpy safety guarantee.
// Sorts `results` in place with a stable insertion sort, placing an element ahead of
// another only when wifi_scan_result_is_better() says it is strictly better.
// Elements are relocated with raw byte copies of sizeof(WiFiScanResult) bytes; the
// static_asserts below guard the layout assumptions that approach depends on.
template<typename VectorType> static void insertion_sort_scan_results(VectorType &results) {
// memcpy-based sort requires no self-referential pointers or virtual dispatch.
// These static_asserts guard the assumptions. If any fire, the memcpy sort
// must be reviewed for safety before updating the expected values.
//
// No vtable pointers (memcpy would corrupt vptr)
static_assert(!std::is_polymorphic<WiFiScanResult>::value, "WiFiScanResult must not have vtable");
static_assert(!std::is_polymorphic<CompactString>::value, "CompactString must not have vtable");
// Standard layout ensures predictable memory layout with no virtual bases
// and no mixed-access-specifier reordering
static_assert(std::is_standard_layout<WiFiScanResult>::value, "WiFiScanResult must be standard layout");
static_assert(std::is_standard_layout<CompactString>::value, "CompactString must be standard layout");
// Size checks catch added/removed fields that may need safety review
static_assert(sizeof(WiFiScanResult) == 32, "WiFiScanResult size changed - verify memcpy sort is still safe");
static_assert(sizeof(CompactString) == 20, "CompactString size changed - verify memcpy sort is still safe");
// key_buf below gets its alignment from alignas(WiFiScanResult); this check only
// rejects an over-aligned WiFiScanResult (alignment beyond max_align_t) so that the
// reinterpret_cast of key_buf stays valid
static_assert(alignof(WiFiScanResult) <= alignof(std::max_align_t), "WiFiScanResult alignment exceeds max_align_t");
const size_t size = results.size();
constexpr size_t elem_size = sizeof(WiFiScanResult);
// Suppress warnings for intentional memcpy on non-trivially-copyable type.
// Safety is guaranteed by the static_asserts above and the permutation invariant.
// memcpy is called through this pointer rather than by name.
// NOLINTNEXTLINE(bugprone-undefined-memory-manipulation)
auto *memcpy_fn = &memcpy;
for (size_t i = 1; i < size; i++) {
// NOTE(review): this listing contains both a copy-assignment form and a memcpy form
// of the key capture, the shift and the final store - confirm which one the file uses.
// Make a copy to avoid issues with move semantics during comparison
WiFiScanResult key = results[i];
// Byte-copy results[i] into an aligned scratch buffer and view those bytes as a
// const WiFiScanResult; key_buf is a plain byte array, so no WiFiScanResult
// constructor or destructor runs for it
alignas(WiFiScanResult) uint8_t key_buf[elem_size];
memcpy_fn(key_buf, &results[i], elem_size);
const auto &key = *reinterpret_cast<const WiFiScanResult *>(key_buf);
// Signed index so the `j >= 0` test below can stop the scan at the front of the array
int32_t j = i - 1;
// Move elements that are worse than key to the right
// For stability, we only move if key is strictly better than results[j]
while (j >= 0 && wifi_scan_result_is_better(key, results[j])) {
results[j + 1] = results[j];
memcpy_fn(&results[j + 1], &results[j], elem_size);
j--;
}
// Slot j + 1 is the insertion point: everything shifted right now sits after it
results[j + 1] = key;
memcpy_fn(&results[j + 1], key_buf, elem_size);
}
}

View File

@@ -10,6 +10,7 @@
#include <span>
#include <string>
#include <type_traits>
#include <vector>
#ifdef USE_LIBRETINY
@@ -219,6 +220,14 @@ class CompactString {
};
static_assert(sizeof(CompactString) == 20, "CompactString must be exactly 20 bytes");
// CompactString is not trivially copyable (non-trivial destructor/copy for heap case).
// However, its layout has no self-referential pointers: storage_[] contains either inline
// data or an external heap pointer — never a pointer to itself. This is unlike libstdc++
// std::string SSO where _M_p points to _M_local_buf within the same object.
// This property allows memcpy-based permutation sorting where each element ends up in
// exactly one slot (no ownership duplication). These asserts document that layout property.
static_assert(std::is_standard_layout<CompactString>::value, "CompactString must be standard layout");
static_assert(!std::is_polymorphic<CompactString>::value, "CompactString must not have vtable");
class WiFiAP {
friend class WiFiComponent;
@@ -488,7 +497,18 @@ class WiFiComponent : public Component {
}
return 0;
}
void set_sta_priority(bssid_t bssid, int8_t priority);
// Records `priority` for `bssid` in sta_priorities_: overwrites the priority of the
// first entry whose bssid matches, otherwise appends a new WiFiSTAPriority entry.
void set_sta_priority(const bssid_t bssid, int8_t priority) {
for (auto &it : this->sta_priorities_) {
if (it.bssid == bssid) {
// Existing entry for this BSSID - update it in place and stop
it.priority = priority;
return;
}
}
// No entry matched - add one for this BSSID
this->sta_priorities_.push_back(WiFiSTAPriority{
.bssid = bssid,
.priority = priority,
});
}
network::IPAddresses wifi_sta_ip_addresses();
// Remove before 2026.9.0

View File

@@ -270,14 +270,6 @@ async def test_alarm_control_panel_state_transitions(
# The chime_sensor has chime: true, so opening it while disarmed
# should trigger on_chime callback
# Set up future for the on_ready from opening the chime sensor
# (alarm becomes "not ready" when chime sensor opens).
# We must wait for this BEFORE creating the close future, otherwise
# the open event's log can arrive late and resolve the close future,
# causing the test to proceed before the chime close is processed.
ready_after_chime_open: asyncio.Future[bool] = loop.create_future()
ready_futures.append(ready_after_chime_open)
# We're currently DISARMED - open the chime sensor
client.switch_command(chime_switch_info.key, True)
@@ -287,18 +279,11 @@ async def test_alarm_control_panel_state_transitions(
except TimeoutError:
pytest.fail(f"on_chime callback not fired. Log lines: {log_lines[-20:]}")
# Wait for the on_ready from the chime sensor opening
try:
await asyncio.wait_for(ready_after_chime_open, timeout=2.0)
except TimeoutError:
pytest.fail(
f"on_ready callback not fired when chime sensor opened. "
f"Log lines: {log_lines[-20:]}"
)
# Now create the future for the close event and close the sensor.
# Since we waited for the open event above, the close event's
# on_ready log cannot be confused with the open event's.
# Close the chime sensor and wait for alarm to become ready again
# We need to wait for this transition before testing door sensor,
# otherwise there's a race where the door sensor state change could
# arrive before the chime sensor state change, leaving the alarm in
# a continuous "not ready" state with no on_ready callback fired.
ready_after_chime_close: asyncio.Future[bool] = loop.create_future()
ready_futures.append(ready_after_chime_close)