From 398923615420643dbda372bb253acddfcaa3bd4d Mon Sep 17 00:00:00 2001
From: "J. Nick Koston"
Date: Tue, 17 Feb 2026 18:54:25 -0600
Subject: [PATCH] [api] Split ProtoVarInt::parse into 32-bit and 64-bit phases

On 32-bit platforms (ESP32 Xtensa), 64-bit shifts in varint parsing
compile to __ashldi3 library calls. Since the vast majority of protobuf
varint fields (message types, sizes, enum values, sensor readings) fit
in 4 bytes, the 64-bit arithmetic is unnecessary overhead on the common
path.

Split parse() into two phases:
- Bytes 0-3: uint32_t loop with native 32-bit shifts (0, 7, 14, 21)
- Bytes 4-9: noinline parse_wide_() with uint64_t, only for BLE
  addresses and other 64-bit fields

The code generator auto-detects which proto messages use
int64/uint64/sint64 fields and emits USE_API_VARINT64 conditionally.
On non-BLE configs, parse_wide_() and the 64-bit accessors (as_uint64,
as_int64, as_sint64) are compiled out entirely.

Saves ~40 bytes flash on non-BLE configs.

Benchmark shows 25-50% faster parsing for 1-4 byte varints (the common
case).
--- esphome/components/api/api_pb2.h | 3 ++ esphome/components/api/proto.cpp | 17 +++++++ esphome/components/api/proto.h | 73 ++++++++++++++--------------- esphome/core/defines.h | 1 + script/api_protobuf/api_protobuf.py | 50 ++++++++++++++++++-- 5 files changed, 100 insertions(+), 44 deletions(-) diff --git a/esphome/components/api/api_pb2.h b/esphome/components/api/api_pb2.h index d001f869c5..8424f3b629 100644 --- a/esphome/components/api/api_pb2.h +++ b/esphome/components/api/api_pb2.h @@ -3,6 +3,9 @@ #pragma once #include "esphome/core/defines.h" +#ifdef USE_BLUETOOTH_PROXY +#define USE_API_VARINT64 +#endif #include "esphome/core/string_ref.h" #include "proto.h" diff --git a/esphome/components/api/proto.cpp b/esphome/components/api/proto.cpp index 2a0ddf91db..74aca55103 100644 --- a/esphome/components/api/proto.cpp +++ b/esphome/components/api/proto.cpp @@ -7,6 +7,23 @@ namespace esphome::api { static const char *const TAG = "api.proto"; +#ifdef USE_API_VARINT64 +optional ProtoVarInt::parse_wide_(const uint8_t *buffer, uint32_t len, uint32_t *consumed, + uint32_t result32) { + uint64_t result64 = result32; + uint32_t limit = std::min(len, uint32_t(10)); + for (uint32_t i = 4; i < limit; i++) { + uint8_t val = buffer[i]; + result64 |= uint64_t(val & 0x7F) << (i * 7); + if ((val & 0x80) == 0) { + *consumed = i + 1; + return ProtoVarInt(result64); + } + } + return {}; +} +#endif + uint32_t ProtoDecodableMessage::count_repeated_field(const uint8_t *buffer, size_t length, uint32_t target_field_id) { uint32_t count = 0; const uint8_t *ptr = buffer; diff --git a/esphome/components/api/proto.h b/esphome/components/api/proto.h index 41ea0043f9..2dc92e46a7 100644 --- a/esphome/components/api/proto.h +++ b/esphome/components/api/proto.h @@ -94,65 +94,60 @@ class ProtoVarInt { explicit ProtoVarInt(uint64_t value) : value_(value) {} static optional parse(const uint8_t *buffer, uint32_t len, uint32_t *consumed) { - if (len == 0) { - if (consumed != nullptr) - *consumed = 0; 
- return {}; - } - - // Most common case: single-byte varint (values 0-127) - if ((buffer[0] & 0x80) == 0) { - if (consumed != nullptr) - *consumed = 1; - return ProtoVarInt(buffer[0]); - } - - // General case for multi-byte varints - // Since we know buffer[0]'s high bit is set, initialize with its value - uint64_t result = buffer[0] & 0x7F; - uint8_t bitpos = 7; - - // A 64-bit varint is at most 10 bytes (ceil(64/7)). Reject overlong encodings - // to avoid undefined behavior from shifting uint64_t by >= 64 bits. - uint32_t max_len = std::min(len, uint32_t(10)); - - // Start from the second byte since we've already processed the first - for (uint32_t i = 1; i < max_len; i++) { +#ifdef ESPHOME_DEBUG_API + assert(consumed != nullptr); +#endif + // 32-bit phase: bytes 0-3 (shifts 0, 7, 14, 21 — all native on 32-bit platforms) + uint32_t result32 = 0; + uint32_t limit = std::min(len, uint32_t(4)); + for (uint32_t i = 0; i < limit; i++) { uint8_t val = buffer[i]; - result |= uint64_t(val & 0x7F) << uint64_t(bitpos); - bitpos += 7; + result32 |= uint32_t(val & 0x7F) << (i * 7); if ((val & 0x80) == 0) { - if (consumed != nullptr) - *consumed = i + 1; - return ProtoVarInt(result); + *consumed = i + 1; + return ProtoVarInt(result32); } } - - if (consumed != nullptr) - *consumed = 0; - return {}; // Incomplete or invalid varint + // 64-bit phase for values > 28 bits (BLE addresses etc.) +#ifdef USE_API_VARINT64 + return parse_wide_(buffer, len, consumed, result32); +#else + return {}; +#endif } +#ifdef USE_API_VARINT64 + protected: + /// Continue parsing varint bytes 4-9 with 64-bit arithmetic. + /// Separated to keep 64-bit shift code (__ashldi3 on 32-bit platforms) out of the common path. 
+ static optional parse_wide_(const uint8_t *buffer, uint32_t len, uint32_t *consumed, uint32_t result32) + __attribute__((noinline)); + + public: +#endif + constexpr uint16_t as_uint16() const { return this->value_; } constexpr uint32_t as_uint32() const { return this->value_; } - constexpr uint64_t as_uint64() const { return this->value_; } constexpr bool as_bool() const { return this->value_; } constexpr int32_t as_int32() const { // Not ZigZag encoded - return static_cast(this->as_int64()); - } - constexpr int64_t as_int64() const { - // Not ZigZag encoded - return static_cast(this->value_); + return static_cast(this->value_); } constexpr int32_t as_sint32() const { // with ZigZag encoding return decode_zigzag32(static_cast(this->value_)); } +#ifdef USE_API_VARINT64 + constexpr uint64_t as_uint64() const { return this->value_; } + constexpr int64_t as_int64() const { + // Not ZigZag encoded + return static_cast(this->value_); + } constexpr int64_t as_sint64() const { // with ZigZag encoding return decode_zigzag64(this->value_); } +#endif /** * Encode the varint value to a pre-allocated buffer without bounds checking. 
* diff --git a/esphome/core/defines.h b/esphome/core/defines.h index ee865a7e65..80f4d228ec 100644 --- a/esphome/core/defines.h +++ b/esphome/core/defines.h @@ -136,6 +136,7 @@ #define USE_API_HOMEASSISTANT_SERVICES #define USE_API_HOMEASSISTANT_STATES #define USE_API_NOISE +#define USE_API_VARINT64 #define USE_API_PLAINTEXT #define USE_API_USER_DEFINED_ACTIONS #define USE_API_CUSTOM_SERVICES diff --git a/script/api_protobuf/api_protobuf.py b/script/api_protobuf/api_protobuf.py index 4fbee49dae..2324708324 100755 --- a/script/api_protobuf/api_protobuf.py +++ b/script/api_protobuf/api_protobuf.py @@ -1905,6 +1905,34 @@ def build_type_usage_map( ) +def get_varint64_ifdef( + file_desc: descriptor.FileDescriptorProto, + message_ifdef_map: dict[str, str | None], +) -> tuple[bool, str | None]: + """Check if 64-bit varint fields exist and get their common ifdef guard. + + Returns: + (has_varint64, ifdef_guard) - has_varint64 is True if any fields exist, + ifdef_guard is the common guard or None if unconditional. + """ + varint64_types = { + FieldDescriptorProto.TYPE_INT64, + FieldDescriptorProto.TYPE_UINT64, + FieldDescriptorProto.TYPE_SINT64, + } + ifdefs: set[str | None] = { + message_ifdef_map.get(msg.name) + for msg in file_desc.message_type + if not msg.options.deprecated + for field in msg.field + if not field.options.deprecated and field.type in varint64_types + } + if not ifdefs: + return False, None + ifdefs.discard(None) + return True, ifdefs.pop() if len(ifdefs) == 1 else None + + def build_enum_type(desc, enum_ifdef_map) -> tuple[str, str, str]: """Builds the enum type. @@ -2559,11 +2587,28 @@ def main() -> None: file = d.file[0] + # Build dynamic ifdef mappings early so we can emit USE_API_VARINT64 before includes + enum_ifdef_map, message_ifdef_map, message_source_map, used_messages = ( + build_type_usage_map(file) + ) + + # Find the ifdef guard for 64-bit varint fields (int64/uint64/sint64). 
+ # Emitted before proto.h so parse_wide_() and 64-bit accessors are available. + has_varint64, varint64_guard = get_varint64_ifdef(file, message_ifdef_map) + content = FILE_HEADER content += """\ #pragma once #include "esphome/core/defines.h" +""" + if has_varint64: + content += "\n".join( + wrap_with_ifdef(["#define USE_API_VARINT64"], varint64_guard) + ) + content += "\n" + + content += """\ #include "esphome/core/string_ref.h" #include "proto.h" @@ -2694,11 +2739,6 @@ static void dump_bytes_field(DumpBuffer &out, const char *field_name, const uint content += "namespace enums {\n\n" - # Build dynamic ifdef mappings for both enums and messages - enum_ifdef_map, message_ifdef_map, message_source_map, used_messages = ( - build_type_usage_map(file) - ) - # Simple grouping of enums by ifdef current_ifdef = None