[api] Split ProtoVarInt::parse into 32-bit and 64-bit phases

On 32-bit platforms (ESP32 Xtensa), 64-bit shifts in varint parsing
compile to __ashldi3 library calls. Since the vast majority of protobuf
varint fields (message types, sizes, enum values, sensor readings) fit
in 4 bytes, the 64-bit arithmetic is unnecessary overhead on the common
path.

Split parse() into two phases:
- Bytes 0-3: uint32_t loop with native 32-bit shifts (0, 7, 14, 21)
- Bytes 4-9: noinline parse_wide_() with uint64_t, only for BLE
  addresses and other 64-bit fields

The code generator auto-detects which proto messages use int64/uint64/
sint64 fields and emits USE_API_VARINT64 conditionally. On non-BLE
configs, parse_wide_() and the 64-bit accessors (as_uint64, as_int64,
as_sint64) are compiled out entirely.

Saves ~40 bytes flash on non-BLE configs. Benchmark shows 25-50%
faster parsing for 1-4 byte varints (the common case).
This commit is contained in:
J. Nick Koston
2026-02-17 18:54:25 -06:00
parent 2585779f11
commit 3989236154
5 changed files with 100 additions and 44 deletions

View File

@@ -3,6 +3,9 @@
#pragma once
#include "esphome/core/defines.h"
#ifdef USE_BLUETOOTH_PROXY
#define USE_API_VARINT64
#endif
#include "esphome/core/string_ref.h"
#include "proto.h"

View File

@@ -7,6 +7,23 @@ namespace esphome::api {
static const char *const TAG = "api.proto";
#ifdef USE_API_VARINT64
optional<ProtoVarInt> ProtoVarInt::parse_wide_(const uint8_t *buffer, uint32_t len, uint32_t *consumed,
uint32_t result32) {
  // Widen the bits the caller already accumulated; everything from byte 4 onwards
  // is shifted by 28 or more and therefore needs a 64-bit accumulator.
  uint64_t value = result32;
  // Never look past byte 9: the largest shift used below is then 9 * 7 = 63.
  const uint32_t end = std::min(len, uint32_t(10));
  uint32_t index = 4;
  while (index < end) {
    const uint8_t byte = buffer[index];
    value |= uint64_t(byte & 0x7F) << (index * 7);
    index++;
    if ((byte & 0x80) != 0)
      continue;  // continuation bit set, keep reading
    // Final byte reached: report the total encoded length, counted from byte 0.
    *consumed = index;
    return ProtoVarInt(value);
  }
  // Ran out of input, or no terminating byte within the first 10 bytes.
  return {};
}
#endif
uint32_t ProtoDecodableMessage::count_repeated_field(const uint8_t *buffer, size_t length, uint32_t target_field_id) {
uint32_t count = 0;
const uint8_t *ptr = buffer;

View File

@@ -94,65 +94,60 @@ class ProtoVarInt {
/// Construct a varint wrapper around a raw unsigned value.
explicit ProtoVarInt(uint64_t value)
    : value_(value) {
}
/// Decode one protobuf varint from the start of `buffer`, reading at most `len` bytes.
/// On success the decoded value is returned and `*consumed` receives the number of bytes used;
/// an empty optional is returned for empty, incomplete or overlong input.
/// NOTE(review): this span is a rendered diff hunk whose +/- markers were lost, so the removed
/// single-pass uint64_t loop and the added 32-bit/64-bit split appear interleaved and the braces
/// do not balance — confirm the final shape against the real proto.h before relying on it.
static optional<ProtoVarInt> parse(const uint8_t *buffer, uint32_t len, uint32_t *consumed) {
// Nothing to decode from an empty buffer
if (len == 0) {
if (consumed != nullptr)
*consumed = 0;
return {};
}
// Most common case: single-byte varint (values 0-127)
if ((buffer[0] & 0x80) == 0) {
if (consumed != nullptr)
*consumed = 1;
return ProtoVarInt(buffer[0]);
}
// General case for multi-byte varints
// Since we know buffer[0]'s high bit is set, initialize with its value
uint64_t result = buffer[0] & 0x7F;
uint8_t bitpos = 7;
// A 64-bit varint is at most 10 bytes (ceil(64/7)). Reject overlong encodings
// to avoid undefined behavior from shifting uint64_t by >= 64 bits.
uint32_t max_len = std::min(len, uint32_t(10));
// Start from the second byte since we've already processed the first
for (uint32_t i = 1; i < max_len; i++) {
// Debug builds check the out-parameter up front; the 32-bit loop below writes through it unchecked
#ifdef ESPHOME_DEBUG_API
assert(consumed != nullptr);
#endif
// 32-bit phase: bytes 0-3 (shifts 0, 7, 14, 21 — all native on 32-bit platforms)
uint32_t result32 = 0;
uint32_t limit = std::min(len, uint32_t(4));
for (uint32_t i = 0; i < limit; i++) {
uint8_t val = buffer[i];
result |= uint64_t(val & 0x7F) << uint64_t(bitpos);
bitpos += 7;
// Each byte contributes its low 7 bits, placed 7 bits higher than the previous byte's
result32 |= uint32_t(val & 0x7F) << (i * 7);
// A clear high bit marks the last byte of the varint
if ((val & 0x80) == 0) {
if (consumed != nullptr)
*consumed = i + 1;
return ProtoVarInt(result);
*consumed = i + 1;
return ProtoVarInt(result32);
}
}
if (consumed != nullptr)
*consumed = 0;
return {}; // Incomplete or invalid varint
// 64-bit phase for values > 28 bits (BLE addresses etc.)
// Without USE_API_VARINT64 a varint that has not terminated within 4 bytes is rejected
#ifdef USE_API_VARINT64
return parse_wide_(buffer, len, consumed, result32);
#else
return {};
#endif
}
#ifdef USE_API_VARINT64
protected:
/// Continue parsing varint bytes 4-9 with 64-bit arithmetic.
/// Separated to keep 64-bit shift code (__ashldi3 on 32-bit platforms) out of the common path.
/// @param buffer start of the varint (byte 0); this function reads from index 4 onwards
/// @param len number of readable bytes in buffer; at most the first 10 are examined
/// @param consumed receives the total encoded length on success; written without a null check
/// @param result32 bits already accumulated from the earlier bytes by the caller
/// @return the decoded value, or an empty optional if no terminating byte is found in range
static optional<ProtoVarInt> parse_wide_(const uint8_t *buffer, uint32_t len, uint32_t *consumed, uint32_t result32)
__attribute__((noinline));
public:
#endif
/// Returns the stored value converted to uint16_t.
constexpr uint16_t as_uint16() const {
  return static_cast<uint16_t>(this->value_);
}
/// Returns the stored value converted to uint32_t.
constexpr uint32_t as_uint32() const {
  return static_cast<uint32_t>(this->value_);
}
/// Returns the stored value converted to uint64_t.
constexpr uint64_t as_uint64() const {
  return static_cast<uint64_t>(this->value_);
}
/// Returns true when the stored value is non-zero.
constexpr bool as_bool() const {
  return static_cast<bool>(this->value_);
}
/// Returns the stored value as a plain (non-ZigZag) signed 32-bit integer.
/// NOTE(review): old and new revisions are interleaved in this span (the diff markers were
/// lost): one form routes through as_int64(), the other casts value_ directly, and the second
/// body below carries two successive return statements — confirm against the real header.
constexpr int32_t as_int32() const {
// Not ZigZag encoded
return static_cast<int32_t>(this->as_int64());
}
/// Returns the stored value as a plain (non-ZigZag) signed 64-bit integer.
constexpr int64_t as_int64() const {
// Not ZigZag encoded
return static_cast<int64_t>(this->value_);
return static_cast<int32_t>(this->value_);
}
/// Returns the stored value decoded as a ZigZag-encoded signed 32-bit integer.
constexpr int32_t as_sint32() const {
  // The value is first converted to uint32_t, then handed to the ZigZag decoder.
  const uint32_t zigzag = static_cast<uint32_t>(this->value_);
  return decode_zigzag32(zigzag);
}
#ifdef USE_API_VARINT64
/// Returns the stored value converted to uint64_t.
constexpr uint64_t as_uint64() const {
  return static_cast<uint64_t>(this->value_);
}
/// Returns the stored value as a plain signed 64-bit integer (no ZigZag decoding applied).
constexpr int64_t as_int64() const {
  const auto plain = static_cast<int64_t>(this->value_);
  return plain;
}
/// Returns the stored value decoded as a ZigZag-encoded signed 64-bit integer.
constexpr int64_t as_sint64() const {
  const auto zigzag = this->value_;
  return decode_zigzag64(zigzag);
}
#endif
/**
* Encode the varint value to a pre-allocated buffer without bounds checking.
*

View File

@@ -136,6 +136,7 @@
#define USE_API_HOMEASSISTANT_SERVICES
#define USE_API_HOMEASSISTANT_STATES
#define USE_API_NOISE
#define USE_API_VARINT64
#define USE_API_PLAINTEXT
#define USE_API_USER_DEFINED_ACTIONS
#define USE_API_CUSTOM_SERVICES

View File

@@ -1905,6 +1905,34 @@ def build_type_usage_map(
)
def get_varint64_ifdef(
    file_desc: descriptor.FileDescriptorProto,
    message_ifdef_map: dict[str, str | None],
) -> tuple[bool, str | None]:
    """Check if 64-bit varint fields exist and get their common ifdef guard.

    Scans every non-deprecated message in ``file_desc`` for non-deprecated
    int64/uint64/sint64 fields and collects the ifdef guard of each message
    that has at least one such field.

    Args:
        file_desc: Proto file descriptor whose ``message_type`` entries are scanned.
        message_ifdef_map: Maps a message name to its ifdef guard. A missing
            entry or a ``None`` value means the message has no guard.

    Returns:
        (has_varint64, ifdef_guard) - has_varint64 is True if any such field
        exists. ifdef_guard is the single guard shared by every message that
        uses a 64-bit varint, or None when the define must be unconditional
        (some message using one is unguarded, or the guards differ).
    """
    varint64_types = {
        FieldDescriptorProto.TYPE_INT64,
        FieldDescriptorProto.TYPE_UINT64,
        FieldDescriptorProto.TYPE_SINT64,
    }
    ifdefs: set[str | None] = {
        message_ifdef_map.get(msg.name)
        for msg in file_desc.message_type
        if not msg.options.deprecated
        for field in msg.field
        if not field.options.deprecated and field.type in varint64_types
    }
    if not ifdefs:
        return False, None
    # An unguarded message needs 64-bit varint support in every build, so its
    # presence must win over any guard the other messages happen to share.
    # Differing guards also fall back to an unconditional define.
    if None in ifdefs or len(ifdefs) != 1:
        return True, None
    return True, next(iter(ifdefs))
def build_enum_type(desc, enum_ifdef_map) -> tuple[str, str, str]:
"""Builds the enum type.
@@ -2559,11 +2587,28 @@ def main() -> None:
file = d.file[0]
# Build dynamic ifdef mappings early so we can emit USE_API_VARINT64 before includes
enum_ifdef_map, message_ifdef_map, message_source_map, used_messages = (
build_type_usage_map(file)
)
# Find the ifdef guard for 64-bit varint fields (int64/uint64/sint64).
# Emitted before proto.h so parse_wide_() and 64-bit accessors are available.
has_varint64, varint64_guard = get_varint64_ifdef(file, message_ifdef_map)
content = FILE_HEADER
content += """\
#pragma once
#include "esphome/core/defines.h"
"""
if has_varint64:
content += "\n".join(
wrap_with_ifdef(["#define USE_API_VARINT64"], varint64_guard)
)
content += "\n"
content += """\
#include "esphome/core/string_ref.h"
#include "proto.h"
@@ -2694,11 +2739,6 @@ static void dump_bytes_field(DumpBuffer &out, const char *field_name, const uint
content += "namespace enums {\n\n"
# Build dynamic ifdef mappings for both enums and messages
enum_ifdef_map, message_ifdef_map, message_source_map, used_messages = (
build_type_usage_map(file)
)
# Simple grouping of enums by ifdef
current_ifdef = None