From 99a77e95499206487f3d908939d6752ebfa6bd94 Mon Sep 17 00:00:00 2001
From: "J. Nick Koston" <nick@home-assistant.io>
Date: Wed, 18 Feb 2026 08:44:13 -0600
Subject: [PATCH] Add single-byte fast path to ProtoVarInt::parse

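Single-byte varints (0-127) are the most common case in protobuf
messages (booleans, small enums, field tags for field numbers 1-15).
Check the first byte up front and return immediately for these values,
skipping the multi-byte parsing loop entirely.

Because the first byte is now read before the loop, an explicit
len == 0 check is added ahead of it, and the loop starts at byte 1
with the low 7 bits of byte 0 already folded into the result.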
---
 esphome/components/api/proto.h | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/esphome/components/api/proto.h b/esphome/components/api/proto.h
index bb4abdfc49..69b0440e38 100644
--- a/esphome/components/api/proto.h
+++ b/esphome/components/api/proto.h
@@ -108,18 +108,26 @@ class ProtoVarInt {
 #ifdef ESPHOME_DEBUG_API
     assert(consumed != nullptr);
 #endif
-    // 32-bit phase: shifts 0, 7, 14, 21 are native on 32-bit platforms.
-    // Without USE_API_VARINT64: also cover byte 4 (shift 28) — the uint32_t
-    // shift truncates upper bits but those are always zero for valid uint32 values.
-    // With USE_API_VARINT64: stop at byte 3 so parse_wide handles byte 4+
-    // with full 64-bit arithmetic (avoids truncating values > UINT32_MAX).
-    uint32_t result32 = 0;
+    if (len == 0)
+      return {};
+    // Fast path: single-byte varints (0-127) are the most common case
+    // (booleans, small enums, field tags). Avoid loop overhead entirely.
+    if ((buffer[0] & 0x80) == 0) {
+      *consumed = 1;
+      return ProtoVarInt(buffer[0]);
+    }
+    // 32-bit phase: process remaining bytes with native 32-bit shifts.
+    // Without USE_API_VARINT64: cover bytes 1-4 (shifts 7, 14, 21, 28) — the uint32_t
+    // shift at byte 4 truncates upper bits but those are always zero for valid uint32 values.
+    // With USE_API_VARINT64: cover bytes 1-3 (shifts 7, 14, 21) so parse_wide handles
+    // byte 4+ with full 64-bit arithmetic (avoids truncating values > UINT32_MAX).
+    uint32_t result32 = buffer[0] & 0x7F;
 #ifdef USE_API_VARINT64
     uint32_t limit = std::min(len, uint32_t(4));
 #else
     uint32_t limit = std::min(len, uint32_t(5));
 #endif
-    for (uint32_t i = 0; i < limit; i++) {
+    for (uint32_t i = 1; i < limit; i++) {
       uint8_t val = buffer[i];
       result32 |= uint32_t(val & 0x7F) << (i * 7);
       if ((val & 0x80) == 0) {