[external_components] Clean up incomplete clone on failed ref fetch (#14051)

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-18 15:35:59 -07:00 · 2026-02-18 13:09:33 -05:00
parent 973656191b
commit 6b8264fcaa
5 changed files with 162 additions and 49 deletions
--- a/esphome/components/esp32/init.py
+++ b/esphome/components/esp32/init.py
@@ -44,9 +44,9 @@ from esphome.const import (
 from esphome.core import CORE, HexInt, TimePeriod
 from esphome.coroutine import CoroPriority, coroutine_with_priority
 import esphome.final_validate as fv
-from esphome.helpers import copy_file_if_changed, write_file_if_changed
+from esphome.helpers import copy_file_if_changed, rmtree, write_file_if_changed
 from esphome.types import ConfigType
-from esphome.writer import clean_cmake_cache, rmtree
+from esphome.writer import clean_cmake_cache

 from .boards import BOARDS, STANDARD_BOARDS
 from .const import (  # noqa
--- a/esphome/git.py
+++ b/esphome/git.py
@@ -5,12 +5,12 @@ import hashlib
 import logging
 from pathlib import Path
 import re
-import shutil
 import subprocess
 import urllib.parse

 import esphome.config_validation as cv
 from esphome.core import CORE, TimePeriodSeconds
+from esphome.helpers import rmtree

 _LOGGER = logging.getLogger(__name__)

@@ -115,24 +115,35 @@ def clone_or_update(
    if not repo_dir.is_dir():
        _LOGGER.info("Cloning %s", key)
        _LOGGER.debug("Location: %s", repo_dir)
-        cmd = ["git", "clone", "--depth=1"]
-        cmd += ["--", url, str(repo_dir)]
-        run_git_command(cmd)
+        try:
+            cmd = ["git", "clone", "--depth=1"]
+            cmd += ["--", url, str(repo_dir)]
+            run_git_command(cmd)

-        if ref is not None:
-            # We need to fetch the PR branch first, otherwise git will complain
-            # about missing objects
-            _LOGGER.info("Fetching %s", ref)
-            run_git_command(["git", "fetch", "--", "origin", ref], git_dir=repo_dir)
-            run_git_command(["git", "reset", "--hard", "FETCH_HEAD"], git_dir=repo_dir)
+            if ref is not None:
+                # We need to fetch the PR branch first, otherwise git will complain
+                # about missing objects
+                _LOGGER.info("Fetching %s", ref)
+                run_git_command(["git", "fetch", "--", "origin", ref], git_dir=repo_dir)
+                run_git_command(
+                    ["git", "reset", "--hard", "FETCH_HEAD"], git_dir=repo_dir
+                )

-        if submodules is not None:
-            _LOGGER.info(
-                "Initializing submodules (%s) for %s", ", ".join(submodules), key
-            )
-            run_git_command(
-                ["git", "submodule", "update", "--init"] + submodules, git_dir=repo_dir
-            )
+            if submodules is not None:
+                _LOGGER.info(
+                    "Initializing submodules (%s) for %s", ", ".join(submodules), key
+                )
+                run_git_command(
+                    ["git", "submodule", "update", "--init"] + submodules,
+                    git_dir=repo_dir,
+                )
+        except GitException:
+            # Remove incomplete clone to prevent stale state. Without this,
+            # a failed ref fetch leaves a clone on the default branch, and
+            # subsequent calls skip the update due to the refresh window.
+            if repo_dir.is_dir():
+                rmtree(repo_dir)
+            raise

    else:
        # Check refresh needed
@@ -193,7 +204,7 @@ def clone_or_update(
                    err,
                )
                _LOGGER.info("Removing broken repository at %s", repo_dir)
-                shutil.rmtree(repo_dir)
+                rmtree(repo_dir)
                _LOGGER.info("Successfully removed broken repository, re-cloning...")

                # Recursively call clone_or_update to re-clone
--- a/esphome/helpers.py
+++ b/esphome/helpers.py
@@ -8,6 +8,7 @@ from pathlib import Path
 import platform
 import re
 import shutil
+import stat
 import tempfile
 from typing import TYPE_CHECKING
 from urllib.parse import urlparse
@@ -354,6 +355,23 @@ def is_ha_addon():
    return get_bool_env("ESPHOME_IS_HA_ADDON")


+def rmtree(path: Path | str) -> None:
+    """Remove a directory tree, handling read-only files on Windows.
+
+    On Windows, git pack files and other files may be marked read-only,
+    causing shutil.rmtree to fail. This handles that by removing the
+    read-only flag and retrying.
+    """
+
+    def _onerror(func, path, exc_info):
+        if os.access(path, os.W_OK):
+            raise exc_info[1].with_traceback(exc_info[2])
+        os.chmod(path, stat.S_IWUSR | stat.S_IRUSR)
+        func(path)
+
+    shutil.rmtree(path, onerror=_onerror)
+
+
 def walk_files(path: Path):
    for root, _, files in os.walk(path):
        for name in files:
@@ -481,8 +499,6 @@ def list_starts_with(list_, sub):

 def file_compare(path1: Path, path2: Path) -> bool:
    """Return True if the files path1 and path2 have the same contents."""
-    import stat
-
    try:
        stat1, stat2 = path1.stat(), path2.stat()
    except OSError:
--- a/esphome/writer.py
+++ b/esphome/writer.py
@@ -1,14 +1,10 @@
-from collections.abc import Callable
 import importlib
 import json
 import logging
 import os
 from pathlib import Path
 import re
-import shutil
-import stat
 import time
-from types import TracebackType

 from esphome import loader
 from esphome.config import iter_component_configs, iter_components
@@ -25,6 +21,7 @@ from esphome.helpers import (
    get_str_env,
    is_ha_addon,
    read_file,
+    rmtree,
    walk_files,
    write_file,
    write_file_if_changed,
@@ -404,28 +401,6 @@ def clean_cmake_cache():
            pioenvs_cmake_path.unlink()


-def _rmtree_error_handler(
-    func: Callable[[str], object],
-    path: str,
-    exc_info: tuple[type[BaseException], BaseException, TracebackType | None],
-) -> None:
-    """Error handler for shutil.rmtree to handle read-only files on Windows.
-
-    On Windows, git pack files and other files may be marked read-only,
-    causing shutil.rmtree to fail with "Access is denied". This handler
-    removes the read-only flag and retries the deletion.
-    """
-    if os.access(path, os.W_OK):
-        raise exc_info[1].with_traceback(exc_info[2])
-    os.chmod(path, stat.S_IWUSR | stat.S_IRUSR)
-    func(path)
-
-
-def rmtree(path: Path | str) -> None:
-    """Remove a directory tree, handling read-only files on Windows."""
-    shutil.rmtree(path, onerror=_rmtree_error_handler)
-
-
 def clean_build(clear_pio_cache: bool = True):
    # Allow skipping cache cleaning for integration tests
    if os.environ.get("ESPHOME_SKIP_CLEAN_BUILD"):
--- a/tests/unit_tests/test_git.py
+++ b/tests/unit_tests/test_git.py
@@ -656,7 +656,7 @@ def test_clone_or_update_recover_broken_flag_prevents_infinite_loop(
    # Should raise on the second attempt when _recover_broken=False
    # This hits the "if not _recover_broken: raise" path
    with (
-        unittest.mock.patch("esphome.git.shutil.rmtree", side_effect=mock_rmtree),
+        unittest.mock.patch("esphome.git.rmtree", side_effect=mock_rmtree),
        pytest.raises(GitCommandError, match="fatal: unable to write new index file"),
    ):
        git.clone_or_update(
@@ -671,3 +671,114 @@ def test_clone_or_update_recover_broken_flag_prevents_infinite_loop(
    stash_calls = [c for c in call_list if "stash" in c[0][0]]
    # Should have exactly two stash calls
    assert len(stash_calls) == 2
+
+
+def test_clone_or_update_cleans_up_on_failed_ref_fetch(
+    tmp_path: Path, mock_run_git_command: Mock
+) -> None:
+    """Test that a failed ref fetch removes the incomplete clone directory.
+
+    When cloning with a specific ref, if `git clone` succeeds but the
+    subsequent `git fetch <ref>` fails, the clone directory should be
+    removed so the next attempt starts fresh instead of finding a stale
+    clone on the default branch.
+    """
+    CORE.config_path = tmp_path / "test.yaml"
+
+    url = "https://github.com/test/repo"
+    ref = "pull/123/head"
+    domain = "test"
+    repo_dir = _compute_repo_dir(url, ref, domain)
+
+    def git_command_side_effect(
+        cmd: list[str], cwd: str | None = None, **kwargs: Any
+    ) -> str:
+        cmd_type = _get_git_command_type(cmd)
+        if cmd_type == "clone":
+            # Simulate successful clone by creating the directory
+            repo_dir.mkdir(parents=True, exist_ok=True)
+            (repo_dir / ".git").mkdir(exist_ok=True)
+            return ""
+        if cmd_type == "fetch":
+            raise GitCommandError("fatal: couldn't find remote ref pull/123/head")
+        return ""
+
+    mock_run_git_command.side_effect = git_command_side_effect
+
+    refresh = TimePeriodSeconds(days=1)
+
+    with pytest.raises(GitCommandError, match="couldn't find remote ref"):
+        git.clone_or_update(
+            url=url,
+            ref=ref,
+            refresh=refresh,
+            domain=domain,
+        )
+
+    # The incomplete clone directory should have been removed
+    assert not repo_dir.exists()
+
+    # Verify clone was attempted then fetch failed
+    call_list = mock_run_git_command.call_args_list
+    clone_calls = [c for c in call_list if "clone" in c[0][0]]
+    assert len(clone_calls) == 1
+    fetch_calls = [c for c in call_list if "fetch" in c[0][0]]
+    assert len(fetch_calls) == 1
+
+
+def test_clone_or_update_stale_clone_is_retried_after_cleanup(
+    tmp_path: Path, mock_run_git_command: Mock
+) -> None:
+    """Test that after cleanup, a subsequent call does a fresh clone.
+
+    This is the full scenario: first call fails at fetch (directory cleaned up),
+    second call sees no directory and clones fresh.
+    """
+    CORE.config_path = tmp_path / "test.yaml"
+
+    url = "https://github.com/test/repo"
+    ref = "pull/123/head"
+    domain = "test"
+    repo_dir = _compute_repo_dir(url, ref, domain)
+
+    call_count = {"clone": 0, "fetch": 0}
+
+    def git_command_side_effect(
+        cmd: list[str], cwd: str | None = None, **kwargs: Any
+    ) -> str:
+        cmd_type = _get_git_command_type(cmd)
+        if cmd_type == "clone":
+            call_count["clone"] += 1
+            repo_dir.mkdir(parents=True, exist_ok=True)
+            (repo_dir / ".git").mkdir(exist_ok=True)
+            return ""
+        if cmd_type == "fetch":
+            call_count["fetch"] += 1
+            if call_count["fetch"] == 1:
+                # First fetch fails
+                raise GitCommandError("fatal: couldn't find remote ref pull/123/head")
+            # Second fetch succeeds
+            return ""
+        if cmd_type == "reset":
+            return ""
+        return ""
+
+    mock_run_git_command.side_effect = git_command_side_effect
+
+    refresh = TimePeriodSeconds(days=1)
+
+    # First call: clone succeeds, fetch fails, directory cleaned up
+    with pytest.raises(GitCommandError, match="couldn't find remote ref"):
+        git.clone_or_update(url=url, ref=ref, refresh=refresh, domain=domain)
+
+    assert not repo_dir.exists()
+
+    # Second call: fresh clone + fetch succeeds
+    result_dir, _ = git.clone_or_update(
+        url=url, ref=ref, refresh=refresh, domain=domain
+    )
+
+    assert result_dir == repo_dir
+    assert repo_dir.exists()
+    assert call_count["clone"] == 2
+    assert call_count["fetch"] == 2