From 87fe5f3bd242a630e8ab7ee9af4db0e2a6e0b660 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Saugat=20Pachhai=20=28=E0=A4=B8=E0=A5=8C=E0=A4=97=E0=A4=BE?= =?UTF-8?q?=E0=A4=A4=29?= Date: Tue, 2 Sep 2025 11:15:57 +0545 Subject: [PATCH] fix(data status): handle missing DVC repo at Git HEAD Avoid raising errors in `data_status()` when Git HEAD does not contain a DVC repository. Instead, fall back to an empty index. Tests now cover both unborn and committed Git repos, with and without subdirs. --- dvc/repo/data.py | 6 ++++++ tests/func/test_data_status.py | 19 +++++++++++++------ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/dvc/repo/data.py b/dvc/repo/data.py index 80f678b5e1..e539e4c5be 100644 --- a/dvc/repo/data.py +++ b/dvc/repo/data.py @@ -325,6 +325,7 @@ def _diff_head_to_index( granular: bool = False, with_renames: bool = False, ) -> DiffResult: + from dvc.exceptions import NotDvcRepoError from dvc.scm import RevError from dvc_data.index import DataIndex @@ -338,6 +339,11 @@ def _diff_head_to_index( except RevError: logger.debug("failed to switch to '%s'", head) head_view = DataIndex() + except NotDvcRepoError as exc: + # NOTE: this only gets raised on subdir repos at the moment, + # which looks like a bug in `repo.switch`. + logger.warning(exc) + head_view = DataIndex() with ui.progress(desc="Calculating diff between head/index", unit="entry") as pb: return _diff( diff --git a/tests/func/test_data_status.py b/tests/func/test_data_status.py index 8445747954..cf9f3b07d6 100644 --- a/tests/func/test_data_status.py +++ b/tests/func/test_data_status.py @@ -138,18 +138,25 @@ def test_tracked_directory_deep(M, tmp_dir, dvc, scm): } -def test_new_empty_git_repo(M, tmp_dir, scm): - dvc = Repo.init() +@pytest.mark.parametrize("git_repo_state", ["unborn", "committed"]) +@pytest.mark.parametrize("subdir", [True, False]) +def test_new_dvc_repo(M, tmp_dir, scm, subdir, git_repo_state): + if git_repo_state == "committed": + tmp_dir.scm_gen("test", "test", commit="init") + + is_empty = git_repo_state == "unborn" + dir_ = tmp_dir / "sub" if subdir else tmp_dir + dvc = Repo.init(dir_, subdir=subdir) assert dvc.data_status() == { **EMPTY_STATUS, - "git": M.dict(is_empty=True, is_dirty=True), + "git": M.dict(is_dirty=True, is_empty=is_empty), } - tmp_dir.gen("foo", "foo") - dvc.add(["foo"]) + dir_.gen("foo", "foo") + dvc.add([dir_ / "foo"]) assert dvc.data_status() == { **EMPTY_STATUS, - "git": M.dict(is_empty=True, is_dirty=True), + "git": M.dict(is_empty=is_empty, is_dirty=True), "committed": {"added": ["foo"]}, }