Skip to content

Commit 45fd102

Browse files
Northo and skshetry
authored
feat: ignore files not in remote when push is false (#10749)
Co-authored-by: Saugat Pachhai (सौगात) <suagatchhetri@outlook.com>
1 parent c7c7ba6 commit 45fd102

File tree

2 files changed

+101
-22
lines changed

2 files changed

+101
-22
lines changed

dvc/repo/data.py

Lines changed: 49 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from typing import TYPE_CHECKING, Any, TypedDict, Union
55

66
from dvc.fs.callbacks import DEFAULT_CALLBACK
7+
from dvc.repo.worktree import worktree_view
78
from dvc.ui import ui
89

910
if TYPE_CHECKING:
@@ -51,11 +52,8 @@ def _diff(
5152
*,
5253
granular: bool = False,
5354
not_in_cache: bool = False,
54-
not_in_remote: bool = False,
55-
remote_refresh: bool = False,
5655
callback: "Callback" = DEFAULT_CALLBACK,
5756
) -> dict[str, list[str]]:
58-
from dvc_data.index import StorageError
5957
from dvc_data.index.diff import UNCHANGED, UNKNOWN, diff
6058

6159
ret: dict[str, list[str]] = {}
@@ -97,19 +95,6 @@ def _add_change(typ, change):
9795
# NOTE: emulating previous behaviour
9896
_add_change("not_in_cache", change)
9997

100-
try:
101-
if (
102-
not_in_remote
103-
and change.old
104-
and change.old.hash_info
105-
and not old.storage_map.remote_exists(
106-
change.old, refresh=remote_refresh
107-
)
108-
):
109-
_add_change("not_in_remote", change)
110-
except StorageError:
111-
pass
112-
11398
_add_change(change.typ, change)
11499

115100
return ret
@@ -217,15 +202,57 @@ def _transform_git_paths_to_dvc(repo: "Repo", files: Iterable[str]) -> list[str]
217202
return [repo.fs.relpath(file, start) for file in files]
218203

219204

220-
def status(repo: "Repo", untracked_files: str = "no", **kwargs: Any) -> Status:
205+
def _get_entries_not_in_remote(
    repo: "Repo",
    granular: bool = False,
    remote_refresh: bool = False,
) -> list[str]:
    """Return paths of pushable index entries that the remote does not have.

    Args:
        repo: Repository whose index is inspected.
        granular: When false, the index view is iterated shallowly
            (``shallow=not granular``).
        remote_refresh: Forwarded as ``refresh`` to the storage map's
            ``remote_exists`` check.

    Returns:
        OS-separator-joined paths of entries for which ``remote_exists``
        returned a falsy value. Directory entries get a trailing separator.
        Entries whose lookup raises ``StorageKeyError`` are left out.
    """
    from dvc_data.index import StorageKeyError

    # Only pushable entries are considered, so outputs that are never pushed
    # are not reported as missing from the remote.
    pushable = worktree_view(repo.index, push=True).data["repo"]

    not_in_remote: list[str] = []
    for key, entry in pushable.iteritems(shallow=not granular):
        # Nothing to look up for empty entries or entries without a hash.
        if not entry or not entry.hash_info:
            continue

        try:
            present = pushable.storage_map.remote_exists(
                entry, refresh=remote_refresh
            )
        except StorageKeyError:
            # Lookup failed for this entry; skip it rather than report it.
            continue
        if present:
            continue

        # An empty last component makes directories end with a separator.
        is_dir = entry.meta and entry.meta.isdir
        parts = (*key, "") if is_dir else key
        not_in_remote.append(os.path.sep.join(parts))

    return not_in_remote
229+
230+
231+
def status(
232+
repo: "Repo",
233+
untracked_files: str = "no",
234+
not_in_remote: bool = False,
235+
remote_refresh: bool = False,
236+
granular: bool = False,
237+
head: str = "HEAD",
238+
) -> Status:
221239
from dvc.scm import NoSCMError, SCMError
222240

223-
head = kwargs.pop("head", "HEAD")
224-
uncommitted_diff = _diff_index_to_wtree(repo, **kwargs)
241+
uncommitted_diff = _diff_index_to_wtree(repo, granular=granular)
225242
unchanged = set(uncommitted_diff.pop("unchanged", []))
226243

244+
entries_not_in_remote = (
245+
_get_entries_not_in_remote(
246+
repo,
247+
granular=granular,
248+
remote_refresh=remote_refresh,
249+
)
250+
if not_in_remote
251+
else []
252+
)
253+
227254
try:
228-
committed_diff = _diff_head_to_index(repo, head=head, **kwargs)
255+
committed_diff = _diff_head_to_index(repo, head=head, granular=granular)
229256
except (SCMError, NoSCMError):
230257
committed_diff = {}
231258
else:
@@ -234,10 +261,11 @@ def status(repo: "Repo", untracked_files: str = "no", **kwargs: Any) -> Status:
234261
git_info = _git_info(repo.scm, untracked_files=untracked_files)
235262
untracked = git_info.get("untracked", [])
236263
untracked = _transform_git_paths_to_dvc(repo, untracked)
264+
237265
# order matters here
238266
return Status(
239267
not_in_cache=uncommitted_diff.pop("not_in_cache", []),
240-
not_in_remote=uncommitted_diff.pop("not_in_remote", []),
268+
not_in_remote=entries_not_in_remote,
241269
committed=committed_diff,
242270
uncommitted=uncommitted_diff,
243271
untracked=untracked,

tests/func/test_data_status.py

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,19 @@
11
from os import fspath
22
from os.path import join
3+
from typing import TYPE_CHECKING
34

45
import pytest
56

67
from dvc.repo import Repo
78
from dvc.repo.data import _transform_git_paths_to_dvc, posixpath_to_os_path
8-
from dvc.testing.tmp_dir import make_subrepo
9+
from dvc.testing.tmp_dir import TmpDir, make_subrepo
910
from dvc.utils.fs import remove
1011

12+
if TYPE_CHECKING:
13+
from pytest_test_utils import matchers
14+
15+
from dvc.stage import Stage
16+
1117
EMPTY_STATUS = {
1218
"committed": {},
1319
"uncommitted": {},
@@ -423,6 +429,51 @@ def test_missing_remote_cache(M, tmp_dir, dvc, scm, local_remote):
423429
}
424430

425431

432+
def test_not_in_remote_respects_not_pushable(
    M: type["matchers.Matcher"], tmp_dir: TmpDir, dvc: Repo, scm, mocker, local_remote
):
    """Check that a non-pushable output is never listed under ``not_in_remote``.

    ``foo`` is marked as not pushable, so only ``dir`` (and its contents in
    granular mode) may be reported as missing from the remote before the
    push, and nothing may be reported after it.
    """
    stages: list[Stage] = tmp_dir.dvc_gen({"foo": "foo", "dir": {"foobar": "foobar"}})
    # The first stage is taken to be the one tracking foo; make it not pushable.
    foo_stage = stages[0]
    foo_stage.outs[0].can_push = False
    foo_stage.dump()

    foo_path = "foo"
    dir_path = join("dir", "")
    nested_path = join("dir", "foobar")

    # The committed entries depend only on the granularity, not on the push.
    committed_by_granularity = {
        True: [foo_path, dir_path, nested_path],
        False: [foo_path, dir_path],
    }

    def check_status(granular: bool, missing: list[str]):
        status = dvc.data_status(
            granular=granular, remote_refresh=True, not_in_remote=True
        )
        expected = {
            **EMPTY_STATUS,
            "git": M.dict(),
            "not_in_remote": M.unordered(*missing),
            "committed": {
                "added": M.unordered(*committed_by_granularity[granular])
            },
        }
        assert status == expected

    # Before pushing: only the pushable directory is missing from the remote.
    check_status(granular=True, missing=[dir_path, nested_path])
    check_status(granular=False, missing=[dir_path])

    dvc.push()

    # After pushing: nothing is missing, and foo is still not reported.
    check_status(granular=True, missing=[])
    check_status(granular=False, missing=[])
476+
426477
def test_root_from_dir_to_file(M, tmp_dir, dvc, scm):
427478
tmp_dir.dvc_gen({"data": {"foo": "foo", "bar": "bar"}})
428479
remove("data")

0 commit comments

Comments
 (0)