Skip to content

Commit f51bef0

Browse files
ju0gri and efiop authored
pull: add glob option (#5032)
* api: add glob option for pull command

  Related: #4816

  Signed-off-by: Ioana Grigoropol <ioana.grigoropol@gmail.com>

* api: add globbing utility function

  Related: #4816

  Signed-off-by: Ioana Grigoropol <ioana.grigoropol@gmail.com>

* api: use utility function for pull command

  Signed-off-by: Ioana Grigoropol <ioana.grigoropol@gmail.com>

* Update dvc/utils/__init__.py

Co-authored-by: Ruslan Kuprieiev <kupruser@gmail.com>
1 parent 2ce617c commit f51bef0

File tree

5 files changed

+46
-13
lines changed

5 files changed

+46
-13
lines changed

dvc/command/data_sync.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,12 @@ def add_parser(subparsers, _parent_parser):
179179
default=False,
180180
help="Fetch run history for all stages.",
181181
)
182+
pull_parser.add_argument(
183+
"--glob",
184+
action="store_true",
185+
default=False,
186+
help="Pull cache for targets matching shell-style wildcards.",
187+
)
182188
pull_parser.set_defaults(func=CmdDataPull)
183189

184190
# Push

dvc/repo/add.py

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from ..output.base import OutputDoesNotExistError
1515
from ..progress import Tqdm
1616
from ..repo.scm_context import scm_context
17-
from ..utils import LARGE_DIR_SIZE, resolve_paths
17+
from ..utils import LARGE_DIR_SIZE, glob_targets, resolve_paths
1818
from . import locked
1919

2020
logger = logging.getLogger(__name__)
@@ -152,18 +152,9 @@ def _find_all_targets(repo, target, recursive):
152152
def _create_stages(
153153
repo, targets, fname, pbar=None, external=False, glob=False, desc=None,
154154
):
155-
from glob import iglob
156-
157155
from dvc.stage import Stage, create_stage, restore_meta
158156

159-
if glob:
160-
expanded_targets = [
161-
exp_target
162-
for target in targets
163-
for exp_target in iglob(target, recursive=True)
164-
]
165-
else:
166-
expanded_targets = targets
157+
expanded_targets = glob_targets(targets, glob=glob)
167158

168159
stages = []
169160
for out in Tqdm(

dvc/repo/pull.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import logging
22

33
from dvc.repo import locked
4+
from dvc.utils import glob_targets
45

56
logger = logging.getLogger(__name__)
67

@@ -18,12 +19,15 @@ def pull(
1819
recursive=False,
1920
all_commits=False,
2021
run_cache=False,
22+
glob=False,
2123
):
2224
if isinstance(targets, str):
2325
targets = [targets]
2426

27+
expanded_targets = glob_targets(targets, glob=glob)
28+
2529
processed_files_count = self.fetch(
26-
targets,
30+
expanded_targets,
2731
jobs,
2832
remote=remote,
2933
all_branches=all_branches,
@@ -34,7 +38,10 @@ def pull(
3438
run_cache=run_cache,
3539
)
3640
stats = self.checkout(
37-
targets=targets, with_deps=with_deps, force=force, recursive=recursive
41+
targets=expanded_targets,
42+
with_deps=with_deps,
43+
force=force,
44+
recursive=recursive,
3845
)
3946

4047
stats["fetched"] = processed_files_count

dvc/utils/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,3 +463,16 @@ def parse_target(
463463

464464
def is_exec(mode):
465465
return mode & (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
466+
467+
468+
def glob_targets(targets, glob=True, recursive=True):
    """Expand shell-style wildcard patterns in ``targets`` into matching paths.

    Args:
        targets: a single pattern string, an iterable of pattern strings,
            or ``None``.
        glob: when false, no expansion is performed and ``targets`` is
            returned untouched.
        recursive: forwarded to ``glob.iglob``; when true, the ``**``
            pattern matches any files and zero or more directories.

    Returns:
        A list with the matches of every pattern, in the order the patterns
        were given. A pattern that matches nothing contributes no entries.
        When ``glob`` is false or ``targets`` is ``None``, ``targets`` is
        returned as-is.
    """
    # Nothing to expand: globbing is disabled, or no targets were given at
    # all (iterating ``None`` below would raise TypeError).
    if not glob or targets is None:
        return targets

    from glob import iglob

    if isinstance(targets, str):
        # A bare string would otherwise be iterated character by character,
        # globbing each character as its own pattern.
        targets = [targets]

    return [
        exp_target
        for target in targets
        for exp_target in iglob(target, recursive=recursive)
    ]

tests/func/test_import.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,22 @@ def test_pull_imported_directory_stage(tmp_dir, dvc, erepo_dir):
248248
assert (tmp_dir / "dir_imported").read_text() == {"foo": "foo content"}
249249

250250

251+
def test_pull_wildcard_imported_directory_stage(tmp_dir, dvc, erepo_dir):
    """Pull an imported directory stage through a wildcard target.

    A directory is generated in the external repo, imported, then both the
    imported copy and the local cache directory are removed before calling
    ``dvc.pull`` with a wildcard ``.dvc`` target and ``glob=True``.
    """
    imported_name = "dir_imported123"
    expected = {"foo": "foo content"}

    with erepo_dir.chdir():
        erepo_dir.dvc_gen(
            {"dir123": {"foo": "foo content"}}, commit="create dir"
        )

    source_repo = os.fspath(erepo_dir)
    dvc.imp(source_repo, "dir123", imported_name)

    # Drop the imported copy first, then the local cache directory, so the
    # final assertion depends on what the pull restores.
    for stale_path in (imported_name, dvc.cache.local.cache_dir):
        remove(stale_path)

    wildcard_targets = ["dir_imported*.dvc"]
    dvc.pull(wildcard_targets, glob=True)

    restored = (tmp_dir / imported_name).read_text()
    assert restored == expected
265+
266+
251267
def test_download_error_pulling_imported_stage(tmp_dir, dvc, erepo_dir):
252268
with erepo_dir.chdir():
253269
erepo_dir.dvc_gen("foo", "foo content", commit="create foo")

0 commit comments

Comments
 (0)