Skip to content

Commit dcea83e

Browse files
dudarev and efiop authored
[WIP] dvc: preserve exec bit for tracked files (#5061)
* #4578 added is_user_executable to dvc file
* Addressed PR feedback and introduced test_add_executable
  Also fixed all tests in test_add.py
* Added a test to checkout executable file
* Pop isexec in dvc/output/__init__.py:loadd_from
* Implement test that file is executable after checkout
* Address PR feedback
* Address PR feedback
* Changed BaseTree.isexec to use path_info as an argument
* Update tests/func/test_add.py
* Update tests/func/test_add.py
* Update tests/func/test_add.py
* Update tests/func/test_add.py
* fix formatting
* add: preserve exec
* checkout: fix setting exec
* tests: refactor checkout exec test
* tests: fix windows cases
* tests: add: fix exec test on windows
* Update dvc/tree/local.py
* Update dvc/output/base.py
* Update dvc/output/base.py
* Update dvc/cache/base.py
* Update tests/func/test_add.py
* tests: add: skip whole exec test on windows
* save isexec to lockfiles
* fix formatting

Co-authored-by: Ruslan Kuprieiev <kupruser@gmail.com>
Co-authored-by: Ruslan Kuprieiev <ruslan@iterative.ai>
1 parent f51bef0 commit dcea83e

File tree

16 files changed

+140
-12
lines changed

16 files changed

+140
-12
lines changed

dvc/output/__init__.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
SCHEMA[HashInfo.PARAM_SIZE] = int
7070
SCHEMA[HashInfo.PARAM_NFILES] = int
7171
SCHEMA[BaseOutput.PARAM_DESC] = str
72+
SCHEMA[BaseOutput.PARAM_ISEXEC] = bool
7273

7374

7475
def _get(
@@ -81,6 +82,7 @@ def _get(
8182
persist=False,
8283
checkpoint=False,
8384
desc=None,
85+
isexec=False,
8486
):
8587
parsed = urlparse(p)
8688

@@ -97,6 +99,7 @@ def _get(
9799
persist=persist,
98100
checkpoint=checkpoint,
99101
desc=desc,
102+
isexec=isexec,
100103
)
101104

102105
for o in OUTS:
@@ -112,6 +115,7 @@ def _get(
112115
persist=persist,
113116
checkpoint=checkpoint,
114117
desc=desc,
118+
isexec=isexec,
115119
)
116120
return LocalOutput(
117121
stage,
@@ -124,6 +128,7 @@ def _get(
124128
persist=persist,
125129
checkpoint=checkpoint,
126130
desc=desc,
131+
isexec=isexec,
127132
)
128133

129134

@@ -137,6 +142,7 @@ def loadd_from(stage, d_list):
137142
persist = d.pop(BaseOutput.PARAM_PERSIST, False)
138143
checkpoint = d.pop(BaseOutput.PARAM_CHECKPOINT, False)
139144
desc = d.pop(BaseOutput.PARAM_DESC, False)
145+
isexec = d.pop(BaseOutput.PARAM_ISEXEC, False)
140146
ret.append(
141147
_get(
142148
stage,
@@ -148,6 +154,7 @@ def loadd_from(stage, d_list):
148154
persist=persist,
149155
checkpoint=checkpoint,
150156
desc=desc,
157+
isexec=isexec,
151158
)
152159
)
153160
return ret
@@ -161,6 +168,7 @@ def loads_from(
161168
plot=False,
162169
persist=False,
163170
checkpoint=False,
171+
isexec=False,
164172
):
165173
return [
166174
_get(
@@ -172,6 +180,7 @@ def loads_from(
172180
plot=plot,
173181
persist=persist,
174182
checkpoint=checkpoint,
183+
isexec=isexec,
175184
)
176185
for s in s_list
177186
]

dvc/output/base.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ class BaseOutput:
7676
PARAM_PLOT_HEADER = "header"
7777
PARAM_PERSIST = "persist"
7878
PARAM_DESC = "desc"
79+
PARAM_ISEXEC = "isexec"
7980

8081
METRIC_SCHEMA = Any(
8182
None,
@@ -105,6 +106,7 @@ def __init__(
105106
persist=False,
106107
checkpoint=False,
107108
desc=None,
109+
isexec=False,
108110
):
109111
self._validate_output_path(path, stage)
110112
# This output (and dependency) objects have too many paths/urls
@@ -136,6 +138,8 @@ def __init__(
136138
if self.use_cache and self.cache is None:
137139
raise RemoteCacheRequiredError(self.path_info)
138140

141+
self.isexec = False if self.IS_DEPENDENCY else isexec
142+
139143
def _parse_path(self, tree, path):
140144
if tree:
141145
parsed = urlparse(path)
@@ -287,6 +291,11 @@ def save(self):
287291
return
288292

289293
self.hash_info = self.get_hash()
294+
self.isexec = self.isfile() and self.tree.isexec(self.path_info)
295+
296+
def set_exec(self):
297+
if self.isfile() and self.isexec:
298+
self.tree.set_exec(self.path_info)
290299

291300
def commit(self):
292301
if not self.exists:
@@ -295,6 +304,7 @@ def commit(self):
295304
assert self.hash_info
296305
if self.use_cache:
297306
self.cache.save(self.path_info, self.cache.tree, self.hash_info)
307+
self.set_exec()
298308

299309
def dumpd(self):
300310
ret = copy(self.hash_info.to_dict())
@@ -328,6 +338,9 @@ def dumpd(self):
328338
if self.checkpoint:
329339
ret[self.PARAM_CHECKPOINT] = self.checkpoint
330340

341+
if self.isexec:
342+
ret[self.PARAM_ISEXEC] = self.isexec
343+
331344
return ret
332345

333346
def verify_metric(self):
@@ -353,7 +366,7 @@ def checkout(
353366
return None
354367

355368
try:
356-
return self.cache.checkout(
369+
res = self.cache.checkout(
357370
self.path_info,
358371
self.hash_info,
359372
force=force,
@@ -366,6 +379,8 @@ def checkout(
366379
if allow_missing or self.checkpoint:
367380
return None
368381
raise
382+
self.set_exec()
383+
return res
369384

370385
def remove(self, ignore_remove=False):
371386
self.tree.remove(self.path_info)

dvc/schema.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
Required("path"): str,
2626
HashInfo.PARAM_SIZE: int,
2727
HashInfo.PARAM_NFILES: int,
28+
BaseOutput.PARAM_ISEXEC: bool,
2829
}
2930
LOCK_FILE_STAGE_SCHEMA = {
3031
Required(StageParams.PARAM_CMD): Any(str, list),

dvc/stage/loader.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ def fill_from_lock(stage, lock_data=None):
6161
info = get_in(checksums, [key, path], {})
6262
info = info.copy()
6363
info.pop("path", None)
64+
item.isexec = info.pop("isexec", None)
6465
item.hash_info = HashInfo.from_dict(info)
6566

6667
@classmethod

dvc/stage/serialize.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,9 @@ def _dumpd(item):
150150
*item.hash_info.to_dict().items(),
151151
]
152152

153+
if item.isexec:
154+
ret.append((item.PARAM_ISEXEC, True))
155+
153156
return OrderedDict(ret)
154157

155158
res = OrderedDict([("cmd", stage.cmd)])

dvc/stage/utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ def compute_md5(stage):
179179
BaseOutput.PARAM_METRIC,
180180
BaseOutput.PARAM_PERSIST,
181181
BaseOutput.PARAM_CHECKPOINT,
182+
BaseOutput.PARAM_ISEXEC,
182183
HashInfo.PARAM_SIZE,
183184
HashInfo.PARAM_NFILES,
184185
],

dvc/tree/base.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,15 @@ def isfile(self, path_info):
195195
"""
196196
return True
197197

198+
def set_exec(self, path_info):
199+
raise RemoteActionNotImplemented("set_exec", self.scheme)
200+
201+
def isexec(self, path_info):
202+
"""Optional: Overwrite only if the remote has a way to distinguish
203+
between executable and non-executable file.
204+
"""
205+
return False
206+
198207
def iscopy(self, path_info):
199208
"""Check if this file is an independent copy."""
200209
return False # We can't be sure by default

dvc/tree/dvc.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ def isdvc(self, path, recursive=False, strict=True):
234234
recurse = recursive or not strict
235235
return meta.output_exists if recurse else meta.is_output
236236

237-
def isexec(self, path): # pylint: disable=unused-argument
237+
def isexec(self, path_info): # pylint: disable=unused-argument
238238
return False
239239

240240
def get_dir_hash(self, path_info, **kwargs):

dvc/tree/git.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,12 @@ def walk(
131131
)
132132
yield root, dirs, files
133133

134-
def isexec(self, path):
135-
return is_exec(self.stat(path).st_mode)
134+
def isexec(self, path_info):
135+
if not self.exists(path_info):
136+
return False
137+
138+
mode = self.stat(path_info).st_mode
139+
return is_exec(mode)
136140

137141
def stat(self, path):
138142
key = self._get_key(path)

dvc/tree/local.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,8 +152,12 @@ def remove(self, path_info):
152152
def makedirs(self, path_info):
153153
makedirs(path_info, exist_ok=True, mode=self.dir_mode)
154154

155-
def isexec(self, path):
156-
mode = os.stat(path).st_mode
155+
def set_exec(self, path_info):
156+
mode = self.stat(path_info).st_mode
157+
self.chmod(path_info, mode | stat.S_IEXEC)
158+
159+
def isexec(self, path_info):
160+
mode = self.stat(path_info).st_mode
157161
return is_exec(mode)
158162

159163
def stat(self, path):

0 commit comments

Comments
 (0)