1
1
import logging
2
+ from typing import Optional
2
3
3
4
import click
4
5
@@ -175,15 +176,28 @@ def _download_job_data(dbgym_cfg: DBGymConfig) -> None:
175
176
176
177
177
178
def _download_job_queries(dbgym_cfg: DBGymConfig) -> None:
    """Download and unpack the JOB queries archive.

    Fetches "job.tgz" from `JOB_QUERIES_URL` and unpacks it under the name
    `JOB_QUERIES_DNAME` by delegating to `_download_and_untar_dir`.

    `untarred_original_dname="job"` is passed so that the directory the
    archive extracts to ("job") is renamed to `JOB_QUERIES_DNAME`.
    """
    _download_and_untar_dir(
        dbgym_cfg,
        download_url=JOB_QUERIES_URL,
        download_tarred_fname="job.tgz",
        untarred_dname=JOB_QUERIES_DNAME,
        untarred_original_dname="job",
    )
179
186
180
187
181
188
def _download_and_untar_dir (
182
189
dbgym_cfg : DBGymConfig ,
183
190
download_url : str ,
184
191
download_tarred_fname : str ,
185
192
untarred_dname : str ,
193
+ untarred_original_dname : Optional [str ] = None ,
186
194
) -> None :
195
+ """
196
+ Some .tgz files are built from a directory while others are built from the contents of
197
+ the directory. If the .tgz file we're untarring is built from a directory, it will have
198
+ an "original" directory name. If this is the case, you should set
199
+ `untarred_original_dname` to ensure that it gets renamed to `untarred_dname`.
200
+ """
187
201
expected_symlink_dpath = (
188
202
dbgym_cfg .cur_symlinks_data_path (mkdir = True ) / f"{ untarred_dname } .link"
189
203
)
@@ -196,8 +210,20 @@ def _download_and_untar_dir(
196
210
logging .getLogger (DBGYM_LOGGER_NAME ).info (f"Downloading: { expected_symlink_dpath } " )
197
211
real_data_path = dbgym_cfg .cur_task_runs_data_path (mkdir = True )
198
212
subprocess_run (f"curl -O { download_url } " , cwd = real_data_path )
199
- untarred_data_dpath = dbgym_cfg .cur_task_runs_data_path (untarred_dname , mkdir = True )
200
- subprocess_run (f"tar -zxvf ../{ download_tarred_fname } " , cwd = untarred_data_dpath )
213
+ untarred_data_dpath = dbgym_cfg .cur_task_runs_data_path (untarred_dname )
214
+
215
+ if untarred_original_dname is not None :
216
+ assert not untarred_data_dpath .exists ()
217
+ subprocess_run (f"tar -zxvf { download_tarred_fname } " , cwd = real_data_path )
218
+ assert (real_data_path / untarred_original_dname ).exists ()
219
+ subprocess_run (
220
+ f"mv { untarred_original_dname } { untarred_dname } " , cwd = real_data_path
221
+ )
222
+ else :
223
+ untarred_data_dpath .mkdir (parents = True , exist_ok = False )
224
+ subprocess_run (f"tar -zxvf ../{ download_tarred_fname } " , cwd = untarred_data_dpath )
225
+
226
+ assert untarred_data_dpath .exists ()
201
227
subprocess_run (f"rm { download_tarred_fname } " , cwd = real_data_path )
202
228
symlink_dpath = link_result (dbgym_cfg , untarred_data_dpath )
203
229
assert expected_symlink_dpath .samefile (symlink_dpath )
0 commit comments