File tree Expand file tree Collapse file tree 1 file changed +14
-3
lines changed
v03_pipeline/lib/reference_data Expand file tree Collapse file tree 1 file changed +14
-3
lines changed Original file line number Diff line number Diff line change 1
1
import gzip
2
2
import os
3
+ import shutil
3
4
import subprocess
4
5
import tempfile
5
6
import urllib
@@ -175,13 +176,23 @@ def download_and_import_clinvar_submission_summary() -> hl.Table:
175
176
with tempfile .NamedTemporaryFile (
176
177
suffix = '.txt.gz' ,
177
178
delete = False ,
178
- ) as tmp_file :
179
+ ) as tmp_file , tempfile .NamedTemporaryFile (
180
+ suffix = '.txt' ,
181
+ delete = False ,
182
+ ) as unzipped_tmp_file :
179
183
urllib .request .urlretrieve (CLINVAR_SUBMISSION_SUMMARY_URL , tmp_file .name ) # noqa: S310
184
+ # Unzip the gzipped file first, because Hail reads gzip files as a single partition
185
+ with gzip .open (tmp_file .name , 'rb' ) as f_in , open (
186
+ unzipped_tmp_file .name ,
187
+ 'wb' ,
188
+ ) as f_out :
189
+ shutil .copyfileobj (f_in , f_out )
190
+
180
191
gcs_tmp_file_name = os .path .join (
181
192
Env .HAIL_TMPDIR ,
182
- os .path .basename (tmp_file .name ),
193
+ os .path .basename (unzipped_tmp_file .name ),
183
194
)
184
- safely_move_to_gcs (tmp_file .name , gcs_tmp_file_name )
195
+ safely_move_to_gcs (unzipped_tmp_file .name , gcs_tmp_file_name )
185
196
return hl .import_table (
186
197
gcs_tmp_file_name ,
187
198
force = True ,
You can’t perform that action at this time.
0 commit comments