Skip to content

Commit aaf62a4

Browse files
authored
add it back (#819)
1 parent e5290ed commit aaf62a4

File tree

1 file changed

+14
-3
lines changed

1 file changed

+14
-3
lines changed

v03_pipeline/lib/reference_data/clinvar.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import gzip
22
import os
3+
import shutil
34
import subprocess
45
import tempfile
56
import urllib
@@ -175,13 +176,23 @@ def download_and_import_clinvar_submission_summary() -> hl.Table:
175176
with tempfile.NamedTemporaryFile(
176177
suffix='.txt.gz',
177178
delete=False,
178-
) as tmp_file:
179+
) as tmp_file, tempfile.NamedTemporaryFile(
180+
suffix='.txt',
181+
delete=False,
182+
) as unzipped_tmp_file:
179183
urllib.request.urlretrieve(CLINVAR_SUBMISSION_SUMMARY_URL, tmp_file.name) # noqa: S310
184+
# Decompress the download first: Hail reads a gzipped file as a single partition.
185+
with gzip.open(tmp_file.name, 'rb') as f_in, open(
186+
unzipped_tmp_file.name,
187+
'wb',
188+
) as f_out:
189+
shutil.copyfileobj(f_in, f_out)
190+
180191
gcs_tmp_file_name = os.path.join(
181192
Env.HAIL_TMPDIR,
182-
os.path.basename(tmp_file.name),
193+
os.path.basename(unzipped_tmp_file.name),
183194
)
184-
safely_move_to_gcs(tmp_file.name, gcs_tmp_file_name)
195+
safely_move_to_gcs(unzipped_tmp_file.name, gcs_tmp_file_name)
185196
return hl.import_table(
186197
gcs_tmp_file_name,
187198
force=True,

0 commit comments

Comments
 (0)