Skip to content

Commit 794fafe

Browse files
authored
cloud agnostic clinvar (#924)
* cloud agnostic clinvar
* lint
1 parent 95acd6d commit 794fafe

File tree

1 file changed

+6
-23
lines changed

1 file changed

+6
-23
lines changed

v03_pipeline/lib/reference_data/clinvar.py

Lines changed: 6 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
import gzip
22
import os
33
import shutil
4-
import subprocess
54
import tempfile
65
import urllib
76

@@ -50,21 +49,6 @@
5049
logger = get_logger(__name__)
5150

5251

53-
def safely_move_to_gcs(tmp_file_name, gcs_tmp_file_name):
54-
try:
55-
subprocess.run(
56-
[ # noqa: S603, S607
57-
'gsutil',
58-
'cp',
59-
tmp_file_name,
60-
gcs_tmp_file_name,
61-
],
62-
check=True,
63-
)
64-
except subprocess.CalledProcessError:
65-
logger.exception(f'Failed to move local tmp file {tmp_file_name} to gcs')
66-
67-
6852
def parsed_clnsig(ht: hl.Table):
6953
return (
7054
hl.delimit(ht.info.CLNSIG)
@@ -138,13 +122,13 @@ def download_and_import_latest_clinvar_vcf(
138122
) -> hl.Table:
139123
with tempfile.NamedTemporaryFile(suffix='.vcf.gz', delete=False) as tmp_file:
140124
urllib.request.urlretrieve(clinvar_url, tmp_file.name) # noqa: S310
141-
gcs_tmp_file_name = os.path.join(
125+
cached_tmp_file_name = os.path.join(
142126
Env.HAIL_TMP_DIR,
143127
os.path.basename(tmp_file.name),
144128
)
145-
safely_move_to_gcs(tmp_file.name, gcs_tmp_file_name)
129+
hfs.copy(tmp_file.name, cached_tmp_file_name)
146130
mt = hl.import_vcf(
147-
gcs_tmp_file_name,
131+
cached_tmp_file_name,
148132
reference_genome=reference_genome.value,
149133
drop_samples=True,
150134
skip_invalid_loci=True,
@@ -201,13 +185,12 @@ def download_and_import_clinvar_submission_summary() -> hl.Table:
201185
'wb',
202186
) as f_out:
203187
shutil.copyfileobj(f_in, f_out)
204-
205-
gcs_tmp_file_name = os.path.join(
188+
cached_tmp_file_name = os.path.join(
206189
Env.HAIL_TMP_DIR,
207190
os.path.basename(unzipped_tmp_file.name),
208191
)
209-
safely_move_to_gcs(unzipped_tmp_file.name, gcs_tmp_file_name)
210-
return import_submission_table(gcs_tmp_file_name)
192+
hfs.copy(unzipped_tmp_file.name, cached_tmp_file_name)
193+
return import_submission_table(cached_tmp_file_name)
211194

212195

213196
def import_submission_table(file_name: str) -> hl.Table:

0 commit comments

Comments
 (0)