1
1
import gzip
2
2
import os
3
- import shutil
4
3
import subprocess
5
4
import tempfile
6
5
import urllib
@@ -166,7 +165,6 @@ def join_to_submission_summary_ht(vcf_ht: hl.Table) -> hl.Table:
166
165
Submitters = hl .agg .collect (ht .Submitter ),
167
166
Conditions = hl .agg .collect (ht .ReportedPhenotypeInfo ),
168
167
)
169
- ht = ht .key_by ('VariationID' )
170
168
return vcf_ht .annotate (
171
169
submitters = ht [vcf_ht .rsid ].Submitters ,
172
170
conditions = ht [vcf_ht .rsid ].Conditions ,
@@ -177,20 +175,13 @@ def download_and_import_clinvar_submission_summary() -> hl.Table:
177
175
with tempfile .NamedTemporaryFile (
178
176
suffix = '.txt.gz' ,
179
177
delete = False ,
180
- ) as tmp_file , tempfile .NamedTemporaryFile (
181
- suffix = '.txt' ,
182
- delete = False ,
183
- ) as unzipped_tmp_file :
178
+ ) as tmp_file :
184
179
urllib .request .urlretrieve (CLINVAR_SUBMISSION_SUMMARY_URL , tmp_file .name ) # noqa: S310
185
- # Unzip the gzipped file first to fix gzip files being read by hail with single partition
186
- with gzip .open (tmp_file .name , 'rb' ) as f_in , open (unzipped_tmp_file .name , 'wb' ) as f_out :
187
- shutil .copyfileobj (f_in , f_out )
188
-
189
180
gcs_tmp_file_name = os .path .join (
190
181
Env .HAIL_TMPDIR ,
191
- os .path .basename (unzipped_tmp_file .name ),
182
+ os .path .basename (tmp_file .name ),
192
183
)
193
- safely_move_to_gcs (unzipped_tmp_file .name , gcs_tmp_file_name )
184
+ safely_move_to_gcs (tmp_file .name , gcs_tmp_file_name )
194
185
return hl .import_table (
195
186
gcs_tmp_file_name ,
196
187
force = True ,
0 commit comments