Skip to content

Commit 85bf51d

Browse files
authored
Merge pull request #1075 from broadinstitute/remap-from-ped-file
Remap from ped file
2 parents 08692fa + 96edfd3 commit 85bf51d

36 files changed

+110
-248
lines changed

v03_pipeline/bin/pipeline_worker.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
from v03_pipeline.lib.paths import (
1212
loading_pipeline_queue_path,
1313
project_pedigree_path,
14-
project_remap_path,
1514
)
1615
from v03_pipeline.lib.tasks.trigger_hail_backend_reload import TriggerHailBackendReload
1716
from v03_pipeline.lib.tasks.write_success_file import WriteSuccessFileTask
@@ -26,15 +25,6 @@ def main():
2625
continue
2726
with open(loading_pipeline_queue_path()) as f:
2827
lpr = LoadingPipelineRequest.model_validate_json(f.read())
29-
project_remap_paths = [
30-
project_remap_path(
31-
lpr.reference_genome,
32-
lpr.dataset_type,
33-
lpr.sample_type,
34-
project_guid,
35-
)
36-
for project_guid in lpr.projects_to_run
37-
]
3828
project_pedigree_paths = [
3929
project_pedigree_path(
4030
lpr.reference_genome,
@@ -49,7 +39,6 @@ def main():
4939
)
5040
loading_run_task_params = {
5141
'project_guids': lpr.projects_to_run,
52-
'project_remap_paths': project_remap_paths,
5342
'project_pedigree_paths': project_pedigree_paths,
5443
'run_id': run_id,
5544
**{k: v for k, v in lpr.model_dump().items() if k != 'projects_to_run'},

v03_pipeline/lib/misc/io.py

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -283,31 +283,21 @@ def import_tdr_qc_metrics(file_path: str) -> hl.Table:
283283
return ht.key_by(ht.s)
284284

285285

286-
def import_remap(remap_path: str) -> hl.Table:
287-
ht = hl.import_table(remap_path)
288-
ht = ht.select(
289-
s=ht.s,
290-
seqr_id=ht.seqr_id,
291-
)
292-
return ht.key_by(ht.s)
293-
294-
295286
def import_pedigree(pedigree_path: str) -> hl.Table:
    """Load a pedigree file into a Hail table with normalized field names.

    Empty strings in the file are read as missing values. The resulting
    table carries ``sex``, ``family_guid``, ``s``, ``maternal_s`` and
    ``paternal_s``. A ``remap_id`` field (taken from the ``VCF_ID`` column)
    is included only when the input file has that column.
    """
    pedigree = hl.import_table(pedigree_path, missing='')
    selected_fields = {
        'sex': pedigree.Sex,
        'family_guid': pedigree.Family_GUID,
        's': pedigree.Individual_ID,
        'maternal_s': pedigree.Maternal_ID,
        'paternal_s': pedigree.Paternal_ID,
    }
    # VCF_ID is optional in the pedigree file; only expose it when present.
    if 'VCF_ID' in pedigree.row:
        selected_fields['remap_id'] = pedigree.VCF_ID
    return pedigree.select(**selected_fields)
304297

305298

306-
def remap_pedigree_hash(remap_path: str, pedigree_path: str) -> hl.Int32Expression:
299+
def remap_pedigree_hash(pedigree_path: str) -> hl.Int32Expression:
307300
sha256 = hashlib.sha256()
308-
if hfs.exists(remap_path):
309-
with hfs.open(remap_path) as f1:
310-
sha256.update(f1.read().encode('utf8'))
311301
with hfs.open(pedigree_path) as f2:
312302
sha256.update(f2.read().encode('utf8'))
313303
# maximum 4 byte int

v03_pipeline/lib/misc/io_test.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,8 @@
2020
'v03_pipeline/var/test/sex_check/test_imputed_sex_unexpected_value.tsv'
2121
)
2222
TEST_INVALID_VCF = 'v03_pipeline/var/test/callsets/improperly_formatted.vcf'
23-
TEST_PEDIGREE_3 = 'v03_pipeline/var/test/pedigrees/test_pedigree_3.tsv'
23+
TEST_PEDIGREE_3_REMAP = 'v03_pipeline/var/test/pedigrees/test_pedigree_3_remap.tsv'
2424
TEST_MITO_MT = 'v03_pipeline/var/test/callsets/mito_1.mt'
25-
TEST_REMAP = 'v03_pipeline/var/test/remaps/test_remap_1.tsv'
2625

2726

2827
class IOTest(unittest.TestCase):
@@ -61,11 +60,10 @@ def test_remap_pedigree_hash(self) -> None:
6160
self.assertEqual(
6261
hl.eval(
6362
remap_pedigree_hash(
64-
TEST_REMAP,
65-
TEST_PEDIGREE_3,
63+
TEST_PEDIGREE_3_REMAP,
6664
),
6765
),
68-
-560434714,
66+
573002191,
6967
)
7068

7169
def test_import_vcf(self) -> None:
@@ -97,7 +95,7 @@ def test_import_vcf(self) -> None:
9795
SeqrValidationError,
9896
'VCF failed file format validation: Your input file has a malformed header: We never saw the required CHROM header line \\(starting with one #\\) for the input VCF file',
9997
import_vcf,
100-
TEST_PEDIGREE_3,
98+
TEST_PEDIGREE_3_REMAP,
10199
ReferenceGenome.GRCh38,
102100
)
103101
self.assertRaisesRegex(

v03_pipeline/lib/misc/pedigree.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,3 +170,10 @@ def parse_pedigree_ht_to_families(
170170
):
171171
families.add(Family.parse(family_guid, list(rows)))
172172
return families
173+
174+
175+
def parse_pedigree_ht_to_remap_ht(pedigree_ht: hl.Table) -> hl.Table:
    """Derive a sample-id remap table from a pedigree table.

    Rows with a defined ``remap_id`` are kept. The output is keyed by ``s``
    (set to the row's ``remap_id``) and has a single value field
    ``seqr_id`` holding the pedigree's original ``s``.

    If the pedigree table has no ``remap_id`` field at all, an empty table
    with the same schema is returned instead of raising on the missing
    field.
    """
    if 'remap_id' not in pedigree_ht.row:
        # No remap column: treat every row as having a missing remap id so
        # the filter below yields an empty, correctly-typed table.
        pedigree_ht = pedigree_ht.annotate(remap_id=hl.missing(hl.tstr))
    ht = pedigree_ht.filter(hl.is_defined(pedigree_ht.remap_id))
    # Preserve the original id before `s` is overwritten by the new key.
    ht = ht.annotate(seqr_id=ht.s)
    ht = ht.key_by(s=ht.remap_id)
    return ht.select('seqr_id')

v03_pipeline/lib/paths.py

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -399,24 +399,6 @@ def clinvar_dataset_path(reference_genome: ReferenceGenome, etag: str) -> str:
399399
)
400400

401401

402-
def project_remap_path(
403-
reference_genome: ReferenceGenome,
404-
dataset_type: DatasetType,
405-
sample_type: SampleType,
406-
project_guid: str,
407-
) -> str:
408-
return os.path.join(
409-
pipeline_prefix(
410-
Env.LOADING_DATASETS_DIR,
411-
reference_genome,
412-
dataset_type,
413-
),
414-
'remaps',
415-
sample_type.value,
416-
f'{project_guid}_remap.tsv',
417-
)
418-
419-
420402
def project_pedigree_path(
421403
reference_genome: ReferenceGenome,
422404
dataset_type: DatasetType,

v03_pipeline/lib/paths_test.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
metadata_for_run_path,
1616
new_variants_table_path,
1717
project_pedigree_path,
18-
project_remap_path,
1918
project_table_path,
2019
relatedness_check_table_path,
2120
remapped_and_subsetted_callset_path,
@@ -225,17 +224,6 @@ def test_new_variants_table_path(self) -> None:
225224
'/var/seqr/seqr-hail-search-data/v3.1/GRCh38/SNV_INDEL/runs/manual__2023-06-26T18:30:09.349671+00:00/new_variants.ht',
226225
)
227226

228-
def test_project_remap_path(self) -> None:
229-
self.assertEqual(
230-
project_remap_path(
231-
ReferenceGenome.GRCh38,
232-
DatasetType.SNV_INDEL,
233-
SampleType.WGS,
234-
'R0652_pipeline_test',
235-
),
236-
'/var/seqr/seqr-loading-temp/v3.1/GRCh38/SNV_INDEL/remaps/WGS/R0652_pipeline_test_remap.tsv',
237-
)
238-
239227
def test_project_pedigree_path(self) -> None:
240228
self.assertEqual(
241229
project_pedigree_path(

v03_pipeline/lib/tasks/base/base_loading_run_params.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ class BaseLoadingRunParams(luigi.Task):
2020
sample_type = luigi.EnumParameter(enum=SampleType)
2121
callset_path = luigi.Parameter()
2222
project_guids = luigi.ListParameter(default=[])
23-
project_remap_paths = luigi.ListParameter(default=[])
2423
project_pedigree_paths = luigi.ListParameter(default=[])
2524
skip_check_sex_and_relatedness = luigi.BoolParameter(
2625
default=False,

v03_pipeline/lib/tasks/dataproc/misc_test.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ def test_to_kebab_str_args(self, _: Mock):
1919
sample_type=SampleType.WGS,
2020
callset_path='test_callset',
2121
project_guids=['R0113_test_project'],
22-
project_remap_paths=['test_remap'],
2322
project_pedigree_paths=['test_pedigree'],
2423
run_id='a_misc_run',
2524
)
@@ -38,8 +37,6 @@ def test_to_kebab_str_args(self, _: Mock):
3837
'test_callset',
3938
'--project-guids',
4039
'["R0113_test_project"]',
41-
'--project-remap-paths',
42-
'["test_remap"]',
4340
'--project-pedigree-paths',
4441
'["test_pedigree"]',
4542
'--skip-check-sex-and-relatedness',

v03_pipeline/lib/tasks/dataproc/rsync_to_seqr_app_dirs_test.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ def test_rsync_to_seqr_app_dirs_no_sync(
3434
sample_type=SampleType.WGS,
3535
callset_path='test_callset',
3636
project_guids=['R0113_test_project'],
37-
project_remap_paths=['test_remap'],
3837
project_pedigree_paths=['test_pedigree'],
3938
run_id='manual__2024-04-01',
4039
)
@@ -77,7 +76,6 @@ def test_rsync_to_seqr_app_dirs_sync(
7776
sample_type=SampleType.WGS,
7877
callset_path='test_callset',
7978
project_guids=['R0113_test_project'],
80-
project_remap_paths=['test_remap'],
8179
project_pedigree_paths=['test_pedigree'],
8280
run_id='manual__2024-04-02',
8381
)

v03_pipeline/lib/tasks/dataproc/run_pipeline_on_dataproc_test.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@ def test_job_already_exists_failed(
4545
sample_type=SampleType.WGS,
4646
callset_path='test_callset',
4747
project_guids=['R0113_test_project'],
48-
project_remap_paths=['test_remap'],
4948
project_pedigree_paths=['test_pedigree'],
5049
run_id='manual__2024-04-03',
5150
)
@@ -79,7 +78,6 @@ def test_job_already_exists_success(
7978
sample_type=SampleType.WGS,
8079
callset_path='test_callset',
8180
project_guids=['R0113_test_project'],
82-
project_remap_paths=['test_remap'],
8381
project_pedigree_paths=['test_pedigree'],
8482
run_id='manual__2024-04-04',
8583
)
@@ -111,7 +109,6 @@ def test_job_failed(
111109
sample_type=SampleType.WGS,
112110
callset_path='test_callset',
113111
project_guids=['R0113_test_project'],
114-
project_remap_paths=['test_remap'],
115112
project_pedigree_paths=['test_pedigree'],
116113
run_id='manual__2024-04-05',
117114
)
@@ -152,7 +149,6 @@ def test_job_success(
152149
sample_type=SampleType.WGS,
153150
callset_path='test_callset',
154151
project_guids=['R0113_test_project'],
155-
project_remap_paths=['test_remap'],
156152
project_pedigree_paths=['test_pedigree'],
157153
run_id='manual__2024-04-06',
158154
)

0 commit comments

Comments
 (0)