Skip to content

Commit 6206bdc

Browse files
committed
merge
2 parents 6d6b8d5 + b3e996a commit 6206bdc

29 files changed

+47
-36
lines changed

download_and_create_reference_datasets/v02/mito/write_mito_mitimpact_ht.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,13 +3,13 @@
33
from download_and_create_reference_datasets.v02.mito.utils import load
44

55
CONFIG = {
6-
'input_path': 'https://mitimpact.css-mendel.it/cdn/MitImpact_db_3.0.7.txt.zip',
6+
'input_path': 'https://mitimpact.css-mendel.it/cdn/MitImpact_db_3.1.3.txt.zip',
77
'input_type': 'tsv',
8-
'output_path': 'gs://seqr-reference-data/GRCh38/mitochondrial/MitImpact/MitImpact_db_3.0.7.ht',
8+
'output_path': 'gs://seqr-reference-data/GRCh38/mitochondrial/MitImpact/MitImpact_db_3.1.3.ht',
99
'annotate': {
1010
'locus': lambda ht: hl.locus('chrM', hl.parse_int32(ht.Start)),
1111
'alleles': lambda ht: [ht.Ref, ht.Alt],
12-
'APOGEE_score': lambda ht: hl.parse_float(ht.APOGEE_score),
12+
'APOGEE2_score': lambda ht: hl.parse_float(ht.APOGEE2_score),
1313
},
1414
}
1515

v03_pipeline/lib/misc/pedigree.py

Lines changed: 3 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -81,7 +81,7 @@ def __hash__(self):
8181
return hash(self.family_guid)
8282

8383
@staticmethod
84-
def parse_direct_lineage(rows: list[hl.Struct]) -> dict[str, Sample]: # noqa: C901
84+
def parse_direct_lineage(rows: list[hl.Struct]) -> dict[str, Sample]:
8585
samples = {}
8686
for row in rows:
8787
samples[row.s] = Sample(
@@ -94,22 +94,15 @@ def parse_direct_lineage(rows: list[hl.Struct]) -> dict[str, Sample]: # noqa: C
9494
for row in rows:
9595
# Maternal GrandParents
9696
maternal_s = samples[row.s].mother
97-
if maternal_s and maternal_s not in samples:
98-
# A sample id may be referenced for a proband that has been
99-
# removed from the pedigree as an individual. We handle this by
100-
# nulling out the parent here.
101-
samples[row.s].mother = None
102-
elif maternal_s:
97+
if maternal_s and maternal_s in samples:
10398
if samples[maternal_s].mother:
10499
samples[row.s].maternal_grandmother = samples[maternal_s].mother
105100
if samples[maternal_s].father:
106101
samples[row.s].maternal_grandfather = samples[maternal_s].father
107102

108103
# Paternal GrandParents
109104
paternal_s = samples[row.s].father
110-
if paternal_s and paternal_s not in samples:
111-
samples[row.s].father = None
112-
elif paternal_s:
105+
if paternal_s and paternal_s in samples:
113106
if samples[paternal_s].mother:
114107
samples[row.s].paternal_grandmother = samples[paternal_s].mother
115108
if samples[paternal_s].father:

v03_pipeline/lib/misc/pedigree_test.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -479,7 +479,13 @@ def test_subsetted_pedigree_with_removed_parent(self) -> None:
479479
),
480480
)
481481
self.assertEqual(len(family.samples), 2)
482-
self.assertIsNone(family.samples['BBL_BC1-000345_01_D1'].father)
482+
self.assertFalse(
483+
'BBL_BC1-000345_02_D1' in family.samples,
484+
)
485+
self.assertEqual(
486+
family.samples['BBL_BC1-000345_01_D1'].father,
487+
'BBL_BC1-000345_02_D1',
488+
)
483489
self.assertEqual(
484490
family.samples['BBL_BC1-000345_01_D1'].mother,
485491
'BBL_BC1-000345_03_D1',

v03_pipeline/lib/model/cached_reference_dataset_query.py

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,9 @@
55

66
from v03_pipeline.lib.model.dataset_type import DatasetType
77
from v03_pipeline.lib.model.definitions import ReferenceGenome
8+
from v03_pipeline.lib.model.reference_dataset_collection import (
9+
ReferenceDatasetCollection,
10+
)
811
from v03_pipeline.lib.reference_data.queries import (
912
clinvar_path_variants,
1013
gnomad_coding_and_noncoding_variants,
@@ -30,11 +33,13 @@ def dataset(self, dataset_type: DatasetType) -> str | None:
3033
}.get(self)
3134

3235
@property
33-
def query_raw_dataset(self) -> bool:
36+
def reference_dataset_collection(self) -> ReferenceDatasetCollection:
3437
return {
35-
CachedReferenceDatasetQuery.GNOMAD_CODING_AND_NONCODING_VARIANTS: True,
36-
CachedReferenceDatasetQuery.GNOMAD_QC: True,
37-
}.get(self, False)
38+
CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS: ReferenceDatasetCollection.COMBINED,
39+
CachedReferenceDatasetQuery.GNOMAD_CODING_AND_NONCODING_VARIANTS: None,
40+
CachedReferenceDatasetQuery.GNOMAD_QC: None,
41+
CachedReferenceDatasetQuery.HIGH_AF_VARIANTS: ReferenceDatasetCollection.COMBINED,
42+
}[self]
3843

3944
@property
4045
def query(self) -> Callable[[hl.Table, ReferenceGenome], hl.Table]:

v03_pipeline/lib/reference_data/config.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -503,9 +503,9 @@ def custom_mpc_select(ht):
503503
},
504504
'mitimpact': {
505505
'38': {
506-
'version': '3.0.7',
507-
'path': 'gs://seqr-reference-data/GRCh38/mitochondrial/MitImpact/MitImpact_db_3.0.7.ht',
508-
'select': {'score': 'APOGEE_score'},
506+
'version': '3.1.3',
507+
'path': 'gs://seqr-reference-data/GRCh38/mitochondrial/MitImpact/MitImpact_db_3.1.3.ht',
508+
'select': {'score': 'APOGEE2_score'},
509509
},
510510
},
511511
'hmtvar': {

v03_pipeline/lib/tasks/reference_data/update_variant_annotations_table_with_updated_reference_dataset_test.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -604,13 +604,13 @@
604604
'mitimpact': {
605605
'38': {
606606
**CONFIG['mitimpact']['38'],
607-
'source_path': 'gs://seqr-reference-data/GRCh38/mitochondrial/MitImpact/MitImpact_db_3.0.7.ht',
607+
'source_path': 'gs://seqr-reference-data/GRCh38/mitochondrial/MitImpact/MitImpact_db_3.1.3.ht',
608608
'custom_import': lambda *_: hl.Table.parallelize(
609609
[],
610610
hl.tstruct(
611611
locus=hl.tlocus('GRCh38'),
612612
alleles=hl.tarray(hl.tstr),
613-
APOGEE_score=hl.tfloat64,
613+
APOGEE2_score=hl.tfloat64,
614614
),
615615
key=['locus', 'alleles'],
616616
globals=hl.Struct(),
@@ -1008,7 +1008,7 @@ def test_update_vat_with_updated_rdc_mito_38(
10081008
helix_mito='gs://seqr-reference-data/GRCh38/mitochondrial/Helix/HelixMTdb_20200327.ht',
10091009
hmtvar='gs://seqr-reference-data/GRCh38/mitochondrial/HmtVar/HmtVar%20Jan.%2010%202022.ht',
10101010
mitomap='gs://seqr-reference-data/GRCh38/mitochondrial/MITOMAP/mitomap-confirmed-mutations-2022-02-04.ht',
1011-
mitimpact='gs://seqr-reference-data/GRCh38/mitochondrial/MitImpact/MitImpact_db_3.0.7.ht',
1011+
mitimpact='gs://seqr-reference-data/GRCh38/mitochondrial/MitImpact/MitImpact_db_3.1.3.ht',
10121012
clinvar_mito='https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh38/clinvar.vcf.gz',
10131013
dbnsfp_mito='gs://seqr-reference-data/GRCh38/dbNSFP/v4.2/dbNSFP4.2a_variant.with_new_scores.ht',
10141014
high_constraint_region_mito='gs://seqr-reference-data/GRCh38/mitochondrial/Helix high constraint intervals Feb-15-2022.tsv',
@@ -1019,7 +1019,7 @@ def test_update_vat_with_updated_rdc_mito_38(
10191019
helix_mito='20200327',
10201020
hmtvar='Jan. 10 2022',
10211021
mitomap='Feb. 04 2022',
1022-
mitimpact='3.0.7',
1022+
mitimpact='3.1.3',
10231023
clinvar_mito='2023-07-22',
10241024
dbnsfp_mito='4.2',
10251025
high_constraint_region_mito='Feb-15-2022',
@@ -1095,7 +1095,7 @@ def test_update_vat_with_updated_rdc_mito_38(
10951095
),
10961096
hmtvar=hl.Struct(score=0.6700000166893005),
10971097
mitomap=None,
1098-
mitimpact=hl.Struct(score=0.5199999809265137),
1098+
mitimpact=hl.Struct(score=0.42500001192092896),
10991099
high_constraint_region_mito=True,
11001100
local_constraint_mito=hl.Struct(score=0.5),
11011101
),

v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,7 @@ def output(self) -> luigi.Target:
7272
)
7373

7474
def requires(self) -> luigi.Task:
75-
if self.crdq.query_raw_dataset:
75+
if not self.crdq.reference_dataset_collection:
7676
return HailTableTask(
7777
get_ht_path(
7878
CONFIG[self.crdq.dataset(self.dataset_type)][
@@ -83,7 +83,7 @@ def requires(self) -> luigi.Task:
8383
# Special nested import to avoid a circular dependency issue
8484
# (ValidateCallset -> this file -> UpdatedReferenceDatasetCollection -> ValidateCallset)
8585
# The specific CRDQ referenced in ValidateCallset will never reach
86-
# this line due to it being a "query_raw_dataset". In theory this
86+
# this line due to it being a raw dataset query. In theory this
8787
# would be fixed by splitting the CRDQ into raw_dataset and non-raw_dataset
8888
# queries.
8989
from v03_pipeline.lib.tasks.reference_data.updated_reference_dataset_collection import (
@@ -97,7 +97,7 @@ def requires(self) -> luigi.Task:
9797

9898
def create_table(self) -> hl.Table:
9999
dataset: str = self.crdq.dataset(self.dataset_type)
100-
if self.crdq.query_raw_dataset:
100+
if not self.crdq.reference_dataset_collection:
101101
query_ht = import_ht_from_config_path(
102102
CONFIG[dataset][self.reference_genome.v02_value],
103103
dataset,
@@ -120,21 +120,21 @@ def create_table(self) -> hl.Table:
120120
paths=hl.Struct(
121121
**{
122122
dataset: query_ht.index_globals().path
123-
if self.crdq.query_raw_dataset
123+
if not self.crdq.reference_dataset_collection
124124
else query_ht.index_globals().paths[dataset],
125125
},
126126
),
127127
versions=hl.Struct(
128128
**{
129129
dataset: query_ht.index_globals().version
130-
if self.crdq.query_raw_dataset
130+
if not self.crdq.reference_dataset_collection
131131
else query_ht.index_globals().versions[dataset],
132132
},
133133
),
134134
enums=hl.Struct(
135135
**{
136136
dataset: query_ht.index_globals().enums
137-
if self.crdq.query_raw_dataset
137+
if not self.crdq.reference_dataset_collection
138138
else query_ht.index_globals().enums[dataset],
139139
},
140140
),

v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -971,7 +971,7 @@ def test_mito_update_vat(
971971
helix_mito='gs://seqr-reference-data/GRCh38/mitochondrial/Helix/HelixMTdb_20200327.ht',
972972
hmtvar='gs://seqr-reference-data/GRCh38/mitochondrial/HmtVar/HmtVar%20Jan.%2010%202022.ht',
973973
mitomap='gs://seqr-reference-data/GRCh38/mitochondrial/MITOMAP/mitomap-confirmed-mutations-2022-02-04.ht',
974-
mitimpact='gs://seqr-reference-data/GRCh38/mitochondrial/MitImpact/MitImpact_db_3.0.7.ht',
974+
mitimpact='gs://seqr-reference-data/GRCh38/mitochondrial/MitImpact/MitImpact_db_3.1.3.ht',
975975
local_constraint_mito='gs://seqr-reference-data/GRCh38/mitochondrial/local_constraint.tsv',
976976
),
977977
versions=hl.Struct(
@@ -982,7 +982,7 @@ def test_mito_update_vat(
982982
helix_mito='20200327',
983983
hmtvar='Jan. 10 2022',
984984
mitomap='Feb. 04 2022',
985-
mitimpact='3.0.7',
985+
mitimpact='3.1.3',
986986
local_constraint_mito='2024-07-24',
987987
),
988988
enums=hl.Struct(

v03_pipeline/lib/tasks/write_success_file.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
from v03_pipeline.lib.tasks.update_variant_annotations_table_with_new_samples import (
1111
UpdateVariantAnnotationsTableWithNewSamplesTask,
1212
)
13+
from v03_pipeline.lib.tasks.write_metadata_for_run import WriteMetadataForRunTask
1314

1415

1516
@luigi.util.inherits(BaseLoadingRunParams)
@@ -25,6 +26,7 @@ def output(self) -> luigi.Target:
2526

2627
def requires(self):
2728
requirements = [
29+
self.clone(WriteMetadataForRunTask),
2830
self.clone(UpdateVariantAnnotationsTableWithNewSamplesTask),
2931
]
3032
return [

v03_pipeline/lib/tasks/write_success_file_test.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,9 @@
99

1010

1111
class WriteSuccessFileTaskTest(MockedDatarootTestCase):
12+
@mock.patch(
13+
'v03_pipeline.lib.tasks.write_success_file.WriteMetadataForRunTask',
14+
)
1215
@mock.patch(
1316
'v03_pipeline.lib.tasks.write_success_file.WriteProjectFamilyTablesTask',
1417
)
@@ -19,7 +22,9 @@ def test_write_success_file_task(
1922
self,
2023
mock_update_variant_annotations_task,
2124
mock_write_project_fam_tables,
25+
mock_write_metadata_for_run_task,
2226
) -> None:
27+
mock_write_metadata_for_run_task.return_value = MockCompleteTask()
2328
mock_update_variant_annotations_task.return_value = MockCompleteTask()
2429
mock_write_project_fam_tables.return_value = MockCompleteTask()
2530

0 commit comments

Comments
 (0)