Skip to content

Commit a13f781

Browse files
committed
Merge branch 'dev' of github.com:broadinstitute/seqr-loading-pipelines into benb/lookup_table_refactor
2 parents 7644ea5 + 1a78183 commit a13f781

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

44 files changed

+40
-9
lines changed

v03_pipeline/lib/annotations/enums.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@
198198
CLINVAR_PATHOGENICITIES = [
199199
'Pathogenic',
200200
'Pathogenic/Likely_pathogenic',
201+
'Pathogenic/Likely_pathogenic/Established_risk_allele',
201202
'Pathogenic/Likely_pathogenic/Likely_risk_allele',
202203
'Pathogenic/Likely_risk_allele',
203204
'Likely_pathogenic',

v03_pipeline/lib/logger.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
'': {
2121
'level': 'INFO',
2222
'handlers': ['default'],
23-
'propagate': True,
23+
'propagate': False,
2424
},
2525
'py4j': {
2626
'level': 'CRITICAL',

v03_pipeline/lib/reference_data/clinvar.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ def parsed_clnsig(ht: hl.Table):
6464
'Likely_pathogenic,_low_penetrance',
6565
'Likely_pathogenic|low_penetrance',
6666
)
67+
.replace(
68+
'/Pathogenic,_low_penetrance/Established_risk_allele',
69+
'/Established_risk_allele|low_penetrance',
70+
)
6771
.replace(
6872
'/Pathogenic,_low_penetrance',
6973
'|low_penetrance',

v03_pipeline/lib/reference_data/clinvar_test.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,14 @@ def test_parsed_clnsig(self):
3131
},
3232
{'info': hl.Struct(CLNSIG=['Likely_pathogenic', '_low_penetrance'])},
3333
{'info': hl.Struct(CLNSIG=['association|protective'])},
34+
{
35+
'info': hl.Struct(
36+
CLNSIG=[
37+
'Pathogenic/Likely_pathogenic/Pathogenic',
38+
'_low_penetrance/Established_risk_allele',
39+
],
40+
),
41+
},
3442
],
3543
hl.tstruct(info=hl.tstruct(CLNSIG=hl.tarray(hl.tstr))),
3644
)
@@ -42,6 +50,10 @@ def test_parsed_clnsig(self):
4250
['Likely_pathogenic', 'low_penetrance', 'association', 'protective'],
4351
['Likely_pathogenic', 'low_penetrance'],
4452
['association', 'protective'],
53+
[
54+
'Pathogenic/Likely_pathogenic/Established_risk_allele',
55+
'low_penetrance',
56+
],
4557
],
4658
)
4759

@@ -66,8 +78,8 @@ def test_parsed_and_mapped_clnsigconf(self):
6678
None,
6779
[
6880
hl.Struct(count=9, pathogenicity_id=0),
69-
hl.Struct(count=2, pathogenicity_id=4),
70-
hl.Struct(count=1, pathogenicity_id=11),
81+
hl.Struct(count=2, pathogenicity_id=5),
82+
hl.Struct(count=1, pathogenicity_id=12),
7183
],
7284
],
7385
)

v03_pipeline/lib/reference_data/config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ def custom_mpc_select(ht):
192192
'pathogenicity': CLINVAR_PATHOGENICITIES,
193193
'assertion': CLINVAR_ASSERTIONS,
194194
},
195-
'filter': lambda ht: ht.locus.contig != 'MT',
195+
'filter': lambda ht: ~(ht.locus.contig == 'MT'),
196196
},
197197
'38': {
198198
'custom_import': download_and_import_latest_clinvar_vcf,
@@ -203,7 +203,7 @@ def custom_mpc_select(ht):
203203
'pathogenicity': CLINVAR_PATHOGENICITIES,
204204
'assertion': CLINVAR_ASSERTIONS,
205205
},
206-
'filter': lambda ht: ht.locus.contig != 'chrM',
206+
'filter': lambda ht: ~(ht.locus.contig == 'chrM'),
207207
},
208208
},
209209
'dbnsfp': {

v03_pipeline/lib/reference_data/dataset_table_operations.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ def parse_dataset_version(
179179
.when(hl.is_missing(annotated_version), config_version)
180180
.when(annotated_version == config_version, config_version)
181181
.or_error(
182-
f'found mismatching versions for dataset {dataset}, {config_version}, {hl.eval(annotated_version)}',
182+
f'found mismatching versions for dataset {dataset}. config version: {config_version}, ht version: {annotated_version}',
183183
)
184184
)
185185

v03_pipeline/lib/tasks/base/base_hail_table_task.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,13 @@ def init_hail(self):
2929

3030
# NB: these are defined over luigi.Task instead of the BaseHailTableTask so that
3131
# they work on file dependencies.
32+
33+
34+
@luigi.Task.event_handler(luigi.Event.DEPENDENCY_DISCOVERED)
35+
def dependency_discovered(task, dependency):
36+
logger.info(f'{task} dependency_discovered {dependency}')
37+
38+
3239
@luigi.Task.event_handler(luigi.Event.DEPENDENCY_MISSING)
3340
def dependency_missing(task):
3441
logger.info(f'{task} dependency_missing at {task.output()}')

v03_pipeline/lib/tasks/reference_data/update_variant_annotations_table_with_updated_reference_dataset.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import hail as hl
22

33
from v03_pipeline.lib.annotations.fields import get_fields
4+
from v03_pipeline.lib.logger import get_logger
45
from v03_pipeline.lib.model import ReferenceDatasetCollection
56
from v03_pipeline.lib.reference_data.compare_globals import (
67
Globals,
@@ -10,6 +11,8 @@
1011
BaseVariantAnnotationsTableTask,
1112
)
1213

14+
logger = get_logger(__name__)
15+
1316

1417
class UpdateVariantAnnotationsTableWithUpdatedReferenceDataset(
1518
BaseVariantAnnotationsTableTask,
@@ -26,6 +29,9 @@ def reference_dataset_collections(self) -> list[ReferenceDatasetCollection]:
2629
)
2730

2831
def complete(self) -> bool:
32+
logger.info(
33+
'Checking if UpdateVariantAnnotationsTableWithUpdatedReferenceDataset is complete',
34+
)
2935
self._datasets_to_update = []
3036

3137
if not super().complete():
@@ -56,6 +62,7 @@ def complete(self) -> bool:
5662
self.dataset_type,
5763
),
5864
)
65+
logger.info(f'Datasets to update: {self._datasets_to_update}')
5966
return not self._datasets_to_update
6067

6168
def update_table(self, ht: hl.Table) -> hl.Table:
Binary file not shown.
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
22
Written with version 0.2.128-eead8100a1c1
3-
Created at 2024/02/28 14:36:31
3+
Created at 2024/03/01 15:52:48

0 commit comments

Comments
 (0)