Skip to content

Commit 2770d0a

Browse files
committed
comments
1 parent c8526d6 commit 2770d0a

File tree

4 files changed

+16
-30
lines changed

4 files changed

+16
-30
lines changed

v03_pipeline/lib/methods/sample_qc.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
import hail as hl
22
from gnomad.sample_qc.pipeline import filter_rows_for_qc
33

4+
from v03_pipeline.lib.misc.terra_data_repository import BIGQUERY_METRICS
45
from v03_pipeline.lib.model import SampleType
56

7+
GNOMAD_FILTER_MIN_AF = 0.001
8+
GNOMAD_FILTER_MIN_CALLRATE = 0.99
9+
610
CALLRATE_LOW_THRESHOLD = 0.85
711
CONTAMINATION_UPPER_THRESHOLD = 5
812
WES_COVERAGE_LOW_THRESHOLD = 85
@@ -14,7 +18,6 @@ def call_sample_qc(
1418
tdr_metrics_ht: hl.Table,
1519
sample_type: SampleType,
1620
):
17-
mt = mt.annotate_cols(sample_type=sample_type)
1821
mt = mt.annotate_entries(
1922
GT=hl.case()
2023
.when(mt.GT.is_diploid(), hl.call(mt.GT[0], mt.GT[1], phased=False))
@@ -28,8 +31,8 @@ def call_sample_qc(
2831
def annotate_filtered_callrate(mt: hl.MatrixTable) -> hl.MatrixTable:
2932
filtered_mt = filter_rows_for_qc(
3033
mt,
31-
min_af=0.001,
32-
min_callrate=0.99,
34+
min_af=GNOMAD_FILTER_MIN_AF,
35+
min_callrate=GNOMAD_FILTER_MIN_CALLRATE,
3336
bi_allelic_only=True,
3437
snv_only=True,
3538
apply_hard_filters=False,
@@ -62,8 +65,6 @@ def annotate_filter_flags(
6265
[hl.or_missing(filter_cond, name) for name, filter_cond in flags.items()],
6366
).filter(hl.is_defined),
6467
).drop(
65-
'contamination_rate',
66-
'percent_bases_at_20x',
67-
'mean_coverage',
68+
*BIGQUERY_METRICS[2:5],
6869
'filtered_callrate',
6970
)

v03_pipeline/lib/tasks/write_metadata_for_run_test.py

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -96,19 +96,10 @@ def test_write_metadata_for_run_task(
9696
TEST_VCF,
9797
),
9898
'sample_qc': {
99-
'HG00731': {
100-
'sample_type': 'WGS',
101-
'filter_flags': ['coverage', 'contamination'],
102-
},
103-
'HG00732': {
104-
'sample_type': 'WGS',
105-
'filter_flags': ['coverage'],
106-
},
107-
'HG00733': {
108-
'sample_type': 'WGS',
109-
'filter_flags': ['contamination'],
110-
},
111-
'NA19675': {'sample_type': 'WGS', 'filter_flags': []},
99+
'HG00731': {'filter_flags': ['coverage', 'contamination']},
100+
'HG00732': {'filter_flags': ['coverage']},
101+
'HG00733': {'filter_flags': ['contamination']},
102+
'NA19675': {'filter_flags': []},
112103
},
113104
},
114105
)

v03_pipeline/lib/tasks/write_sample_qc_json_test.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -47,23 +47,17 @@ def test_call_sample_qc(
4747

4848
self.assertCountEqual(
4949
res['HG00731'],
50-
{
51-
'sample_type': 'WGS',
52-
'filter_flags': ['contamination', 'coverage'],
53-
},
50+
{'filter_flags': ['contamination', 'coverage']},
5451
)
5552
self.assertCountEqual(
5653
res['HG00732'],
57-
{'sample_type': 'WGS', 'filter_flags': ['coverage']},
54+
{'filter_flags': ['coverage']},
5855
)
5956
self.assertCountEqual(
6057
res['HG00733'],
61-
{
62-
'sample_type': 'WGS',
63-
'filter_flags': ['contamination'],
64-
},
58+
{'filter_flags': ['contamination']},
6559
)
6660
self.assertCountEqual(
6761
res['HG00732'],
68-
{'sample_type': 'WGS', 'filter_flags': []},
62+
{'filter_flags': []},
6963
)
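(file path not captured in this page extract; the hunk below is a separate one-line JSON fixture, not part of the test file above)

Lines changed: 1 addition & 1 deletion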
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"HG00731": {"sample_type": "WGS", "filter_flags": ["coverage", "contamination"]}, "HG00732": {"sample_type": "WGS", "filter_flags": ["coverage"]}, "HG00733": {"sample_type": "WGS", "filter_flags": ["contamination"]}, "NA19675": {"sample_type": "WGS", "filter_flags": []}}
1+
{"HG00731": {"filter_flags": ["coverage", "contamination"]}, "HG00732": {"filter_flags": ["coverage"]}, "HG00733": {"filter_flags": ["contamination"]}, "NA19675": {"filter_flags": []}}

0 commit comments

Comments
 (0)