Skip to content

Commit 4b32dc5

Browse files
authored
Merge pull request #720 from broadinstitute/dev
Dev
2 parents 2adaa72 + 2b703c9 commit 4b32dc5

File tree

7 files changed

+32
-23
lines changed

7 files changed

+32
-23
lines changed

v03_pipeline/lib/misc/io.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -146,7 +146,12 @@ def select_relevant_fields(
146146
dataset_type: DatasetType,
147147
) -> hl.MatrixTable:
148148
mt = mt.select_globals()
149-
mt = mt.select_rows(*dataset_type.row_fields)
149+
optional_row_fields = [
150+
row_field
151+
for row_field in dataset_type.optional_row_fields
152+
if hasattr(mt, row_field)
153+
]
154+
mt = mt.select_rows(*dataset_type.row_fields, *optional_row_fields)
150155
mt = mt.select_cols(*dataset_type.col_fields)
151156
return mt.select_entries(*dataset_type.entries_fields)
152157

v03_pipeline/lib/model/dataset_type.py

Lines changed: 12 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55

66
from v03_pipeline.lib.annotations import gcnv, mito, shared, snv_indel, sv
77
from v03_pipeline.lib.model.definitions import ReferenceGenome
8-
from v03_pipeline.lib.model.environment import Env
98

109
MITO_MIN_HOM_THRESHOLD = 0.95
1110
ZERO = 0.0
@@ -65,19 +64,23 @@ def entries_fields(
6564
],
6665
}[self]
6766

67+
@property
68+
def optional_row_fields(
69+
self,
70+
) -> list[str]:
71+
return {
72+
DatasetType.SNV_INDEL: ['info'],
73+
DatasetType.MITO: [],
74+
DatasetType.SV: [],
75+
DatasetType.GCNV: [],
76+
}[self]
77+
6878
@property
6979
def row_fields(
7080
self,
7181
) -> list[str]:
7282
return {
73-
DatasetType.SNV_INDEL: (
74-
# The "info" field is used by one of the relatedness
75-
# check methods and not by an annotation method. We
76-
# only want to keep the field for callsets that require it.
77-
['rsid', 'filters', 'info']
78-
if Env.CHECK_SEX_AND_RELATEDNESS
79-
else ['rsid', 'filters']
80-
),
83+
DatasetType.SNV_INDEL: ['rsid', 'filters'],
8184
DatasetType.MITO: [
8285
'rsid',
8386
'filters',

v03_pipeline/lib/model/environment.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55
ACCESS_PRIVATE_REFERENCE_DATASETS = (
66
os.environ.get('ACCESS_PRIVATE_REFERENCE_DATASETS') == '1'
77
)
8-
CHECK_SEX_AND_RELATEDNESS = os.environ.get('CHECK_SEX_AND_RELATEDNESS') == '1'
98
REFERENCE_DATA_AUTO_UPDATE = os.environ.get('REFERENCE_DATA_AUTO_UPDATE') == '1'
109
HAIL_TMPDIR = os.environ.get('HAIL_TMPDIR', '/tmp') # noqa: S108
1110
HAIL_SEARCH_DATA = os.environ.get('HAIL_SEARCH_DATA', '/hail-search-data')
@@ -25,7 +24,6 @@
2524
@dataclass
2625
class Env:
2726
ACCESS_PRIVATE_REFERENCE_DATASETS: bool = ACCESS_PRIVATE_REFERENCE_DATASETS
28-
CHECK_SEX_AND_RELATEDNESS: bool = CHECK_SEX_AND_RELATEDNESS
2927
REFERENCE_DATA_AUTO_UPDATE: bool = REFERENCE_DATA_AUTO_UPDATE
3028
HAIL_TMPDIR: str = HAIL_TMPDIR
3129
HAIL_SEARCH_DATA: str = HAIL_SEARCH_DATA

v03_pipeline/lib/tasks/write_metadata_for_run.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,10 @@ class WriteMetadataForRunTask(BaseHailTableTask):
2929
default=True,
3030
parsing=luigi.BoolParameter.EXPLICIT_PARSING,
3131
)
32+
check_sex_and_relatedness = luigi.BoolParameter(
33+
default=True,
34+
parsing=luigi.BoolParameter.EXPLICIT_PARSING,
35+
)
3236
run_id = luigi.Parameter()
3337

3438
def output(self) -> luigi.Target:
@@ -56,6 +60,7 @@ def requires(self) -> luigi.Task:
5660
self.ignore_missing_samples_when_subsetting,
5761
self.ignore_missing_samples_when_remapping,
5862
self.validate,
63+
self.check_sex_and_relatedness,
5964
)
6065
for (
6166
callset_path,

v03_pipeline/lib/tasks/write_metadata_for_run_test.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@ def test_write_metadata_for_run_task(self) -> None:
2424
project_remap_paths=[TEST_REMAP_2, TEST_REMAP_2],
2525
project_pedigree_paths=[TEST_PEDIGREE_3, TEST_PEDIGREE_4],
2626
validate=False,
27+
check_sex_and_relatedness=False,
2728
run_id='run_123456',
2829
)
2930
worker.add(write_metadata_for_run_task)

v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,6 @@
99
from v03_pipeline.lib.misc.io import does_file_exist, import_pedigree, import_remap
1010
from v03_pipeline.lib.misc.pedigree import parse_pedigree_ht_to_families
1111
from v03_pipeline.lib.misc.sample_ids import remap_sample_ids, subset_samples
12-
from v03_pipeline.lib.model import Env
1312
from v03_pipeline.lib.paths import remapped_and_subsetted_callset_path
1413
from v03_pipeline.lib.tasks.base.base_write_task import BaseWriteTask
1514
from v03_pipeline.lib.tasks.files import GCSorLocalTarget, RawFileTask
@@ -37,6 +36,10 @@ class WriteRemappedAndSubsettedCallsetTask(BaseWriteTask):
3736
default=True,
3837
parsing=luigi.BoolParameter.EXPLICIT_PARSING,
3938
)
39+
check_sex_and_relatedness = luigi.BoolParameter(
40+
default=False,
41+
parsing=luigi.BoolParameter.EXPLICIT_PARSING,
42+
)
4043

4144
def output(self) -> luigi.Target:
4245
return GCSorLocalTarget(
@@ -64,7 +67,7 @@ def requires(self) -> list[luigi.Task]:
6467
RawFileTask(self.project_pedigree_path),
6568
]
6669
if (
67-
Env.CHECK_SEX_AND_RELATEDNESS
70+
self.check_sex_and_relatedness
6871
and self.dataset_type.check_sex_and_relatedness
6972
):
7073
requirements = [
@@ -109,7 +112,7 @@ def create_table(self) -> hl.MatrixTable:
109112
families_failed_relatedness_check = {}
110113
families_failed_sex_check = {}
111114
if (
112-
Env.CHECK_SEX_AND_RELATEDNESS
115+
self.check_sex_and_relatedness
113116
and self.dataset_type.check_sex_and_relatedness
114117
):
115118
relatedness_check_ht = hl.read_table(self.input()[2].path)

v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset_test.py

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,4 @@
11
import shutil
2-
import unittest
3-
from unittest.mock import patch
42

53
import hail as hl
64
import luigi.worker
@@ -72,12 +70,9 @@ def setUp(self) -> None:
7270
),
7371
)
7472

75-
@patch('v03_pipeline.lib.tasks.write_remapped_and_subsetted_callset.Env')
7673
def test_write_remapped_and_subsetted_callset_task(
7774
self,
78-
mock_env: unittest.mock.Mock,
7975
) -> None:
80-
mock_env.CHECK_SEX_AND_RELATEDNESS = True
8176
worker = luigi.worker.Worker()
8277
wrsc_task = WriteRemappedAndSubsettedCallsetTask(
8378
reference_genome=ReferenceGenome.GRCh38,
@@ -88,6 +83,7 @@ def test_write_remapped_and_subsetted_callset_task(
8883
project_remap_path=TEST_REMAP,
8984
project_pedigree_path=TEST_PEDIGREE_3,
9085
validate=False,
86+
check_sex_and_relatedness=True,
9187
)
9288
worker.add(wrsc_task)
9389
worker.run()
@@ -108,12 +104,9 @@ def test_write_remapped_and_subsetted_callset_task(
108104
],
109105
)
110106

111-
@patch('v03_pipeline.lib.tasks.write_remapped_and_subsetted_callset.Env')
112107
def test_write_remapped_and_subsetted_callset_task_failed_sex_check_family(
113108
self,
114-
mock_env: unittest.mock.Mock,
115109
) -> None:
116-
mock_env.CHECK_SEX_AND_RELATEDNESS = True
117110
worker = luigi.worker.Worker()
118111
wrsc_task = WriteRemappedAndSubsettedCallsetTask(
119112
reference_genome=ReferenceGenome.GRCh38,
@@ -124,6 +117,7 @@ def test_write_remapped_and_subsetted_callset_task_failed_sex_check_family(
124117
project_remap_path=TEST_REMAP,
125118
project_pedigree_path=TEST_PEDIGREE_4,
126119
validate=False,
120+
check_sex_and_relatedness=True,
127121
)
128122
worker.add(wrsc_task)
129123
worker.run()

0 commit comments

Comments
 (0)