Skip to content

Commit 63b7bb7

Browse files
authored
remove "ignore_missing_samples_when_subsetting" (#803)
* remove "ignore_when_subsetting"
* sample ids
1 parent d8d84cf commit 63b7bb7

10 files changed

+1
-76
lines changed

v03_pipeline/lib/misc/sample_ids.py

Lines changed: 1 addition & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -61,7 +61,6 @@ def remap_sample_ids(
6161
def subset_samples(
6262
mt: hl.MatrixTable,
6363
sample_subset_ht: hl.Table,
64-
ignore_missing_samples_when_subsetting: bool,
6564
) -> hl.MatrixTable:
6665
subset_count = sample_subset_ht.count()
6766
anti_join_ht = sample_subset_ht.anti_join(mt.cols())
@@ -78,12 +77,7 @@ def subset_samples(
7877
f"IDs that aren't in the callset: {missing_samples}\n"
7978
f'All callset sample IDs:{mt.s.collect()}'
8079
)
81-
if (
82-
subset_count > anti_join_ht_count
83-
) and ignore_missing_samples_when_subsetting:
84-
logger.info(message)
85-
else:
86-
raise MatrixTableSampleSetError(message, missing_samples)
80+
raise MatrixTableSampleSetError(message, missing_samples)
8781
logger.info(f'Subsetted to {subset_count} sample ids')
8882
mt = mt.semi_join_cols(sample_subset_ht)
8983
return mt.filter_rows(hl.agg.any(hl.is_defined(mt.GT)))

v03_pipeline/lib/misc/sample_ids_test.py

Lines changed: 0 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -106,32 +106,6 @@ def test_remap_sample_ids_remap_has_missing_samples(self) -> None:
106106
ignore_missing_samples_when_remapping=False,
107107
)
108108

109-
def test_subset_samples(self):
110-
# subset 2 of 3 samples in callset
111-
sample_subset_ht = hl.Table.parallelize(
112-
[
113-
{'s': 'HG00731'},
114-
{'s': 'HG00732'},
115-
],
116-
hl.tstruct(s=hl.tstr),
117-
key='s',
118-
)
119-
120-
subset_mt = subset_samples(
121-
CALLSET_MT,
122-
sample_subset_ht,
123-
ignore_missing_samples_when_subsetting=True,
124-
)
125-
126-
self.assertEqual(subset_mt.cols().count(), 2)
127-
self.assertEqual(
128-
subset_mt.cols().collect(),
129-
[
130-
hl.Struct(col_idx=0, s='HG00731'),
131-
hl.Struct(col_idx=1, s='HG00732'),
132-
],
133-
)
134-
135109
def test_subset_samples_zero_samples(self):
136110
# subset 0 of 3 samples in callset
137111
sample_subset_ht = hl.Table.parallelize(
@@ -144,7 +118,6 @@ def test_subset_samples_zero_samples(self):
144118
subset_samples(
145119
CALLSET_MT,
146120
sample_subset_ht,
147-
ignore_missing_samples_when_subsetting=True,
148121
)
149122

150123
def test_subset_samples_missing_samples(self):
@@ -163,7 +136,6 @@ def test_subset_samples_missing_samples(self):
163136
subset_samples(
164137
CALLSET_MT,
165138
sample_subset_ht,
166-
ignore_missing_samples_when_subsetting=False,
167139
)
168140

169141
def test_subset_no_defined_gt(self):
@@ -187,6 +159,5 @@ def test_subset_no_defined_gt(self):
187159
mt = subset_samples(
188160
mt,
189161
sample_subset_ht,
190-
False,
191162
)
192163
self.assertEqual(mt.count(), (1, 1))

v03_pipeline/lib/tasks/update_lookup_table.py

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -22,10 +22,6 @@ class UpdateLookupTableTask(BaseUpdateLookupTableTask):
2222
project_remap_paths = luigi.ListParameter()
2323
project_pedigree_paths = luigi.ListParameter()
2424
imputed_sex_paths = luigi.ListParameter(default=None)
25-
ignore_missing_samples_when_subsetting = luigi.BoolParameter(
26-
default=False,
27-
parsing=luigi.BoolParameter.EXPLICIT_PARSING,
28-
)
2925
ignore_missing_samples_when_remapping = luigi.BoolParameter(
3026
default=False,
3127
parsing=luigi.BoolParameter.EXPLICIT_PARSING,
@@ -84,7 +80,6 @@ def requires(self) -> list[luigi.Task]:
8480
project_remap_path,
8581
project_pedigree_path,
8682
imputed_sex_path,
87-
self.ignore_missing_samples_when_subsetting,
8883
self.ignore_missing_samples_when_remapping,
8984
self.validate,
9085
False,

v03_pipeline/lib/tasks/update_project_table.py

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -20,10 +20,6 @@ class UpdateProjectTableTask(BaseUpdateProjectTableTask):
2020
project_remap_path = luigi.Parameter()
2121
project_pedigree_path = luigi.Parameter()
2222
imputed_sex_path = luigi.Parameter(default=None)
23-
ignore_missing_samples_when_subsetting = luigi.BoolParameter(
24-
default=False,
25-
parsing=luigi.BoolParameter.EXPLICIT_PARSING,
26-
)
2723
ignore_missing_samples_when_remapping = luigi.BoolParameter(
2824
default=False,
2925
parsing=luigi.BoolParameter.EXPLICIT_PARSING,
@@ -62,7 +58,6 @@ def requires(self) -> luigi.Task:
6258
self.project_remap_path,
6359
self.project_pedigree_path,
6460
self.imputed_sex_path,
65-
self.ignore_missing_samples_when_subsetting,
6661
self.ignore_missing_samples_when_remapping,
6762
self.validate,
6863
False,

v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples.py

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -21,10 +21,6 @@ class UpdateVariantAnnotationsTableWithNewSamplesTask(
2121
project_remap_paths = luigi.ListParameter()
2222
project_pedigree_paths = luigi.ListParameter()
2323
imputed_sex_paths = luigi.ListParameter(default=None)
24-
ignore_missing_samples_when_subsetting = luigi.BoolParameter(
25-
default=False,
26-
parsing=luigi.BoolParameter.EXPLICIT_PARSING,
27-
)
2824
ignore_missing_samples_when_remapping = luigi.BoolParameter(
2925
default=False,
3026
parsing=luigi.BoolParameter.EXPLICIT_PARSING,
@@ -55,7 +51,6 @@ def requires(self) -> list[luigi.Task]:
5551
self.project_remap_paths,
5652
self.project_pedigree_paths,
5753
self.imputed_sex_paths,
58-
self.ignore_missing_samples_when_subsetting,
5954
self.ignore_missing_samples_when_remapping,
6055
self.validate,
6156
self.force,

v03_pipeline/lib/tasks/write_family_table.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -15,9 +15,6 @@ class WriteFamilyTableTask(BaseWriteTask):
1515
project_remap_path = luigi.Parameter()
1616
project_pedigree_path = luigi.Parameter()
1717
imputed_sex_path = luigi.Parameter(default=None)
18-
ignore_missing_samples_when_subsetting = luigi.BoolParameter(
19-
parsing=luigi.BoolParameter.EXPLICIT_PARSING,
20-
)
2118
ignore_missing_samples_when_remapping = luigi.BoolParameter(
2219
parsing=luigi.BoolParameter.EXPLICIT_PARSING,
2320
)
@@ -61,7 +58,6 @@ def requires(self) -> luigi.Task:
6158
self.project_remap_path,
6259
self.project_pedigree_path,
6360
self.imputed_sex_path,
64-
self.ignore_missing_samples_when_subsetting,
6561
self.ignore_missing_samples_when_remapping,
6662
self.validate,
6763
False,

v03_pipeline/lib/tasks/write_metadata_for_run.py

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -18,10 +18,6 @@ class WriteMetadataForRunTask(BaseHailTableTask):
1818
project_remap_paths = luigi.ListParameter()
1919
project_pedigree_paths = luigi.ListParameter()
2020
imputed_sex_paths = luigi.ListParameter(default=None)
21-
ignore_missing_samples_when_subsetting = luigi.BoolParameter(
22-
default=False,
23-
parsing=luigi.BoolParameter.EXPLICIT_PARSING,
24-
)
2521
ignore_missing_samples_when_remapping = luigi.BoolParameter(
2622
default=False,
2723
parsing=luigi.BoolParameter.EXPLICIT_PARSING,
@@ -63,7 +59,6 @@ def requires(self) -> list[luigi.Task]:
6359
project_remap_path,
6460
project_pedigree_path,
6561
imputed_sex_path,
66-
self.ignore_missing_samples_when_subsetting,
6762
self.ignore_missing_samples_when_remapping,
6863
self.validate,
6964
self.force,

v03_pipeline/lib/tasks/write_new_variants_table.py

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -42,10 +42,6 @@ class WriteNewVariantsTableTask(BaseWriteTask):
4242
project_remap_paths = luigi.ListParameter()
4343
project_pedigree_paths = luigi.ListParameter()
4444
imputed_sex_paths = luigi.ListParameter(default=None)
45-
ignore_missing_samples_when_subsetting = luigi.BoolParameter(
46-
default=False,
47-
parsing=luigi.BoolParameter.EXPLICIT_PARSING,
48-
)
4945
ignore_missing_samples_when_remapping = luigi.BoolParameter(
5046
default=False,
5147
parsing=luigi.BoolParameter.EXPLICIT_PARSING,
@@ -112,7 +108,6 @@ def requires(self) -> list[luigi.Task]:
112108
self.project_remap_paths,
113109
self.project_pedigree_paths,
114110
self.imputed_sex_paths,
115-
self.ignore_missing_samples_when_subsetting,
116111
self.ignore_missing_samples_when_remapping,
117112
self.validate,
118113
self.force,
@@ -131,7 +126,6 @@ def requires(self) -> list[luigi.Task]:
131126
project_remap_path,
132127
project_pedigree_path,
133128
imputed_sex_path,
134-
self.ignore_missing_samples_when_subsetting,
135129
self.ignore_missing_samples_when_remapping,
136130
self.validate,
137131
False,

v03_pipeline/lib/tasks/write_project_family_tables.py

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -12,10 +12,6 @@ class WriteProjectFamilyTablesTask(BaseHailTableTask):
1212
project_remap_path = luigi.Parameter()
1313
project_pedigree_path = luigi.Parameter()
1414
imputed_sex_path = luigi.Parameter(default=None)
15-
ignore_missing_samples_when_subsetting = luigi.BoolParameter(
16-
default=False,
17-
parsing=luigi.BoolParameter.EXPLICIT_PARSING,
18-
)
1915
ignore_missing_samples_when_remapping = luigi.BoolParameter(
2016
default=False,
2117
parsing=luigi.BoolParameter.EXPLICIT_PARSING,
@@ -58,7 +54,6 @@ def run(self):
5854
self.project_remap_path,
5955
self.project_pedigree_path,
6056
self.imputed_sex_path,
61-
self.ignore_missing_samples_when_subsetting,
6257
self.ignore_missing_samples_when_remapping,
6358
self.validate,
6459
False,

v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -32,10 +32,6 @@ class WriteRemappedAndSubsettedCallsetTask(BaseWriteTask):
3232
project_remap_path = luigi.Parameter()
3333
project_pedigree_path = luigi.Parameter()
3434
imputed_sex_path = luigi.Parameter(default=None)
35-
ignore_missing_samples_when_subsetting = luigi.BoolParameter(
36-
default=False,
37-
parsing=luigi.BoolParameter.EXPLICIT_PARSING,
38-
)
3935
ignore_missing_samples_when_remapping = luigi.BoolParameter(
4036
default=False,
4137
parsing=luigi.BoolParameter.EXPLICIT_PARSING,
@@ -178,7 +174,6 @@ def create_table(self) -> hl.MatrixTable:
178174
hl.tstruct(s=hl.dtype('str')),
179175
key='s',
180176
),
181-
self.ignore_missing_samples_when_subsetting,
182177
)
183178
# Drop additional fields imported onto the intermediate callsets but
184179
# not used when creating the downstream optimized tables.

0 commit comments

Comments
 (0)