diff --git a/v03_pipeline/lib/misc/sample_ids.py b/v03_pipeline/lib/misc/sample_ids.py index ca5407b5e..77f173ffc 100644 --- a/v03_pipeline/lib/misc/sample_ids.py +++ b/v03_pipeline/lib/misc/sample_ids.py @@ -61,7 +61,6 @@ def remap_sample_ids( def subset_samples( mt: hl.MatrixTable, sample_subset_ht: hl.Table, - ignore_missing_samples_when_subsetting: bool, ) -> hl.MatrixTable: subset_count = sample_subset_ht.count() anti_join_ht = sample_subset_ht.anti_join(mt.cols()) @@ -78,12 +77,7 @@ def subset_samples( f"IDs that aren't in the callset: {missing_samples}\n" f'All callset sample IDs:{mt.s.collect()}' ) - if ( - subset_count > anti_join_ht_count - ) and ignore_missing_samples_when_subsetting: - logger.info(message) - else: - raise MatrixTableSampleSetError(message, missing_samples) + raise MatrixTableSampleSetError(message, missing_samples) logger.info(f'Subsetted to {subset_count} sample ids') mt = mt.semi_join_cols(sample_subset_ht) return mt.filter_rows(hl.agg.any(hl.is_defined(mt.GT))) diff --git a/v03_pipeline/lib/misc/sample_ids_test.py b/v03_pipeline/lib/misc/sample_ids_test.py index 41f53d9c8..db264a1f9 100644 --- a/v03_pipeline/lib/misc/sample_ids_test.py +++ b/v03_pipeline/lib/misc/sample_ids_test.py @@ -106,32 +106,6 @@ def test_remap_sample_ids_remap_has_missing_samples(self) -> None: ignore_missing_samples_when_remapping=False, ) - def test_subset_samples(self): - # subset 2 of 3 samples in callset - sample_subset_ht = hl.Table.parallelize( - [ - {'s': 'HG00731'}, - {'s': 'HG00732'}, - ], - hl.tstruct(s=hl.tstr), - key='s', - ) - - subset_mt = subset_samples( - CALLSET_MT, - sample_subset_ht, - ignore_missing_samples_when_subsetting=True, - ) - - self.assertEqual(subset_mt.cols().count(), 2) - self.assertEqual( - subset_mt.cols().collect(), - [ - hl.Struct(col_idx=0, s='HG00731'), - hl.Struct(col_idx=1, s='HG00732'), - ], - ) - def test_subset_samples_zero_samples(self): # subset 0 of 3 samples in callset sample_subset_ht = hl.Table.parallelize( @@ -144,7 +118,6 @@ def test_subset_samples_zero_samples(self): subset_samples( CALLSET_MT, sample_subset_ht, - ignore_missing_samples_when_subsetting=True, ) def test_subset_samples_missing_samples(self): @@ -163,7 +136,6 @@ def test_subset_samples_missing_samples(self): subset_samples( CALLSET_MT, sample_subset_ht, - ignore_missing_samples_when_subsetting=False, ) def test_subset_no_defined_gt(self): @@ -187,6 +159,5 @@ def test_subset_no_defined_gt(self): mt = subset_samples( mt, sample_subset_ht, - False, ) self.assertEqual(mt.count(), (1, 1)) diff --git a/v03_pipeline/lib/tasks/update_lookup_table.py b/v03_pipeline/lib/tasks/update_lookup_table.py index 3844401bf..eb6068e76 100644 --- a/v03_pipeline/lib/tasks/update_lookup_table.py +++ b/v03_pipeline/lib/tasks/update_lookup_table.py @@ -22,10 +22,6 @@ class UpdateLookupTableTask(BaseUpdateLookupTableTask): project_remap_paths = luigi.ListParameter() project_pedigree_paths = luigi.ListParameter() imputed_sex_paths = luigi.ListParameter(default=None) - ignore_missing_samples_when_subsetting = luigi.BoolParameter( - default=False, - parsing=luigi.BoolParameter.EXPLICIT_PARSING, - ) ignore_missing_samples_when_remapping = luigi.BoolParameter( default=False, parsing=luigi.BoolParameter.EXPLICIT_PARSING, @@ -84,7 +80,6 @@ def requires(self) -> list[luigi.Task]: project_remap_path, project_pedigree_path, imputed_sex_path, - self.ignore_missing_samples_when_subsetting, self.ignore_missing_samples_when_remapping, self.validate, False, diff --git a/v03_pipeline/lib/tasks/update_project_table.py b/v03_pipeline/lib/tasks/update_project_table.py index 818c6e7f5..508c51c30 100644 --- a/v03_pipeline/lib/tasks/update_project_table.py +++ b/v03_pipeline/lib/tasks/update_project_table.py @@ -20,10 +20,6 @@ class UpdateProjectTableTask(BaseUpdateProjectTableTask): project_remap_path = luigi.Parameter() project_pedigree_path = luigi.Parameter() imputed_sex_path = luigi.Parameter(default=None) - ignore_missing_samples_when_subsetting = luigi.BoolParameter( - default=False, - parsing=luigi.BoolParameter.EXPLICIT_PARSING, - ) ignore_missing_samples_when_remapping = luigi.BoolParameter( default=False, parsing=luigi.BoolParameter.EXPLICIT_PARSING, @@ -62,7 +58,6 @@ def requires(self) -> luigi.Task: self.project_remap_path, self.project_pedigree_path, self.imputed_sex_path, - self.ignore_missing_samples_when_subsetting, self.ignore_missing_samples_when_remapping, self.validate, False, diff --git a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples.py b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples.py index 6562b428b..99e6c49df 100644 --- a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples.py +++ b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples.py @@ -21,10 +21,6 @@ class UpdateVariantAnnotationsTableWithNewSamplesTask( project_remap_paths = luigi.ListParameter() project_pedigree_paths = luigi.ListParameter() imputed_sex_paths = luigi.ListParameter(default=None) - ignore_missing_samples_when_subsetting = luigi.BoolParameter( - default=False, - parsing=luigi.BoolParameter.EXPLICIT_PARSING, - ) ignore_missing_samples_when_remapping = luigi.BoolParameter( default=False, parsing=luigi.BoolParameter.EXPLICIT_PARSING, @@ -55,7 +51,6 @@ def requires(self) -> list[luigi.Task]: self.project_remap_paths, self.project_pedigree_paths, self.imputed_sex_paths, - self.ignore_missing_samples_when_subsetting, self.ignore_missing_samples_when_remapping, self.validate, self.force, diff --git a/v03_pipeline/lib/tasks/write_family_table.py b/v03_pipeline/lib/tasks/write_family_table.py index 17942ca5e..ef3025842 100644 --- a/v03_pipeline/lib/tasks/write_family_table.py +++ b/v03_pipeline/lib/tasks/write_family_table.py @@ -15,9 +15,6 @@ class WriteFamilyTableTask(BaseWriteTask): project_remap_path = luigi.Parameter() project_pedigree_path = luigi.Parameter() imputed_sex_path = luigi.Parameter(default=None) - ignore_missing_samples_when_subsetting = luigi.BoolParameter( - parsing=luigi.BoolParameter.EXPLICIT_PARSING, - ) ignore_missing_samples_when_remapping = luigi.BoolParameter( parsing=luigi.BoolParameter.EXPLICIT_PARSING, ) @@ -61,7 +58,6 @@ def requires(self) -> luigi.Task: self.project_remap_path, self.project_pedigree_path, self.imputed_sex_path, - self.ignore_missing_samples_when_subsetting, self.ignore_missing_samples_when_remapping, self.validate, False, diff --git a/v03_pipeline/lib/tasks/write_metadata_for_run.py b/v03_pipeline/lib/tasks/write_metadata_for_run.py index c81af08cb..3ec7d4f64 100644 --- a/v03_pipeline/lib/tasks/write_metadata_for_run.py +++ b/v03_pipeline/lib/tasks/write_metadata_for_run.py @@ -18,10 +18,6 @@ class WriteMetadataForRunTask(BaseHailTableTask): project_remap_paths = luigi.ListParameter() project_pedigree_paths = luigi.ListParameter() imputed_sex_paths = luigi.ListParameter(default=None) - ignore_missing_samples_when_subsetting = luigi.BoolParameter( - default=False, - parsing=luigi.BoolParameter.EXPLICIT_PARSING, - ) ignore_missing_samples_when_remapping = luigi.BoolParameter( default=False, parsing=luigi.BoolParameter.EXPLICIT_PARSING, @@ -63,7 +59,6 @@ def requires(self) -> list[luigi.Task]: project_remap_path, project_pedigree_path, imputed_sex_path, - self.ignore_missing_samples_when_subsetting, self.ignore_missing_samples_when_remapping, self.validate, self.force, diff --git a/v03_pipeline/lib/tasks/write_new_variants_table.py b/v03_pipeline/lib/tasks/write_new_variants_table.py index 5e183b05a..734b07d73 100644 --- a/v03_pipeline/lib/tasks/write_new_variants_table.py +++ b/v03_pipeline/lib/tasks/write_new_variants_table.py @@ -42,10 +42,6 @@ class WriteNewVariantsTableTask(BaseWriteTask): project_remap_paths = luigi.ListParameter() project_pedigree_paths = luigi.ListParameter() imputed_sex_paths = luigi.ListParameter(default=None) - ignore_missing_samples_when_subsetting = luigi.BoolParameter( - default=False, - parsing=luigi.BoolParameter.EXPLICIT_PARSING, - ) ignore_missing_samples_when_remapping = luigi.BoolParameter( default=False, parsing=luigi.BoolParameter.EXPLICIT_PARSING, @@ -112,7 +108,6 @@ def requires(self) -> list[luigi.Task]: self.project_remap_paths, self.project_pedigree_paths, self.imputed_sex_paths, - self.ignore_missing_samples_when_subsetting, self.ignore_missing_samples_when_remapping, self.validate, self.force, @@ -131,7 +126,6 @@ def requires(self) -> list[luigi.Task]: project_remap_path, project_pedigree_path, imputed_sex_path, - self.ignore_missing_samples_when_subsetting, self.ignore_missing_samples_when_remapping, self.validate, False, diff --git a/v03_pipeline/lib/tasks/write_project_family_tables.py b/v03_pipeline/lib/tasks/write_project_family_tables.py index a0b06aa31..fe96f441b 100644 --- a/v03_pipeline/lib/tasks/write_project_family_tables.py +++ b/v03_pipeline/lib/tasks/write_project_family_tables.py @@ -12,10 +12,6 @@ class WriteProjectFamilyTablesTask(BaseHailTableTask): project_remap_path = luigi.Parameter() project_pedigree_path = luigi.Parameter() imputed_sex_path = luigi.Parameter(default=None) - ignore_missing_samples_when_subsetting = luigi.BoolParameter( - default=False, - parsing=luigi.BoolParameter.EXPLICIT_PARSING, - ) ignore_missing_samples_when_remapping = luigi.BoolParameter( default=False, parsing=luigi.BoolParameter.EXPLICIT_PARSING, @@ -58,7 +54,6 @@ def run(self): self.project_remap_path, self.project_pedigree_path, self.imputed_sex_path, - self.ignore_missing_samples_when_subsetting, self.ignore_missing_samples_when_remapping, self.validate, False, diff --git a/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py b/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py index 75b6bf7b3..7998fb689 100644 --- a/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py +++ b/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py @@ -32,10 +32,6 @@ class WriteRemappedAndSubsettedCallsetTask(BaseWriteTask): project_remap_path = luigi.Parameter() project_pedigree_path = luigi.Parameter() imputed_sex_path = luigi.Parameter(default=None) - ignore_missing_samples_when_subsetting = luigi.BoolParameter( - default=False, - parsing=luigi.BoolParameter.EXPLICIT_PARSING, - ) ignore_missing_samples_when_remapping = luigi.BoolParameter( default=False, parsing=luigi.BoolParameter.EXPLICIT_PARSING, @@ -178,7 +174,6 @@ def create_table(self) -> hl.MatrixTable: hl.tstruct(s=hl.dtype('str')), key='s', ), - self.ignore_missing_samples_when_subsetting, ) # Drop additional fields imported onto the intermediate callsets but # not used when creating the downstream optimized tables.