|
2 | 2 | import luigi.worker
|
3 | 3 |
|
4 | 4 | from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType
|
5 |
| -from v03_pipeline.lib.paths import project_table_path |
| 5 | +from v03_pipeline.lib.paths import ( |
| 6 | + project_table_path, |
| 7 | + remapped_and_subsetted_callset_path, |
| 8 | +) |
6 | 9 | from v03_pipeline.lib.tasks.write_project_family_tables import (
|
7 | 10 | WriteProjectFamilyTablesTask,
|
8 | 11 | )
|
@@ -38,6 +41,33 @@ def test_snv_write_project_family_tables_task(self) -> None:
|
38 | 41 | hl.read_table(write_family_table_task.output().path)
|
39 | 42 | for write_family_table_task in write_project_family_tables.dynamic_write_family_table_tasks
|
40 | 43 | ]
|
| 44 | + # Validate remapped and subsetted callset families |
| 45 | + remapped_and_subsetted_callset = hl.read_matrix_table( |
| 46 | + remapped_and_subsetted_callset_path( |
| 47 | + ReferenceGenome.GRCh38, |
| 48 | + DatasetType.SNV_INDEL, |
| 49 | + TEST_SNV_INDEL_VCF, |
| 50 | + 'R0113_test_project', |
| 51 | + ), |
| 52 | + ) |
| 53 | + self.assertCountEqual( |
| 54 | + hl.eval(remapped_and_subsetted_callset.globals.family_samples.keys()), |
| 55 | + { |
| 56 | + '123_1', |
| 57 | + '234_1', |
| 58 | + '345_1', |
| 59 | + '456_1', |
| 60 | + '567_1', |
| 61 | + '678_1', |
| 62 | + '789_1', |
| 63 | + '890_1', |
| 64 | + '901_1', |
| 65 | + 'bcd_1', |
| 66 | + 'cde_1', |
| 67 | + 'def_1', |
| 68 | + 'efg_1', |
| 69 | + }, |
| 70 | + ) |
41 | 71 | self.assertCountEqual(
|
42 | 72 | [ht.globals.sample_ids.collect() for ht in hts],
|
43 | 73 | [
|
@@ -73,13 +103,39 @@ def test_snv_write_project_family_tables_task(self) -> None:
|
73 | 103 | worker.run()
|
74 | 104 | self.assertTrue(write_project_family_tables_subset.complete())
|
75 | 105 | hts = [
|
76 |
| - hl.read_table(write_family_table_task.output().path) |
| 106 | + write_family_table_task.output().path |
77 | 107 | for write_family_table_task in write_project_family_tables_subset.dynamic_write_family_table_tasks
|
78 | 108 | ]
|
79 |
| - # Only one family table written |
80 |
| - self.assertEqual( |
81 |
| - len(hts), |
82 |
| - 1, |
| 109 | + self.assertTrue(len(hts)) |
| 110 | + self.assertTrue( |
| 111 | + '123_1' in hts[0], |
| 112 | + ) |
| 113 | + # Validate remapped and subsetted callset families |
| 114 | + # (and that it was re-written) |
| 115 | + remapped_and_subsetted_callset = hl.read_matrix_table( |
| 116 | + remapped_and_subsetted_callset_path( |
| 117 | + ReferenceGenome.GRCh38, |
| 118 | + DatasetType.SNV_INDEL, |
| 119 | + TEST_SNV_INDEL_VCF, |
| 120 | + 'R0113_test_project', |
| 121 | + ), |
| 122 | + ) |
| 123 | + self.assertCountEqual( |
| 124 | + hl.eval(remapped_and_subsetted_callset.globals.family_samples.keys()), |
| 125 | + {'123_1'}, |
| 126 | + ) |
| 127 | + self.assertCountEqual( |
| 128 | + hl.eval(remapped_and_subsetted_callset.globals.failed_family_samples), |
| 129 | + hl.Struct( |
| 130 | + missing_samples={ |
| 131 | + '234_1': { |
| 132 | + 'reasons': ["Missing samples: {'NA19678_999'}"], |
| 133 | + 'samples': ['NA19678_1', 'NA19678_999'], |
| 134 | + }, |
| 135 | + }, |
| 136 | + relatedness_check={}, |
| 137 | + sex_check={}, |
| 138 | + ), |
83 | 139 | )
|
84 | 140 | # Project table still contains all family guids
|
85 | 141 | self.assertCountEqual(
|
|
0 commit comments