|
4 | 4 | import hail as hl
|
5 | 5 |
|
6 | 6 | from v03_pipeline.lib.reference_data.clinvar import (
|
| 7 | + import_submission_table, |
7 | 8 | join_to_submission_summary_ht,
|
8 | 9 | parsed_and_mapped_clnsigconf,
|
9 | 10 | parsed_clnsig,
|
@@ -87,56 +88,10 @@ def test_parsed_and_mapped_clnsigconf(self):
|
87 | 88 | )
|
88 | 89 |
|
89 | 90 | @mock.patch(
|
90 |
| - 'v03_pipeline.lib.reference_data.clinvar.download_and_import_clinvar_submission_summary', |
| 91 | + 'v03_pipeline.lib.reference_data.clinvar.hl.import_table', |
91 | 92 | )
|
92 |
| - def test_join_to_submission_summary_ht(self, mock_download): |
93 |
| - clinvar_enums_struct = hl.Struct( |
94 |
| - CLNSIG=[ |
95 |
| - 'Pathogenic/Likely_pathogenic/Pathogenic', |
96 |
| - '_low_penetrance', |
97 |
| - ], |
98 |
| - CLNSIGCONF=[ |
99 |
| - 'Pathogenic(8)|Likely_pathogenic(2)|Pathogenic', |
100 |
| - '_low_penetrance(1)|Uncertain_significance(1)', |
101 |
| - ], |
102 |
| - CLNREVSTAT=['no_classifications_from_unflagged_records'], |
103 |
| - ) |
104 |
| - vcf_ht = hl.Table.parallelize( |
105 |
| - [ |
106 |
| - { |
107 |
| - 'locus': hl.Locus( |
108 |
| - contig='chr1', |
109 |
| - position=871269, |
110 |
| - reference_genome='GRCh38', |
111 |
| - ), |
112 |
| - 'alleles': ['A', 'C'], |
113 |
| - 'rsid': '5', |
114 |
| - 'info': hl.Struct(ALLELEID=1, **clinvar_enums_struct), |
115 |
| - }, |
116 |
| - { |
117 |
| - 'locus': hl.Locus( |
118 |
| - contig='chr1', |
119 |
| - position=871269, |
120 |
| - reference_genome='GRCh38', |
121 |
| - ), |
122 |
| - 'alleles': ['A', 'AC'], |
123 |
| - 'rsid': '7', |
124 |
| - 'info': hl.Struct(ALLELEID=1, **clinvar_enums_struct), |
125 |
| - }, |
126 |
| - ], |
127 |
| - hl.tstruct( |
128 |
| - locus=hl.tlocus('GRCh38'), |
129 |
| - alleles=hl.tarray(hl.tstr), |
130 |
| - rsid=hl.tstr, |
131 |
| - info=hl.tstruct( |
132 |
| - ALLELEID=hl.tint32, |
133 |
| - CLNSIG=hl.tarray(hl.tstr), |
134 |
| - CLNSIGCONF=hl.tarray(hl.tstr), |
135 |
| - CLNREVSTAT=hl.tarray(hl.tstr), |
136 |
| - ), |
137 |
| - ), |
138 |
| - ) |
139 |
| - mock_download.return_value = hl.Table.parallelize( |
| 93 | + def test_import_submission_table(self, mock_import_table): |
| 94 | + mock_import_table.return_value = hl.Table.parallelize( |
140 | 95 | [
|
141 | 96 | {
|
142 | 97 | '#VariationID': '5',
|
@@ -164,51 +119,137 @@ def test_join_to_submission_summary_ht(self, mock_download):
|
164 | 119 | 'ReportedPhenotypeInfo': 'na:B',
|
165 | 120 | },
|
166 | 121 | ],
|
167 |
| - hl.tstruct( |
168 |
| - **{ |
169 |
| - '#VariationID': hl.tstr, |
170 |
| - 'Submitter': hl.tstr, |
171 |
| - 'ReportedPhenotypeInfo': hl.tstr, |
172 |
| - }, |
173 |
| - ), |
174 | 122 | )
|
175 |
| - ht = join_to_submission_summary_ht(vcf_ht) |
| 123 | + ht = import_submission_table('mock_file_name') |
176 | 124 | self.assertEqual(
|
177 | 125 | ht.collect(),
|
178 | 126 | [
|
179 | 127 | hl.Struct(
|
180 |
| - locus=hl.Locus( |
181 |
| - contig='chr1', |
182 |
| - position=871269, |
183 |
| - reference_genome='GRCh38', |
184 |
| - ), |
185 |
| - alleles=['A', 'C'], |
186 |
| - rsid='5', |
187 |
| - info=hl.Struct(ALLELEID=1, **clinvar_enums_struct), |
188 |
| - submitters=[ |
| 128 | + VariationID='5', |
| 129 | + Submitters=[ |
189 | 130 | 'OMIM',
|
190 | 131 | 'Broad Institute Rare Disease Group, Broad Institute',
|
191 | 132 | 'PreventionGenetics, part of Exact Sciences',
|
192 | 133 | 'Invitae',
|
193 | 134 | ],
|
194 |
| - conditions=[ |
| 135 | + Conditions=[ |
195 | 136 | 'C3661900:not provided',
|
196 | 137 | 'C0023264:Leigh syndrome',
|
197 | 138 | 'na:FOXRED1-related condition',
|
198 | 139 | 'C4748791:Mitochondrial complex 1 deficiency, nuclear type 19',
|
199 | 140 | ],
|
200 | 141 | ),
|
201 | 142 | hl.Struct(
|
202 |
| - locus=hl.Locus( |
| 143 | + VariationID='6', |
| 144 | + Submitters=['A'], |
| 145 | + Conditions=['na:B'], |
| 146 | + ), |
| 147 | + ], |
| 148 | + ) |
| 149 | + |
| 150 | + @mock.patch( |
| 151 | + 'v03_pipeline.lib.reference_data.clinvar.download_and_import_clinvar_submission_summary', |
| 152 | + ) |
| 153 | + def test_join_to_submission_summary_ht( |
| 154 | + self, |
| 155 | + mock_download, |
| 156 | + ): |
| 157 | + vcf_ht = hl.Table.parallelize( |
| 158 | + [ |
| 159 | + { |
| 160 | + 'locus': hl.Locus( |
203 | 161 | contig='chr1',
|
204 | 162 | position=871269,
|
205 | 163 | reference_genome='GRCh38',
|
206 | 164 | ),
|
207 |
| - alleles=['A', 'AC'], |
208 |
| - rsid='7', |
209 |
| - info=hl.Struct(ALLELEID=1, **clinvar_enums_struct), |
210 |
| - submitters=None, |
211 |
| - conditions=None, |
212 |
| - ), |
| 165 | + 'alleles': ['A', 'C'], |
| 166 | + 'rsid': '5', |
| 167 | + 'info': hl.Struct(ALLELEID=1), |
| 168 | + }, |
| 169 | + { |
| 170 | + 'locus': hl.Locus( |
| 171 | + contig='chr1', |
| 172 | + position=871269, |
| 173 | + reference_genome='GRCh38', |
| 174 | + ), |
| 175 | + 'alleles': ['A', 'AC'], |
| 176 | + 'rsid': '7', |
| 177 | + 'info': hl.Struct(ALLELEID=1), |
| 178 | + }, |
| 179 | + ], |
| 180 | + hl.tstruct( |
| 181 | + locus=hl.tlocus('GRCh38'), |
| 182 | + alleles=hl.tarray(hl.tstr), |
| 183 | + rsid=hl.tstr, |
| 184 | + info=hl.tstruct(ALLELEID=hl.tint32), |
| 185 | + ), |
| 186 | + ) |
| 187 | + submitters_ht = hl.Table.parallelize( |
| 188 | + [ |
| 189 | + { |
| 190 | + 'VariationID': '5', |
| 191 | + 'Submitters': [ |
| 192 | + 'OMIM', |
| 193 | + 'Broad Institute Rare Disease Group, Broad Institute', |
| 194 | + 'PreventionGenetics, part of Exact Sciences', |
| 195 | + 'Invitae', |
| 196 | + ], |
| 197 | + 'Conditions': [ |
| 198 | + 'C3661900:not provided', |
| 199 | + 'C0023264:Leigh syndrome', |
| 200 | + 'na:FOXRED1-related condition', |
| 201 | + 'C4748791:Mitochondrial complex 1 deficiency, nuclear type 19', |
| 202 | + ], |
| 203 | + }, |
| 204 | + {'VariationID': '6', 'Submitters': ['A'], 'Conditions': ['na:B']}, |
213 | 205 | ],
|
| 206 | + hl.tstruct( |
| 207 | + VariationID=hl.tstr, |
| 208 | + Submitters=hl.tarray(hl.tstr), |
| 209 | + Conditions=hl.tarray(hl.tstr), |
| 210 | + ), |
| 211 | + key='VariationID', |
| 212 | + ) |
| 213 | + expected_clinvar_ht_rows = [ |
| 214 | + hl.Struct( |
| 215 | + locus=hl.Locus( |
| 216 | + contig='chr1', |
| 217 | + position=871269, |
| 218 | + reference_genome='GRCh38', |
| 219 | + ), |
| 220 | + alleles=['A', 'C'], |
| 221 | + rsid='5', |
| 222 | + info=hl.Struct(ALLELEID=1), |
| 223 | + submitters=[ |
| 224 | + 'OMIM', |
| 225 | + 'Broad Institute Rare Disease Group, Broad Institute', |
| 226 | + 'PreventionGenetics, part of Exact Sciences', |
| 227 | + 'Invitae', |
| 228 | + ], |
| 229 | + conditions=[ |
| 230 | + 'C3661900:not provided', |
| 231 | + 'C0023264:Leigh syndrome', |
| 232 | + 'na:FOXRED1-related condition', |
| 233 | + 'C4748791:Mitochondrial complex 1 deficiency, nuclear type 19', |
| 234 | + ], |
| 235 | + ), |
| 236 | + hl.Struct( |
| 237 | + locus=hl.Locus( |
| 238 | + contig='chr1', |
| 239 | + position=871269, |
| 240 | + reference_genome='GRCh38', |
| 241 | + ), |
| 242 | + alleles=['A', 'AC'], |
| 243 | + rsid='7', |
| 244 | + info=hl.Struct(ALLELEID=1), |
| 245 | + submitters=None, |
| 246 | + conditions=None, |
| 247 | + ), |
| 248 | + ] |
| 249 | + |
| 250 | + mock_download.return_value = submitters_ht |
| 251 | + ht = join_to_submission_summary_ht(vcf_ht) |
| 252 | + self.assertEqual( |
| 253 | + ht.collect(), |
| 254 | + expected_clinvar_ht_rows, |
214 | 255 | )
|
0 commit comments