Skip to content

Commit 199184a

Browse files
authored
Merge pull request #953 from broadinstitute/dev
Dev
2 parents 1386bb9 + 1f5b9a3 commit 199184a

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

43 files changed

+105
-13
lines changed

v03_pipeline/lib/misc/family_loading_failures_test.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ def test_build_sex_check_lookup(self):
6262
{'s': 'ROS_007_19Y05919_D1', 'predicted_sex': 'F'},
6363
{'s': 'ROS_007_19Y05939_D1', 'predicted_sex': 'M'},
6464
{'s': 'ROS_007_19Y05987_D1', 'predicted_sex': 'U'},
65+
{'s': 'ROS_007_19Y05989_D1', 'predicted_sex': 'X0'},
6566
],
6667
hl.tstruct(
6768
s=hl.tstr,
@@ -78,6 +79,7 @@ def test_build_sex_check_lookup(self):
7879
'ROS_007_19Y05919_D1': Sex.FEMALE,
7980
'ROS_007_19Y05939_D1': Sex.MALE,
8081
'ROS_007_19Y05987_D1': Sex.UNKNOWN,
82+
'ROS_007_19Y05989_D1': Sex.X0,
8183
},
8284
)
8385

@@ -184,6 +186,7 @@ def test_get_families_failed_sex_check(self):
184186
{'s': 'ROS_007_19Y05919_D1', 'predicted_sex': 'F'},
185187
{'s': 'ROS_007_19Y05939_D1', 'predicted_sex': 'M'},
186188
{'s': 'ROS_007_19Y05987_D1', 'predicted_sex': 'U'}, # Pedigree Sex F
189+
{'s': 'ROS_007_19Y05989_D1', 'predicted_sex': 'XXX'},
187190
],
188191
hl.tstruct(
189192
s=hl.tstr,

v03_pipeline/lib/misc/io_test.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ def test_import_imputed_sex(self) -> None:
4646
hl.Struct(s='abc_1', predicted_sex='M'),
4747
hl.Struct(s='abc_2', predicted_sex='F'),
4848
hl.Struct(s='abc_3', predicted_sex='U'),
49+
hl.Struct(s='abc_4', predicted_sex='XYY'),
4950
],
5051
)
5152

v03_pipeline/lib/misc/validation.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -173,9 +173,17 @@ def validate_imputed_sex_ploidy(
173173
& (sex_check_ht[mt.s].predicted_sex == Sex.MALE.value)
174174
)
175175
| (
176-
# At least one call is haploid but the sex is Female
176+
# At least one call is haploid but the sex is Female, X0, XXY, XYY, or XXX
177177
hl.agg.any(~mt.GT.is_diploid())
178-
& (sex_check_ht[mt.s].predicted_sex == Sex.FEMALE.value)
178+
& hl.literal(
179+
{
180+
Sex.FEMALE.value,
181+
Sex.X0.value,
182+
Sex.XYY.value,
183+
Sex.XXY.value,
184+
Sex.XXX.value,
185+
},
186+
).contains(sex_check_ht[mt.s].predicted_sex)
179187
)
180188
),
181189
)

v03_pipeline/lib/misc/validation_test.py

Lines changed: 81 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,18 @@ def test_validate_allele_type(self) -> None:
113113

114114
@patch('v03_pipeline.lib.misc.validation.Env')
115115
def test_validate_imputed_sex_ploidy(self, mock_env: Mock) -> None:
116+
female_sample = 'HG00731_1'
117+
male_sample_1 = 'HG00732_1'
118+
male_sample_2 = 'HG00732_1'
119+
x0_sample = 'NA20899_1'
120+
xxy_sample = 'NA20889_1'
121+
xyy_sample = 'NA20891_1'
122+
xxx_sample = 'NA20892_1'
123+
116124
mock_env.CHECK_SEX_AND_RELATEDNESS = True
117125
sex_check_ht = hl.read_table(TEST_SEX_CHECK_1)
126+
127+
# All calls on X chromosome are valid
118128
mt = hl.MatrixTable.from_parts(
119129
rows={
120130
'locus': [
@@ -125,42 +135,106 @@ def test_validate_imputed_sex_ploidy(self, mock_env: Mock) -> None:
125135
),
126136
],
127137
},
128-
cols={'s': ['HG00731_1', 'HG00732_1']},
138+
cols={
139+
's': [
140+
female_sample,
141+
male_sample_1,
142+
x0_sample,
143+
xxy_sample,
144+
xyy_sample,
145+
xxx_sample,
146+
],
147+
},
129148
entries={
130149
'GT': [
131150
[
132151
hl.Call(alleles=[0, 0], phased=False),
133152
hl.Call(alleles=[0], phased=False),
153+
hl.Call(alleles=[0, 0], phased=False), # X0
154+
hl.Call(alleles=[0, 0], phased=False), # XXY
155+
hl.Call(alleles=[0, 0], phased=False), # XYY
156+
hl.Call(alleles=[0, 0], phased=False), # XXX
134157
],
135158
],
136159
},
137160
).key_rows_by('locus')
138161
validate_imputed_sex_ploidy(mt, sex_check_ht)
162+
163+
# All calls on Y chromosome are valid
139164
mt = hl.MatrixTable.from_parts(
140165
rows={
141166
'locus': [
142167
hl.Locus(
143-
contig='chrX',
168+
contig='chrY',
144169
position=1,
145170
reference_genome='GRCh38',
146171
),
147172
],
148173
},
149-
# Male, Female, Male
150-
cols={'s': ['HG00731_1', 'HG00732_1', 'NA19678_1']},
174+
cols={
175+
's': [
176+
female_sample,
177+
male_sample_1,
178+
x0_sample,
179+
xxy_sample,
180+
xyy_sample,
181+
xxx_sample,
182+
],
183+
},
151184
entries={
152185
'GT': [
153186
[
154-
hl.Call(alleles=[0], phased=False),
155-
hl.Call(alleles=[0], phased=False),
156187
hl.missing(hl.tcall),
188+
hl.Call(alleles=[0], phased=False),
189+
hl.missing(hl.tcall), # X0
190+
hl.Call(alleles=[0, 0], phased=False), # XXY
191+
hl.Call(alleles=[0, 0], phased=False), # XYY
192+
hl.missing(hl.tcall), # XXX
193+
],
194+
],
195+
},
196+
).key_rows_by('locus')
197+
validate_imputed_sex_ploidy(mt, sex_check_ht)
198+
199+
# Invalid X chromosome case
200+
mt = hl.MatrixTable.from_parts(
201+
rows={
202+
'locus': [
203+
hl.Locus(
204+
contig='chrX',
205+
position=1,
206+
reference_genome='GRCh38',
207+
),
208+
],
209+
},
210+
cols={
211+
's': [
212+
female_sample,
213+
male_sample_1,
214+
male_sample_2,
215+
x0_sample,
216+
xxy_sample,
217+
xyy_sample,
218+
xxx_sample,
219+
],
220+
},
221+
entries={
222+
'GT': [
223+
[
224+
hl.Call(alleles=[0], phased=False), # invalid Female call
225+
hl.Call(alleles=[0], phased=False), # valid Male call
226+
hl.missing(hl.tcall), # invalid Male call
227+
hl.Call(alleles=[0], phased=False), # invalid X0 call
228+
hl.Call(alleles=[0], phased=False), # invalid XXY call
229+
hl.missing(hl.tcall), # valid XYY call
230+
hl.Call(alleles=[0, 0], phased=False), # valid XXX call
157231
],
158232
],
159233
},
160234
).key_rows_by('locus')
161235
self.assertRaisesRegex(
162236
SeqrValidationError,
163-
'66.67% of samples have misaligned ploidy',
237+
'57.14% of samples have misaligned ploidy',
164238
validate_imputed_sex_ploidy,
165239
mt,
166240
sex_check_ht,

v03_pipeline/lib/model/definitions.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,18 @@ class Sex(str, Enum):
1212
FEMALE = 'F'
1313
MALE = 'M'
1414
UNKNOWN = 'U'
15+
XXX = 'XXX'
16+
X0 = 'X0'
17+
XXY = 'XXY'
18+
XYY = 'XYY'
1519

1620
@property
1721
def imputed_sex_value(self):
1822
return {
1923
Sex.MALE: 'Male',
2024
Sex.FEMALE: 'Female',
2125
Sex.UNKNOWN: 'Unknown',
22-
}[self]
26+
}.get(self, self.name)
2327

2428

2529
class PipelineVersion(str, Enum):

v03_pipeline/var/test/pedigrees/test_pedigree_6.tsv

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,5 @@ R0116_sex_check_project2 family_1 family_1 ROS_006_18Y03227_D1 U
44
R0116_sex_check_project2 family_1 family_1 ROS_006_18Y03228_D1 ROS_006_18Y03226_D1 ROS_006_18Y03227_D1 F
55
R0116_sex_check_project2 family_2 family_2 ROS_007_19Y05919_D1 F
66
R0116_sex_check_project2 family_2 family_2 ROS_007_19Y05939_D1 F
7-
R0116_sex_check_project2 family_2 family_2 ROS_007_19Y05987_D1 ROS_007_19Y05919_D1 ROS_007_19Y05939_D1 F
7+
R0116_sex_check_project2 family_2 family_2 ROS_007_19Y05987_D1 ROS_007_19Y05919_D1 ROS_007_19Y05939_D1 F
8+
R0116_sex_check_project2 family_2 family_2 ROS_007_19Y05989_D1 ROS_007_19Y05919_D1 ROS_007_19Y05939_D1 XXX

v03_pipeline/var/test/sex_check/test_imputed_sex.tsv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@ entity:sample_id collaborator_participant_id collaborator_sample_id contaminatio
22
SM-DM66X abc_1 abc_1 0E+00 gs://datarepo-9cafeffd-bucket/f511b131-3f0d-4eb7-a7f0-b2b3d73dca3a/6f30a41f-1d91-44d1-915c-5c10c6d87fcd/WAL_LIS6100_LIS6101.qc-coverage-region-1_coverage_metrics.csv gs://datarepo-9cafeffd-bucket/f511b131-3f0d-4eb7-a7f0-b2b3d73dca3a/3e204a66-f044-4bdc-ade4-1671a0269214/WAL_LIS6100_LIS6101.cram.crai gs://datarepo-9cafeffd-bucket/f511b131-3f0d-4eb7-a7f0-b2b3d73dca3a/a6ed4850-6a69-412e-a071-bf8cce04fca0/WAL_LIS6100_LIS6101.cram.md5sum gs://datarepo-9cafeffd-bucket/f511b131-3f0d-4eb7-a7f0-b2b3d73dca3a/c51bbfd6-42f0-40ca-aa0c-b5eece935516/WAL_LIS6100_LIS6101.cram 8a07ce00-16a1-40f4-8666-c4cfaad1bbe1 07.021.604.3.7.8 cc9d9ed9-785a-407d-910e-d9bd46936fa6 2024-04-17T14:58:10 98.450000000 gs://datarepo-9cafeffd-bucket/f511b131-3f0d-4eb7-a7f0-b2b3d73dca3a/9e745b1d-2c00-44ce-bbfb-31c44369f4fe/WAL_LIS6100_LIS6101.mapping_metrics.csv DNA:DNA Genomic 35.730000000 Whole Blood:Whole Blood PT-24FB4 Pass PDO-32851 96.140000000 97.850000000 Male P-WG-0139 2017-03-15 04:00:00 Male RP-3071 gs://datarepo-9cafeffd-bucket/f511b131-3f0d-4eb7-a7f0-b2b3d73dca3a/360ec721-0af8-4085-a677-38c018069559/WAL_LIS6100_LIS6101.vcf.gz.tbi gs://datarepo-9cafeffd-bucket/f511b131-3f0d-4eb7-a7f0-b2b3d73dca3a/8da8cda2-497f-4a8b-a642-af4a4ad28aac/WAL_LIS6100_LIS6101.vcf.gz.md5sum gs://datarepo-9cafeffd-bucket/f511b131-3f0d-4eb7-a7f0-b2b3d73dca3a/0a2d93fb-8837-4b6f-ac68-a6b9701f9a08/WAL_LIS6100_LIS6101.vcf.gz 134324623400.000000000 gs://datarepo-9cafeffd-bucket/f511b131-3f0d-4eb7-a7f0-b2b3d73dca3a/f7b62337-1339-4c2e-8280-281c48604e07/WAL_LIS6100_LIS6101.vc_metrics.csv
33
SM-DM69X abc_2 abc_2 0E+00 gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/c4c07edf-7735-4aa7-9283-7cb2607b60a2/GLE-5774-3-3.qc-coverage-region-1_coverage_metrics.csv gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/dcd4c271-0249-47f1-8e91-81f74735c5a1/GLE-5774-3-3.cram.crai gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/ec41ec06-673f-4fe2-a063-23dc5fe1dcce/GLE-5774-3-3.cram.md5sum gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/aad0e270-2ad5-4f39-b968-9b4beafeb5cc/GLE-5774-3-3.cram a4b04a39-9234-4028-a155-442c4acf12a0 07.021.604.3.7.8 ce74d94c-c33d-49d7-85c9-5f3cbd08aff7 2024-04-17T15:02:46 99.800000000 gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/c3a9e6f2-4c68-410b-823d-46ca406e5061/GLE-5774-3-3.mapping_metrics.csv DNA:DNA Genomic 35.300000000 Whole Blood:Whole Blood PT-24OHM Pass PDO-32755 96.320000000 97.340000000 Female P-WG-0139 2017-04-12 04:00:00 Female RP-3061 gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/c71cd2a1-c789-4715-9ebc-dbfc40d9f2e2/GLE-5774-3-3.vcf.gz.tbi gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/957a99cb-c9a9-4fc5-a0ec-53f9e461469e/GLE-5774-3-3.vcf.gz.md5sum gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/df520949-5f2b-4976-9d46-80d1cc299813/GLE-5774-3-3.vcf.gz 133253714921.000000000 gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/2e98e51b-9394-4e64-977f-e9010a4e16dc/GLE-5774-3-3.vc_metrics.csv
44
SM-DPB5G abc_3 abc_3 0E+00 gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/432f8354-77e0-4381-9bb5-dfdc0633b5b2/PIE_OGI1433_002628_1.qc-coverage-region-1_coverage_metrics.csv gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/3dc623fa-2a45-4b3d-a0f8-fcdec09f9418/PIE_OGI1433_002628_1.cram.crai gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/895966ef-c705-4c18-952d-03863243a184/PIE_OGI1433_002628_1.cram.md5sum gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/96ca6d5f-fb23-4102-bb5e-c7bbfd194e1c/PIE_OGI1433_002628_1.cram ffb50687-165e-425a-a545-c3797d3a28d4 07.021.604.3.7.8 55729ba9-3ce4-47b3-9c3b-1148737ae40f 2024-04-17T15:07:57 99.670000000 gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/30f8e208-5d2d-4ce8-b835-695b5ed673f4/PIE_OGI1433_002628_1.mapping_metrics.csv DNA:DNA Genomic 41.910000000 Whole Blood:Whole Blood PT-25BR5 Pass PDO-32756 92.920000000 97.990000000 Unknown P-WG-0139 2017-05-19 04:00:00 Unknown RP-3062 gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/1641d1b2-1035-4cc3-9c8b-0c8cb430f56b/PIE_OGI1433_002628_1.vcf.gz.tbi gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/f5ba2708-899e-42e8-b287-fdf72c2e404d/PIE_OGI1433_002628_1.vcf.gz.md5sum gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/e925ee5d-a75e-471f-adfd-2756c8690069/PIE_OGI1433_002628_1.vcf.gz 156149580126.000000000 gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/df076bc5-9db8-44f0-a3fe-f693370634cc/PIE_OGI1433_002628_1.vc_metrics.csv
5+
SM-DPB5G abc_4 abc_4 0E+00 gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/432f8354-77e0-4381-9bb5-dfdc0633b5b2/PIE_OGI1433_002628_1.qc-coverage-region-1_coverage_metrics.csv gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/3dc623fa-2a45-4b3d-a0f8-fcdec09f9418/PIE_OGI1433_002628_1.cram.crai gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/895966ef-c705-4c18-952d-03863243a184/PIE_OGI1433_002628_1.cram.md5sum gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/96ca6d5f-fb23-4102-bb5e-c7bbfd194e1c/PIE_OGI1433_002628_1.cram ffb50687-165e-425a-a545-c3797d3a28d4 07.021.604.3.7.8 55729ba9-3ce4-47b3-9c3b-1148737ae40f 2024-04-17T15:07:57 99.670000000 gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/30f8e208-5d2d-4ce8-b835-695b5ed673f4/PIE_OGI1433_002628_1.mapping_metrics.csv DNA:DNA Genomic 41.910000000 Whole Blood:Whole Blood PT-25BR5 Pass PDO-32756 92.920000000 97.990000000 XYY P-WG-0139 2017-05-19 04:00:00 XYY RP-3062 gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/1641d1b2-1035-4cc3-9c8b-0c8cb430f56b/PIE_OGI1433_002628_1.vcf.gz.tbi gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/f5ba2708-899e-42e8-b287-fdf72c2e404d/PIE_OGI1433_002628_1.vcf.gz.md5sum gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/e925ee5d-a75e-471f-adfd-2756c8690069/PIE_OGI1433_002628_1.vcf.gz 156149580126.000000000 gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/df076bc5-9db8-44f0-a3fe-f693370634cc/PIE_OGI1433_002628_1.vc_metrics.csv
Binary file not shown.
Binary file not shown.
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
2-
Written with version 0.2.128-eead8100a1c1
3-
Created at 2024/05/02 16:11:09
2+
Written with version 0.2.132-678e1f52b999
3+
Created at 2024/10/29 17:04:32

0 commit comments

Comments
 (0)