Skip to content

Commit 607401f

Browse files
committed
handle sex aneuploidy in pipeline
1 parent a4b08bb commit 607401f

File tree

62 files changed

+4709
-13
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

62 files changed

+4709
-13
lines changed

v03_pipeline/lib/misc/family_loading_failures_test.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ def test_build_sex_check_lookup(self):
6262
{'s': 'ROS_007_19Y05919_D1', 'predicted_sex': 'F'},
6363
{'s': 'ROS_007_19Y05939_D1', 'predicted_sex': 'M'},
6464
{'s': 'ROS_007_19Y05987_D1', 'predicted_sex': 'U'},
65+
{'s': 'ROS_007_19Y05989_D1', 'predicted_sex': 'X0'},
6566
],
6667
hl.tstruct(
6768
s=hl.tstr,
@@ -78,6 +79,7 @@ def test_build_sex_check_lookup(self):
7879
'ROS_007_19Y05919_D1': Sex.FEMALE,
7980
'ROS_007_19Y05939_D1': Sex.MALE,
8081
'ROS_007_19Y05987_D1': Sex.UNKNOWN,
82+
'ROS_007_19Y05989_D1': Sex.X0,
8183
},
8284
)
8385

@@ -184,6 +186,7 @@ def test_get_families_failed_sex_check(self):
184186
{'s': 'ROS_007_19Y05919_D1', 'predicted_sex': 'F'},
185187
{'s': 'ROS_007_19Y05939_D1', 'predicted_sex': 'M'},
186188
{'s': 'ROS_007_19Y05987_D1', 'predicted_sex': 'U'}, # Pedigree Sex F
189+
{'s': 'ROS_007_19Y05989_D1', 'predicted_sex': 'XXX'},
187190
],
188191
hl.tstruct(
189192
s=hl.tstr,

v03_pipeline/lib/misc/io_test.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ def test_import_imputed_sex(self) -> None:
4646
hl.Struct(s='abc_1', predicted_sex='M'),
4747
hl.Struct(s='abc_2', predicted_sex='F'),
4848
hl.Struct(s='abc_3', predicted_sex='U'),
49+
hl.Struct(s='abc_4', predicted_sex='XYY'),
4950
],
5051
)
5152

v03_pipeline/lib/misc/validation.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -173,9 +173,17 @@ def validate_imputed_sex_ploidy(
173173
& (sex_check_ht[mt.s].predicted_sex == Sex.MALE.value)
174174
)
175175
| (
176-
# At least one call is haploid but the sex is Female
176+
# At least one call is haploid but the sex is Female, X0, XXY, XYY, or XXX
177177
hl.agg.any(~mt.GT.is_diploid())
178-
& (sex_check_ht[mt.s].predicted_sex == Sex.FEMALE.value)
178+
& hl.literal(
179+
{
180+
Sex.FEMALE.value,
181+
Sex.X0.value,
182+
Sex.XYY.value,
183+
Sex.XXY.value,
184+
Sex.XXX.value,
185+
},
186+
).contains(sex_check_ht[mt.s].predicted_sex)
179187
)
180188
),
181189
)

v03_pipeline/lib/misc/validation_test.py

Lines changed: 81 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,18 @@ def test_validate_allele_type(self) -> None:
113113

114114
@patch('v03_pipeline.lib.misc.validation.Env')
115115
def test_validate_imputed_sex_ploidy(self, mock_env: Mock) -> None:
116+
female_sample = 'HG00731_1'
117+
male_sample_1 = 'HG00732_1'
118+
male_sample_2 = 'HG00732_1'
119+
x0_sample = 'NA20899_1'
120+
xxy_sample = 'NA20889_1'
121+
xyy_sample = 'NA20891_1'
122+
xxx_sample = 'NA20892_1'
123+
116124
mock_env.CHECK_SEX_AND_RELATEDNESS = True
117125
sex_check_ht = hl.read_table(TEST_SEX_CHECK_1)
126+
127+
# All calls on X chromosome are valid
118128
mt = hl.MatrixTable.from_parts(
119129
rows={
120130
'locus': [
@@ -125,42 +135,106 @@ def test_validate_imputed_sex_ploidy(self, mock_env: Mock) -> None:
125135
),
126136
],
127137
},
128-
cols={'s': ['HG00731_1', 'HG00732_1']},
138+
cols={
139+
's': [
140+
female_sample,
141+
male_sample_1,
142+
x0_sample,
143+
xxy_sample,
144+
xyy_sample,
145+
xxx_sample,
146+
],
147+
},
129148
entries={
130149
'GT': [
131150
[
132151
hl.Call(alleles=[0, 0], phased=False),
133152
hl.Call(alleles=[0], phased=False),
153+
hl.Call(alleles=[0, 0], phased=False), # X0
154+
hl.Call(alleles=[0, 0], phased=False), # XXY
155+
hl.Call(alleles=[0, 0], phased=False), # XYY
156+
hl.Call(alleles=[0, 0], phased=False), # XXX
134157
],
135158
],
136159
},
137160
).key_rows_by('locus')
138161
validate_imputed_sex_ploidy(mt, sex_check_ht)
162+
163+
# All calls on Y chromosome are valid
139164
mt = hl.MatrixTable.from_parts(
140165
rows={
141166
'locus': [
142167
hl.Locus(
143-
contig='chrX',
168+
contig='chrY',
144169
position=1,
145170
reference_genome='GRCh38',
146171
),
147172
],
148173
},
149-
# Male, Female, Male
150-
cols={'s': ['HG00731_1', 'HG00732_1', 'NA19678_1']},
174+
cols={
175+
's': [
176+
female_sample,
177+
male_sample_1,
178+
x0_sample,
179+
xxy_sample,
180+
xyy_sample,
181+
xxx_sample,
182+
],
183+
},
151184
entries={
152185
'GT': [
153186
[
154-
hl.Call(alleles=[0], phased=False),
155-
hl.Call(alleles=[0], phased=False),
156187
hl.missing(hl.tcall),
188+
hl.Call(alleles=[0], phased=False),
189+
hl.missing(hl.tcall), # X0
190+
hl.Call(alleles=[0, 0], phased=False), # XXY
191+
hl.Call(alleles=[0, 0], phased=False), # XYY
192+
hl.missing(hl.tcall), # XXX
193+
],
194+
],
195+
},
196+
).key_rows_by('locus')
197+
validate_imputed_sex_ploidy(mt, sex_check_ht)
198+
199+
# Invalid X chromosome case
200+
mt = hl.MatrixTable.from_parts(
201+
rows={
202+
'locus': [
203+
hl.Locus(
204+
contig='chrX',
205+
position=1,
206+
reference_genome='GRCh38',
207+
),
208+
],
209+
},
210+
cols={
211+
's': [
212+
female_sample,
213+
male_sample_1,
214+
male_sample_2,
215+
x0_sample,
216+
xxy_sample,
217+
xyy_sample,
218+
xxx_sample,
219+
],
220+
},
221+
entries={
222+
'GT': [
223+
[
224+
hl.Call(alleles=[0], phased=False), # invalid Female call
225+
hl.Call(alleles=[0], phased=False), # valid Male call
226+
hl.missing(hl.tcall), # invalid Male call
227+
hl.Call(alleles=[0], phased=False), # invalid X0 call
228+
hl.Call(alleles=[0], phased=False), # invalid XXY call
229+
hl.missing(hl.tcall), # valid XYY call
230+
hl.Call(alleles=[0, 0], phased=False), # valid XXX call
157231
],
158232
],
159233
},
160234
).key_rows_by('locus')
161235
self.assertRaisesRegex(
162236
SeqrValidationError,
163-
'66.67% of samples have misaligned ploidy',
237+
'57.14% of samples have misaligned ploidy',
164238
validate_imputed_sex_ploidy,
165239
mt,
166240
sex_check_ht,

v03_pipeline/lib/model/definitions.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,18 @@ class Sex(str, Enum):
1212
FEMALE = 'F'
1313
MALE = 'M'
1414
UNKNOWN = 'U'
15+
XXX = 'XXX'
16+
X0 = 'X0'
17+
XXY = 'XXY'
18+
XYY = 'XYY'
1519

1620
@property
1721
def imputed_sex_value(self):
1822
return {
1923
Sex.MALE: 'Male',
2024
Sex.FEMALE: 'Female',
2125
Sex.UNKNOWN: 'Unknown',
22-
}[self]
26+
}.get(self, self.name)
2327

2428

2529
class PipelineVersion(str, Enum):

v03_pipeline/var/test/pedigrees/test_pedigree_6.tsv

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,5 @@ R0116_sex_check_project2 family_1 family_1 ROS_006_18Y03227_D1 U
44
R0116_sex_check_project2 family_1 family_1 ROS_006_18Y03228_D1 ROS_006_18Y03226_D1 ROS_006_18Y03227_D1 F
55
R0116_sex_check_project2 family_2 family_2 ROS_007_19Y05919_D1 F
66
R0116_sex_check_project2 family_2 family_2 ROS_007_19Y05939_D1 F
7-
R0116_sex_check_project2 family_2 family_2 ROS_007_19Y05987_D1 ROS_007_19Y05919_D1 ROS_007_19Y05939_D1 F
7+
R0116_sex_check_project2 family_2 family_2 ROS_007_19Y05987_D1 ROS_007_19Y05919_D1 ROS_007_19Y05939_D1 F
8+
R0116_sex_check_project2 family_2 family_2 ROS_007_19Y05989_D1 ROS_007_19Y05919_D1 ROS_007_19Y05939_D1 XXX

0 commit comments

Comments
 (0)