Skip to content

Commit 2e8dbcf

Browse files
authored
bugfix: exclude samples from relationship checking that are not present in the expected loadable samples (#1003)
* bugfix: exclude samples from relationship checking that are not present in the expected loadable samples
* cleanup
1 parent 0eef897 commit 2e8dbcf

File tree

3 files changed: 97 additions & 76 deletions (+97 / -76 lines changed)

3 files changed

+97
-76
lines changed

v03_pipeline/lib/misc/family_loading_failures.py

Lines changed: 47 additions & 68 deletions
Original file line number | Diff line number | Diff line change
@@ -16,93 +16,71 @@ def passes_relatedness_check(
1616
relatedness_check_lookup: dict[tuple[str, str], list],
1717
sample_id: str,
1818
other_id: str,
19-
relation: Relation,
19+
expected_relation: Relation,
20+
additional_allowed_relation: Relation | None,
2021
) -> tuple[bool, str | None]:
2122
# No relationship to check, return true
2223
if other_id is None:
2324
return True, None
2425
coefficients = relatedness_check_lookup.get(
2526
(min(sample_id, other_id), max(sample_id, other_id)),
2627
)
27-
if not coefficients or not np.allclose(
28-
coefficients,
29-
relation.coefficients,
30-
atol=RELATEDNESS_TOLERANCE,
28+
if not coefficients or not any(
29+
np.allclose(
30+
coefficients,
31+
relation.coefficients,
32+
atol=RELATEDNESS_TOLERANCE,
33+
)
34+
for relation in (
35+
[expected_relation, additional_allowed_relation]
36+
if additional_allowed_relation
37+
else [expected_relation]
38+
)
3139
):
3240
return (
3341
False,
34-
f'Sample {sample_id} has expected relation "{relation.value}" to {other_id} but has coefficients {coefficients or []}',
42+
f'Sample {sample_id} has expected relation "{expected_relation.value}" to {other_id} but has coefficients {coefficients or []}',
3543
)
3644
return True, None
3745

3846

39-
def all_relatedness_checks( # noqa: C901
47+
def all_relatedness_checks(
4048
relatedness_check_lookup: dict[tuple[str, str], list],
49+
family: Family,
4150
sample: Sample,
4251
) -> list[str]:
4352
failure_reasons = []
44-
for parent_id in [sample.mother, sample.father]:
45-
success, reason = passes_relatedness_check(
46-
relatedness_check_lookup,
47-
sample.sample_id,
48-
parent_id,
49-
Relation.PARENT,
50-
)
51-
if not success:
52-
failure_reasons.append(reason)
53-
54-
for grandparent_id in [
55-
sample.maternal_grandmother,
56-
sample.maternal_grandfather,
57-
sample.paternal_grandmother,
58-
sample.paternal_grandfather,
53+
for relationship_set, relation, additional_allowed_relation in [
54+
([sample.mother, sample.father], Relation.PARENT_CHILD, None),
55+
(
56+
[
57+
sample.maternal_grandmother,
58+
sample.maternal_grandfather,
59+
sample.paternal_grandmother,
60+
sample.paternal_grandfather,
61+
],
62+
Relation.GRANDPARENT_GRANDCHILD,
63+
None,
64+
),
65+
(sample.siblings, Relation.SIBLING, None),
66+
(sample.half_siblings, Relation.HALF_SIBLING, Relation.SIBLING),
67+
(sample.aunt_nephews, Relation.AUNT_NEPHEW, None),
5968
]:
60-
success, reason = passes_relatedness_check(
61-
relatedness_check_lookup,
62-
sample.sample_id,
63-
grandparent_id,
64-
Relation.GRANDPARENT,
65-
)
66-
if not success:
67-
failure_reasons.append(reason)
68-
69-
for sibling_id in sample.siblings:
70-
success, reason = passes_relatedness_check(
71-
relatedness_check_lookup,
72-
sample.sample_id,
73-
sibling_id,
74-
Relation.SIBLING,
75-
)
76-
if not success:
77-
failure_reasons.append(reason)
78-
79-
for half_sibling_id in sample.half_siblings:
80-
# NB: A "half sibling" parsed from the pedigree may actually be a sibling, so we allow those
81-
# through as well.
82-
success1, _ = passes_relatedness_check(
83-
relatedness_check_lookup,
84-
sample.sample_id,
85-
half_sibling_id,
86-
Relation.SIBLING,
87-
)
88-
success2, reason = passes_relatedness_check(
89-
relatedness_check_lookup,
90-
sample.sample_id,
91-
half_sibling_id,
92-
Relation.HALF_SIBLING,
93-
)
94-
if not success1 and not success2:
95-
failure_reasons.append(reason)
96-
97-
for aunt_nephew_id in sample.aunt_nephews:
98-
success, reason = passes_relatedness_check(
99-
relatedness_check_lookup,
100-
sample.sample_id,
101-
aunt_nephew_id,
102-
Relation.AUNT_NEPHEW,
103-
)
104-
if not success:
105-
failure_reasons.append(reason)
69+
for other_id in relationship_set:
70+
# Handle case where relation is identified in the
71+
# pedigree as a "dummy" but is not included in
72+
# the list of samples to load.
73+
if other_id not in family.samples:
74+
continue
75+
success, reason = passes_relatedness_check(
76+
relatedness_check_lookup,
77+
sample.sample_id,
78+
other_id,
79+
relation,
80+
additional_allowed_relation,
81+
)
82+
if not success:
83+
failure_reasons.append(reason)
10684
return failure_reasons
10785

10886

@@ -162,6 +140,7 @@ def get_families_failed_relatedness_check(
162140
for sample in family.samples.values():
163141
failure_reasons = all_relatedness_checks(
164142
relatedness_check_lookup,
143+
family,
165144
sample,
166145
)
167146
if failure_reasons:

v03_pipeline/lib/misc/family_loading_failures_test.py

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
get_families_failed_sex_check,
1010
)
1111
from v03_pipeline.lib.misc.io import import_pedigree
12-
from v03_pipeline.lib.misc.pedigree import Sample, parse_pedigree_ht_to_families
12+
from v03_pipeline.lib.misc.pedigree import Family, Sample, parse_pedigree_ht_to_families
1313
from v03_pipeline.lib.model import Sex
1414

1515
TEST_PEDIGREE_6 = 'v03_pipeline/var/test/pedigrees/test_pedigree_6.tsv'
@@ -104,7 +104,21 @@ def test_all_relatedness_checks(self):
104104
paternal_grandfather='sample_3',
105105
half_siblings=['sample_4'],
106106
)
107-
failure_reasons = all_relatedness_checks(relatedness_check_lookup, sample)
107+
family = Family(
108+
family_guid='family_1a',
109+
samples={
110+
'sample_1': sample,
111+
'sample_2': Sample(sex=Sex.MALE, sample_id='sample_2'),
112+
'sample_3': Sample(sex=Sex.MALE, sample_id='sample_3'),
113+
'sample_4': Sample(sex=Sex.MALE, sample_id='sample_4'),
114+
'sample_5': Sample(sex=Sex.MALE, sample_id='sample_5'),
115+
},
116+
)
117+
failure_reasons = all_relatedness_checks(
118+
relatedness_check_lookup,
119+
family,
120+
sample,
121+
)
108122
self.assertListEqual(failure_reasons, [])
109123

110124
# Defined grandparent missing in relatedness table
@@ -117,12 +131,13 @@ def test_all_relatedness_checks(self):
117131
)
118132
failure_reasons = all_relatedness_checks(
119133
relatedness_check_lookup,
134+
family,
120135
sample,
121136
)
122137
self.assertListEqual(
123138
failure_reasons,
124139
[
125-
'Sample sample_1 has expected relation "grandparent" to sample_5 but has coefficients []',
140+
'Sample sample_1 has expected relation "grandparent_grandchild" to sample_5 but has coefficients []',
126141
],
127142
)
128143

@@ -140,6 +155,7 @@ def test_all_relatedness_checks(self):
140155
)
141156
failure_reasons = all_relatedness_checks(
142157
relatedness_check_lookup,
158+
family,
143159
sample,
144160
)
145161
self.assertListEqual(
@@ -167,16 +183,42 @@ def test_all_relatedness_checks(self):
167183
)
168184
failure_reasons = all_relatedness_checks(
169185
relatedness_check_lookup,
186+
family,
170187
sample,
171188
)
172189
self.assertListEqual(
173190
failure_reasons,
174191
[
175-
'Sample sample_1 has expected relation "parent" to sample_2 but has coefficients [0.5, 0.5, 0.5, 0.5]',
192+
'Sample sample_1 has expected relation "parent_child" to sample_2 but has coefficients [0.5, 0.5, 0.5, 0.5]',
176193
'Sample sample_1 has expected relation "sibling" to sample_4 but has coefficients [0.5, 0.5, 0, 0.25]',
177194
],
178195
)
179196

197+
# Some samples will include relationships with
198+
# samples that are not expected to be included
199+
# in the callset. These should not trigger relatedness
200+
# failures.
201+
sample = Sample(
202+
sex=Sex.FEMALE,
203+
sample_id='sample_1',
204+
mother='sample_2',
205+
)
206+
family = Family(
207+
family_guid='family_1a',
208+
samples={
209+
'sample_1': sample,
210+
},
211+
)
212+
failure_reasons = all_relatedness_checks(
213+
{},
214+
family,
215+
sample,
216+
)
217+
self.assertListEqual(
218+
failure_reasons,
219+
[],
220+
)
221+
180222
def test_get_families_failed_sex_check(self):
181223
sex_check_ht = hl.Table.parallelize(
182224
[

v03_pipeline/lib/misc/pedigree.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,17 @@
88

99

1010
class Relation(Enum):
11-
PARENT = 'parent'
12-
GRANDPARENT = 'grandparent'
11+
PARENT_CHILD = 'parent_child'
12+
GRANDPARENT_GRANDCHILD = 'grandparent_grandchild'
1313
SIBLING = 'sibling'
1414
HALF_SIBLING = 'half_sibling'
1515
AUNT_NEPHEW = 'aunt_nephew'
1616

1717
@property
1818
def coefficients(self):
1919
return {
20-
Relation.PARENT: [0, 1, 0, 0.5],
21-
Relation.GRANDPARENT: [0.5, 0.5, 0, 0.25],
20+
Relation.PARENT_CHILD: [0, 1, 0, 0.5],
21+
Relation.GRANDPARENT_GRANDCHILD: [0.5, 0.5, 0, 0.25],
2222
Relation.SIBLING: [0.25, 0.5, 0.25, 0.5],
2323
Relation.HALF_SIBLING: [0.5, 0.5, 0, 0.25],
2424
Relation.AUNT_NEPHEW: [0.5, 0.5, 0, 0.25],

0 commit comments

Comments
 (0)