Skip to content

Commit c85784e

Browse files
committed
aunt uncles bug
1 parent f76c161 commit c85784e

File tree

7 files changed

+186
-80
lines changed

7 files changed

+186
-80
lines changed

v03_pipeline/lib/methods/sex_check.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import hail as hl
22

3-
from v03_pipeline.lib.model import Ploidy
3+
from v03_pipeline.lib.model import Sex
44

55
IMPUTE_SEX_ANNOTATIONS = [
66
'is_female',
@@ -41,13 +41,13 @@ def call_sex(mt: hl.MatrixTable) -> hl.Table:
4141
ht = ht.annotate(
4242
sex=(
4343
hl.case()
44-
.when(hl.is_missing(ht.is_female), Ploidy.UNKNOWN.value)
45-
.when(ht.is_female, Ploidy.FEMALE.value)
46-
.default(Ploidy.MALE.value)
44+
.when(hl.is_missing(ht.is_female), Sex.UNKNOWN.value)
45+
.when(ht.is_female, Sex.FEMALE.value)
46+
.default(Sex.MALE.value)
4747
),
4848
)
4949
ambiguous_perc = ht.aggregate(
50-
hl.agg.fraction(ht.sex == Ploidy.UNKNOWN.value),
50+
hl.agg.fraction(ht.sex == Sex.UNKNOWN.value),
5151
)
5252
if ambiguous_perc > AMBIGUOUS_THRESHOLD_PERC:
5353
msg = f'{ambiguous_perc:.2%} of samples identified as ambiguous. Please contact the methods team to investigate the callset.'

v03_pipeline/lib/misc/family_loading_failures.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import numpy as np
55

66
from v03_pipeline.lib.misc.pedigree import Family, Relation, Sample
7-
from v03_pipeline.lib.model import Ploidy
7+
from v03_pipeline.lib.model import Sex
88

99

1010
def passes_relatedness_check(
@@ -121,13 +121,13 @@ def build_relatedness_check_lookup(
121121
def build_sex_check_lookup(
122122
sex_check_ht: hl.Table,
123123
remap_lookup: hl.dict,
124-
) -> dict[str, Ploidy]:
124+
) -> dict[str, Sex]:
125125
# Build sex check lookup
126126
sex_check_ht = sex_check_ht.key_by(
127127
s=remap_lookup.get(sex_check_ht.s, sex_check_ht.s),
128128
)
129129
sex_check_ht = sex_check_ht.select('sex')
130-
return {r.s: Ploidy(r.sex) for r in sex_check_ht.collect()}
130+
return {r.s: Sex(r.sex) for r in sex_check_ht.collect()}
131131

132132

133133
def get_families_failed_missing_samples(

v03_pipeline/lib/misc/family_loading_failures_test.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
)
1111
from v03_pipeline.lib.misc.io import import_pedigree
1212
from v03_pipeline.lib.misc.pedigree import Sample, parse_pedigree_ht_to_families
13-
from v03_pipeline.lib.model import Ploidy
13+
from v03_pipeline.lib.model import Sex
1414

1515
TEST_PEDIGREE_6 = 'v03_pipeline/var/test/pedigrees/test_pedigree_6.tsv'
1616

@@ -72,12 +72,12 @@ def test_build_sex_check_lookup(self):
7272
self.assertEqual(
7373
build_sex_check_lookup(ht, hl.dict({'ROS_006_18Y03226_D1': 'remapped_id'})),
7474
{
75-
'remapped_id': Ploidy.MALE,
76-
'ROS_006_18Y03227_D1': Ploidy.MALE,
77-
'ROS_006_18Y03228_D1': Ploidy.MALE,
78-
'ROS_007_19Y05919_D1': Ploidy.MALE,
79-
'ROS_007_19Y05939_D1': Ploidy.FEMALE,
80-
'ROS_007_19Y05987_D1': Ploidy.MALE,
75+
'remapped_id': Sex.MALE,
76+
'ROS_006_18Y03227_D1': Sex.MALE,
77+
'ROS_006_18Y03228_D1': Sex.MALE,
78+
'ROS_007_19Y05919_D1': Sex.MALE,
79+
'ROS_007_19Y05939_D1': Sex.FEMALE,
80+
'ROS_007_19Y05987_D1': Sex.MALE,
8181
},
8282
)
8383

@@ -96,7 +96,7 @@ def test_all_relatedness_checks(self):
9696
('sample_1', 'sample_4'): [0.25, 0.5, 0.25, 0.5],
9797
}
9898
sample = Sample(
99-
sex=Ploidy.FEMALE,
99+
sex=Sex.FEMALE,
100100
sample_id='sample_1',
101101
mother='sample_2',
102102
paternal_grandfather='sample_3',
@@ -107,7 +107,7 @@ def test_all_relatedness_checks(self):
107107

108108
# Defined grandparent missing in relatedness table
109109
sample = Sample(
110-
sex=Ploidy.FEMALE,
110+
sex=Sex.FEMALE,
111111
sample_id='sample_1',
112112
mother='sample_2',
113113
paternal_grandfather='sample_3',
@@ -130,7 +130,7 @@ def test_all_relatedness_checks(self):
130130
('sample_1', 'sample_4'): [0.5, 0.5, 0, 0.25],
131131
}
132132
sample = Sample(
133-
sex=Ploidy.FEMALE,
133+
sex=Sex.FEMALE,
134134
sample_id='sample_1',
135135
mother='sample_2',
136136
paternal_grandfather='sample_3',
@@ -157,7 +157,7 @@ def test_all_relatedness_checks(self):
157157
],
158158
}
159159
sample = Sample(
160-
sex=Ploidy.FEMALE,
160+
sex=Sex.FEMALE,
161161
sample_id='sample_1',
162162
mother='sample_2',
163163
paternal_grandfather='sample_3',

v03_pipeline/lib/misc/pedigree.py

Lines changed: 39 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import hail as hl
66

7-
from v03_pipeline.lib.model import Ploidy
7+
from v03_pipeline.lib.model import Sex
88

99

1010
class Relation(Enum):
@@ -28,7 +28,7 @@ def coefficients(self):
2828
@dataclass
2929
class Sample:
3030
sample_id: str
31-
sex: Ploidy
31+
sex: Sex
3232
mother: str = None
3333
father: str = None
3434
maternal_grandmother: str = None
@@ -54,6 +54,26 @@ def is_aunt_nephew(self: 'Sample', other: 'Sample') -> bool:
5454
and (self.paternal_grandfather == other.father)
5555
)
5656

57+
def is_in_direct_lineage(self: 'Sample', other: 'Sample') -> bool:
58+
return (
59+
self.sample_id in {
60+
other.mother,
61+
other.father,
62+
other.maternal_grandmother,
63+
other.maternal_grandfather,
64+
other.paternal_grandmother,
65+
other.paternal_grandfather,
66+
}
67+
or other.sample_id in {
68+
self.mother,
69+
self.father,
70+
self.maternal_grandmother,
71+
self.maternal_grandfather,
72+
self.paternal_grandmother,
73+
self.paternal_grandfather,
74+
}
75+
)
76+
5777

5878
@dataclass
5979
class Family:
@@ -69,7 +89,7 @@ def parse_direct_lineage(rows: list[hl.Struct]) -> dict[str, Sample]: # noqa: C
6989
for row in rows:
7090
samples[row.s] = Sample(
7191
sample_id=row.s,
72-
sex=Ploidy(row.sex),
92+
sex=Sex(row.sex),
7393
mother=row.maternal_s,
7494
father=row.paternal_s,
7595
)
@@ -107,56 +127,38 @@ def parse_collateral_lineage(
107127
# A sample_i that is siblings with sample_j, will list sample_j as as sibling, but
108128
# sample_j will not list sample_i as a sibling. Relationships only appear in the
109129
# ibd table a single time, so we only need to check the pairing once.
110-
for sample_i, sample_j in itertools.combinations(samples.keys(), 2):
111-
# If other sample is already related, continue
112-
if sample_j in {
113-
samples[sample_i].mother,
114-
samples[sample_i].father,
115-
samples[sample_i].maternal_grandmother,
116-
samples[sample_i].maternal_grandfather,
117-
samples[sample_i].paternal_grandmother,
118-
samples[sample_i].paternal_grandfather,
119-
}:
130+
for sample_i, sample_j in itertools.combinations(samples.values(), 2):
131+
# If sample is already related from direct relationships, continue
132+
if sample_i.is_in_direct_lineage(sample_j):
120133
continue
121134

122135
# If both parents are identified and the same, samples are siblings.
123136
if (
124-
samples[sample_i].mother
125-
and samples[sample_i].father
126-
and (samples[sample_i].mother == samples[sample_j].mother)
127-
and (samples[sample_i].father == samples[sample_j].father)
137+
sample_i.mother
138+
and sample_i.father
139+
and (sample_i.mother == sample_j.mother)
140+
and (sample_i.father == sample_j.father)
128141
):
129-
samples[sample_i].siblings.append(
130-
sample_j,
131-
)
142+
sample_i.siblings.append(sample_j.sample_id)
132143
continue
133144

134145
# If only a single parent is identified and the same, samples are half siblings
135146
if (
136-
samples[sample_i].mother
137-
and samples[sample_i].mother == samples[sample_j].mother
147+
sample_i.mother
148+
and sample_i.mother == sample_j.mother
138149
) or (
139-
samples[sample_i].father
140-
and samples[sample_i].father == samples[sample_j].father
150+
sample_i.father
151+
and sample_i.father == sample_j.father
141152
):
142-
samples[sample_i].half_siblings.append(
143-
sample_j,
144-
)
153+
sample_i.half_siblings.append(sample_j.sample_id)
145154
continue
146155

147156
# If either set of one's grandparents is identified and equal to the other's parents,
148157
# they're aunt/uncle related
149-
# NB: because we will only check an i, j pair of samples a single time, (itertools.combinations)
158+
# NB: because we will only check an i, j pair of samples a single time, (itertools.combinations)
150159
# we need to check both grandparents_i == parents_j and parents_i == grandparents_j.
151-
# fmt: off
152-
if (
153-
samples[sample_i].is_aunt_nephew(samples[sample_j])
154-
or samples[sample_j].is_aunt_nephew(samples[sample_i])
155-
):
156-
samples[sample_i].aunt_nephews.append(
157-
sample_j,
158-
)
159-
# fmt: on
160+
if (sample_i.is_aunt_nephew(sample_j) or sample_j.is_aunt_nephew(sample_i)):
161+
sample_i.aunt_nephews.append(sample_j.sample_id)
160162
return samples
161163

162164
@classmethod

0 commit comments

Comments
 (0)