4
4
5
5
import hail as hl
6
6
7
- from v03_pipeline .lib .model import Ploidy
7
+ from v03_pipeline .lib .model import Sex
8
8
9
9
10
10
class Relation (Enum ):
@@ -28,7 +28,7 @@ def coefficients(self):
28
28
@dataclass
29
29
class Sample :
30
30
sample_id : str
31
- sex : Ploidy
31
+ sex : Sex
32
32
mother : str = None
33
33
father : str = None
34
34
maternal_grandmother : str = None
@@ -54,6 +54,26 @@ def is_aunt_nephew(self: 'Sample', other: 'Sample') -> bool:
54
54
and (self .paternal_grandfather == other .father )
55
55
)
56
56
57
+ def is_in_direct_lineage (self : 'Sample' , other : 'Sample' ) -> bool :
58
+ return (
59
+ self .sample_id in {
60
+ other .mother ,
61
+ other .father ,
62
+ other .maternal_grandmother ,
63
+ other .maternal_grandfather ,
64
+ other .paternal_grandmother ,
65
+ other .paternal_grandfather ,
66
+ }
67
+ or other .sample_id in {
68
+ self .mother ,
69
+ self .father ,
70
+ self .maternal_grandmother ,
71
+ self .maternal_grandfather ,
72
+ self .paternal_grandmother ,
73
+ self .paternal_grandfather ,
74
+ }
75
+ )
76
+
57
77
58
78
@dataclass
59
79
class Family :
@@ -69,7 +89,7 @@ def parse_direct_lineage(rows: list[hl.Struct]) -> dict[str, Sample]: # noqa: C
69
89
for row in rows :
70
90
samples [row .s ] = Sample (
71
91
sample_id = row .s ,
72
- sex = Ploidy (row .sex ),
92
+ sex = Sex (row .sex ),
73
93
mother = row .maternal_s ,
74
94
father = row .paternal_s ,
75
95
)
@@ -107,56 +127,38 @@ def parse_collateral_lineage(
107
127
# A sample_i that is siblings with sample_j, will list sample_j as a sibling, but
108
128
# sample_j will not list sample_i as a sibling. Relationships only appear in the
109
129
# ibd table a single time, so we only need to check the pairing once.
110
- for sample_i , sample_j in itertools .combinations (samples .keys (), 2 ):
111
- # If other sample is already related, continue
112
- if sample_j in {
113
- samples [sample_i ].mother ,
114
- samples [sample_i ].father ,
115
- samples [sample_i ].maternal_grandmother ,
116
- samples [sample_i ].maternal_grandfather ,
117
- samples [sample_i ].paternal_grandmother ,
118
- samples [sample_i ].paternal_grandfather ,
119
- }:
130
+ for sample_i , sample_j in itertools .combinations (samples .values (), 2 ):
131
+ # If sample is already related from direct relationships, continue
132
+ if sample_i .is_in_direct_lineage (sample_j ):
120
133
continue
121
134
122
135
# If both parents are identified and the same, samples are siblings.
123
136
if (
124
- samples [ sample_i ] .mother
125
- and samples [ sample_i ] .father
126
- and (samples [ sample_i ] .mother == samples [ sample_j ] .mother )
127
- and (samples [ sample_i ] .father == samples [ sample_j ] .father )
137
+ sample_i .mother
138
+ and sample_i .father
139
+ and (sample_i .mother == sample_j .mother )
140
+ and (sample_i .father == sample_j .father )
128
141
):
129
- samples [sample_i ].siblings .append (
130
- sample_j ,
131
- )
142
+ sample_i .siblings .append (sample_j .sample_id )
132
143
continue
133
144
134
145
# If only a single parent is identified and the same, samples are half siblings
135
146
if (
136
- samples [ sample_i ] .mother
137
- and samples [ sample_i ] .mother == samples [ sample_j ] .mother
147
+ sample_i .mother
148
+ and sample_i .mother == sample_j .mother
138
149
) or (
139
- samples [ sample_i ] .father
140
- and samples [ sample_i ] .father == samples [ sample_j ] .father
150
+ sample_i .father
151
+ and sample_i .father == sample_j .father
141
152
):
142
- samples [sample_i ].half_siblings .append (
143
- sample_j ,
144
- )
153
+ sample_i .half_siblings .append (sample_j .sample_id )
145
154
continue
146
155
147
156
# If either set of one's grandparents is identified and equal to the other's parents,
148
157
# they're aunt/uncle related
149
- # NB: because we will only check an i, j pair of samples a single time, (itertools.combinations)
158
+ # NB: because we will only check an i, j pair of samples a single time, (itertools.combinations)
150
159
# we need to check both grandparents_i == parents_j and parents_i == grandparents_j.
151
- # fmt: off
152
- if (
153
- samples [sample_i ].is_aunt_nephew (samples [sample_j ])
154
- or samples [sample_j ].is_aunt_nephew (samples [sample_i ])
155
- ):
156
- samples [sample_i ].aunt_nephews .append (
157
- sample_j ,
158
- )
159
- # fmt: on
160
+ if (sample_i .is_aunt_nephew (sample_j ) or sample_j .is_aunt_nephew (sample_i )):
161
+ sample_i .aunt_nephews .append (sample_j .sample_id )
160
162
return samples
161
163
162
164
@classmethod
0 commit comments