Skip to content

Commit fc3df35

Browse files
committed
Merge branch 'benb/lookup_table_refactor' of github.com:broadinstitute/seqr-loading-pipelines into benb/lookup_table_refactor
2 parents 4d51857 + 6ce3b54 commit fc3df35

File tree

3 files changed

+137
-36
lines changed

3 files changed

+137
-36
lines changed

v03_pipeline/lib/misc/pedigree.py

Lines changed: 31 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,23 @@ def is_aunt_nephew(self: 'Sample', other: 'Sample') -> bool:
5454
and (self.paternal_grandfather == other.father)
5555
)
5656

57+
def is_in_direct_lineage(self: 'Sample', other: 'Sample') -> bool:
58+
return self.sample_id in {
59+
other.mother,
60+
other.father,
61+
other.maternal_grandmother,
62+
other.maternal_grandfather,
63+
other.paternal_grandmother,
64+
other.paternal_grandfather,
65+
} or other.sample_id in {
66+
self.mother,
67+
self.father,
68+
self.maternal_grandmother,
69+
self.maternal_grandfather,
70+
self.paternal_grandmother,
71+
self.paternal_grandfather,
72+
}
73+
5774

5875
@dataclass
5976
class Family:
@@ -107,56 +124,34 @@ def parse_collateral_lineage(
107124
# A sample_i that is siblings with sample_j will list sample_j as a sibling, but
108125
# sample_j will not list sample_i as a sibling. Relationships only appear in the
109126
# ibd table a single time, so we only need to check the pairing once.
110-
for sample_i, sample_j in itertools.combinations(samples.keys(), 2):
111-
# If other sample is already related, continue
112-
if sample_j in {
113-
samples[sample_i].mother,
114-
samples[sample_i].father,
115-
samples[sample_i].maternal_grandmother,
116-
samples[sample_i].maternal_grandfather,
117-
samples[sample_i].paternal_grandmother,
118-
samples[sample_i].paternal_grandfather,
119-
}:
127+
for sample_i, sample_j in itertools.combinations(samples.values(), 2):
128+
# If sample is already related from direct relationships, continue
129+
if sample_i.is_in_direct_lineage(sample_j):
120130
continue
121131

122132
# If both parents are identified and the same, samples are siblings.
123133
if (
124-
samples[sample_i].mother
125-
and samples[sample_i].father
126-
and (samples[sample_i].mother == samples[sample_j].mother)
127-
and (samples[sample_i].father == samples[sample_j].father)
134+
sample_i.mother
135+
and sample_i.father
136+
and (sample_i.mother == sample_j.mother)
137+
and (sample_i.father == sample_j.father)
128138
):
129-
samples[sample_i].siblings.append(
130-
sample_j,
131-
)
139+
sample_i.siblings.append(sample_j.sample_id)
132140
continue
133141

134142
# If only a single parent is identified and the same, samples are half siblings
135-
if (
136-
samples[sample_i].mother
137-
and samples[sample_i].mother == samples[sample_j].mother
138-
) or (
139-
samples[sample_i].father
140-
and samples[sample_i].father == samples[sample_j].father
143+
if (sample_i.mother and sample_i.mother == sample_j.mother) or (
144+
sample_i.father and sample_i.father == sample_j.father
141145
):
142-
samples[sample_i].half_siblings.append(
143-
sample_j,
144-
)
146+
sample_i.half_siblings.append(sample_j.sample_id)
145147
continue
146148

147149
# If either set of one's grandparents is identified and equal to the other's parents,
148150
# they're aunt/uncle related
149-
# NB: because we will only check an i, j pair of samples a single time, (itertools.combinations)
151+
# NB: because we will only check an i, j pair of samples a single time, (itertools.combinations)
150152
# we need to check both grandparents_i == parents_j and parents_i == grandparents_j.
151-
# fmt: off
152-
if (
153-
samples[sample_i].is_aunt_nephew(samples[sample_j])
154-
or samples[sample_j].is_aunt_nephew(samples[sample_i])
155-
):
156-
samples[sample_i].aunt_nephews.append(
157-
sample_j,
158-
)
159-
# fmt: on
153+
if sample_i.is_aunt_nephew(sample_j) or sample_j.is_aunt_nephew(sample_i):
154+
sample_i.aunt_nephews.append(sample_j.sample_id)
160155
return samples
161156

162157
@classmethod

v03_pipeline/lib/misc/pedigree_test.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,107 @@ def test_parse_lineage(self) -> None:
205205
},
206206
)
207207

208+
def test_parse_parent_not_aunt_uncle(self) -> None:
209+
samples = Family.parse_direct_lineage(
210+
[
211+
hl.Struct(s='sample_1', maternal_s=None, paternal_s=None, sex='F'),
212+
hl.Struct(
213+
s='sample_2',
214+
maternal_s=None,
215+
paternal_s=None,
216+
sex='M',
217+
),
218+
hl.Struct(
219+
s='sample_3',
220+
maternal_s='sample_1',
221+
paternal_s='sample_2',
222+
sex='F',
223+
),
224+
hl.Struct(
225+
s='sample_4',
226+
maternal_s='sample_3',
227+
paternal_s=None,
228+
sex='F',
229+
),
230+
hl.Struct(
231+
s='sample_5',
232+
maternal_s='sample_3',
233+
paternal_s=None,
234+
sex='F',
235+
),
236+
],
237+
)
238+
self.assertEqual(
239+
Family.parse_collateral_lineage(samples),
240+
{
241+
'sample_1': Sample(
242+
sample_id='sample_1',
243+
sex=Sex.FEMALE,
244+
mother=None,
245+
father=None,
246+
maternal_grandmother=None,
247+
maternal_grandfather=None,
248+
paternal_grandmother=None,
249+
paternal_grandfather=None,
250+
siblings=[],
251+
half_siblings=[],
252+
aunt_nephews=[],
253+
),
254+
'sample_2': Sample(
255+
sample_id='sample_2',
256+
sex=Sex.MALE,
257+
mother=None,
258+
father=None,
259+
maternal_grandmother=None,
260+
maternal_grandfather=None,
261+
paternal_grandmother=None,
262+
paternal_grandfather=None,
263+
siblings=[],
264+
half_siblings=[],
265+
aunt_nephews=[],
266+
),
267+
'sample_3': Sample(
268+
sample_id='sample_3',
269+
sex=Sex.FEMALE,
270+
mother='sample_1',
271+
father='sample_2',
272+
maternal_grandmother=None,
273+
maternal_grandfather=None,
274+
paternal_grandmother=None,
275+
paternal_grandfather=None,
276+
siblings=[],
277+
half_siblings=[],
278+
aunt_nephews=[],
279+
),
280+
'sample_4': Sample(
281+
sample_id='sample_4',
282+
sex=Sex.FEMALE,
283+
mother='sample_3',
284+
father=None,
285+
maternal_grandmother='sample_1',
286+
maternal_grandfather='sample_2',
287+
paternal_grandmother=None,
288+
paternal_grandfather=None,
289+
siblings=[],
290+
half_siblings=['sample_5'],
291+
aunt_nephews=[],
292+
),
293+
'sample_5': Sample(
294+
sample_id='sample_5',
295+
sex=Sex.FEMALE,
296+
mother='sample_3',
297+
father=None,
298+
maternal_grandmother='sample_1',
299+
maternal_grandfather='sample_2',
300+
paternal_grandmother=None,
301+
paternal_grandfather=None,
302+
siblings=[],
303+
half_siblings=[],
304+
aunt_nephews=[],
305+
),
306+
},
307+
)
308+
208309
def test_parse_project(self) -> None:
209310
pedigree_ht = import_pedigree(TEST_PEDIGREE_2)
210311
self.assertCountEqual(

v03_pipeline/lib/tasks/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
from v03_pipeline.lib.tasks.delete_old_runs import DeleteOldRunsTask
2+
from v03_pipeline.lib.tasks.reference_data.write_cached_reference_dataset_query import (
3+
WriteCachedReferenceDatasetQuery,
4+
)
5+
from v03_pipeline.lib.tasks.update_project_table import UpdateProjectTableTask
26
from v03_pipeline.lib.tasks.update_lookup_table import (
37
UpdateLookupTableTask,
48
)
@@ -16,6 +20,7 @@
1620
'UpdateProjectTableTask',
1721
'UpdateLookupTableTask',
1822
'UpdateVariantAnnotationsTableWithNewSamplesTask',
23+
'WriteCachedReferenceDatasetQuery',
1924
'WriteMetadataForRunTask',
2025
'WriteProjectFamilyTablesTask',
2126
]

0 commit comments

Comments
 (0)