Skip to content

Commit 17ed92c

Browse files
authored
Merge pull request #838 from broadinstitute/benb/intersect_family_guids
Intersect family guids when determining which to load
2 parents 70c45ab + f80fe89 commit 17ed92c

File tree

3 files changed

+75
-2
lines changed

3 files changed

+75
-2
lines changed

v03_pipeline/lib/tasks/write_project_family_tables.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,11 @@
22
import luigi
33
import luigi.util
44

5+
from v03_pipeline.lib.misc.io import import_pedigree
6+
from v03_pipeline.lib.misc.pedigree import parse_pedigree_ht_to_families
57
from v03_pipeline.lib.tasks.base.base_hail_table import BaseHailTableTask
68
from v03_pipeline.lib.tasks.base.base_loading_run_params import BaseLoadingRunParams
9+
from v03_pipeline.lib.tasks.files import RawFileTask
710
from v03_pipeline.lib.tasks.update_project_table import UpdateProjectTableTask
811
from v03_pipeline.lib.tasks.write_family_table import WriteFamilyTableTask
912

@@ -30,13 +33,26 @@ def complete(self) -> bool:
3033

3134
def run(self):
3235
# https://luigi.readthedocs.io/en/stable/tasks.html#dynamic-dependencies
36+
# Fetch family guids from project table
3337
update_project_table_task: luigi.Target = yield self.clone(
3438
UpdateProjectTableTask,
3539
force=False,
3640
)
3741
project_ht = hl.read_table(update_project_table_task.path)
38-
family_guids = hl.eval(project_ht.globals.family_guids)
39-
for family_guid in family_guids:
42+
family_guids_in_project_table = set(hl.eval(project_ht.globals.family_guids))
43+
44+
# Fetch family guids from pedigree
45+
pedigree_ht_task: luigi.Target = yield RawFileTask(self.project_pedigree_path)
46+
pedigree_ht = import_pedigree(pedigree_ht_task.path)
47+
families_guids_in_pedigree = {
48+
f.family_guid for f in parse_pedigree_ht_to_families(pedigree_ht)
49+
}
50+
51+
# Intersect them
52+
family_guids_to_load = (
53+
family_guids_in_project_table & families_guids_in_pedigree
54+
)
55+
for family_guid in family_guids_to_load:
4056
self.dynamic_write_family_table_tasks.add(
4157
self.clone(WriteFamilyTableTask, family_guid=family_guid),
4258
)

v03_pipeline/lib/tasks/write_project_family_tables_test.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import luigi.worker
33

44
from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType
5+
from v03_pipeline.lib.paths import project_table_path
56
from v03_pipeline.lib.tasks.write_project_family_tables import (
67
WriteProjectFamilyTablesTask,
78
)
@@ -10,9 +11,12 @@
1011
TEST_SNV_INDEL_VCF = 'v03_pipeline/var/test/callsets/1kg_30variants.vcf'
1112
TEST_REMAP = 'v03_pipeline/var/test/remaps/test_remap_1.tsv'
1213
TEST_PEDIGREE_4 = 'v03_pipeline/var/test/pedigrees/test_pedigree_4.tsv'
14+
TEST_PEDIGREE_4_SUBSET = 'v03_pipeline/var/test/pedigrees/test_pedigree_4_subset.tsv'
1315

1416

1517
class WriteProjectFamilyTablesTest(MockedDatarootTestCase):
18+
maxDiff = None
19+
1620
def test_snv_write_project_family_tables_task(self) -> None:
1721
worker = luigi.worker.Worker()
1822
write_project_family_tables = WriteProjectFamilyTablesTask(
@@ -51,3 +55,54 @@ def test_snv_write_project_family_tables_task(self) -> None:
5155
[['NA20888_1']],
5256
],
5357
)
58+
59+
write_project_family_tables_subset = WriteProjectFamilyTablesTask(
60+
reference_genome=ReferenceGenome.GRCh38,
61+
dataset_type=DatasetType.SNV_INDEL,
62+
sample_type=SampleType.WGS,
63+
callset_path=TEST_SNV_INDEL_VCF,
64+
project_guid='R0113_test_project',
65+
project_remap_path=TEST_REMAP,
66+
project_pedigree_path=TEST_PEDIGREE_4_SUBSET,
67+
skip_validation=True,
68+
skip_check_sex_and_relatedness=True,
69+
)
70+
worker.add(write_project_family_tables_subset)
71+
worker.run()
72+
self.assertTrue(write_project_family_tables_subset.complete())
73+
hts = [
74+
hl.read_table(write_family_table_task.output().path)
75+
for write_family_table_task in write_project_family_tables_subset.dynamic_write_family_table_tasks
76+
]
77+
# Only one family table written
78+
self.assertEqual(
79+
len(hts),
80+
1,
81+
)
82+
# Project table still contains all family guids
83+
self.assertCountEqual(
84+
hl.read_table(
85+
project_table_path(
86+
ReferenceGenome.GRCh38,
87+
DatasetType.SNV_INDEL,
88+
'R0113_test_project',
89+
),
90+
).family_guids.collect(),
91+
[
92+
[
93+
'123_1',
94+
'234_1',
95+
'345_1',
96+
'456_1',
97+
'567_1',
98+
'678_1',
99+
'789_1',
100+
'890_1',
101+
'901_1',
102+
'bcd_1',
103+
'cde_1',
104+
'def_1',
105+
'efg_1',
106+
],
107+
],
108+
)
v03_pipeline/var/test/pedigrees/test_pedigree_4_subset.tsv

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Project_GUID Family_GUID Family_ID Individual_ID Paternal_ID Maternal_ID Sex
2+
R0114_project4 123_1 123 NA19675_1 F

0 commit comments

Comments
 (0)