Skip to content

Commit 799ff8f

Browse files
authored
add task to write relatedness check to tsv (#930)
* add task to write relatedness check to tsv
* fix requirements
* relatedness_check_table_path
1 parent e4682a9 commit 799ff8f

File tree

4 files changed

+112
-5
lines changed

4 files changed

+112
-5
lines changed

v03_pipeline/lib/paths.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,22 @@ def relatedness_check_table_path(
187187
)
188188

189189

190+
def relatedness_check_tsv_path(
    reference_genome: ReferenceGenome,
    dataset_type: DatasetType,
    callset_path: str,
) -> str:
    """Return the path of the relatedness check TSV for a callset.

    The file lives in a ``relatedness_check`` directory under the prefix
    that ``_pipeline_prefix`` builds from ``Env.LOADING_DATASETS_DIR``,
    the reference genome and the dataset type. The file name is the
    SHA-256 hex digest of the UTF-8 encoded ``callset_path`` with a
    ``.tsv`` extension.
    """
    prefix = _pipeline_prefix(
        Env.LOADING_DATASETS_DIR,
        reference_genome,
        dataset_type,
    )
    callset_digest = hashlib.sha256(callset_path.encode("utf8")).hexdigest()
    return os.path.join(prefix, 'relatedness_check', f'{callset_digest}.tsv')
204+
205+
190206
def remapped_and_subsetted_callset_path(
191207
reference_genome: ReferenceGenome,
192208
dataset_type: DatasetType,
v03_pipeline/lib/tasks/write_relatedness_check_tsv.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import hail as hl
2+
import luigi
3+
import luigi.util
4+
5+
from v03_pipeline.lib.paths import relatedness_check_tsv_path
6+
from v03_pipeline.lib.tasks.base.base_loading_run_params import BaseLoadingRunParams
7+
from v03_pipeline.lib.tasks.files import GCSorLocalTarget
8+
from v03_pipeline.lib.tasks.write_relatedness_check_table import (
9+
WriteRelatednessCheckTableTask,
10+
)
11+
12+
13+
@luigi.util.inherits(BaseLoadingRunParams)
class WriteRelatednessCheckTsvTask(luigi.Task):
    """Export the relatedness check Hail table for a callset to a TSV file."""

    def output(self) -> luigi.Target:
        """Return the target wrapping this callset's relatedness check TSV path."""
        tsv_path = relatedness_check_tsv_path(
            self.reference_genome,
            self.dataset_type,
            self.callset_path,
        )
        return GCSorLocalTarget(tsv_path)

    def requires(self):
        """Require the task that writes the relatedness check table."""
        table_task = self.clone(WriteRelatednessCheckTableTask)
        return [table_task]

    def run(self):
        """Read the upstream table and export it to the output path."""
        # requires() returns a one-element list, so the table is input()[0].
        table_target = self.input()[0]
        relatedness_check_ht = hl.read_table(table_target.path)
        relatedness_check_ht.export(self.output().path)
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import shutil
2+
3+
import luigi.worker
4+
5+
from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType
6+
from v03_pipeline.lib.paths import relatedness_check_table_path
7+
from v03_pipeline.lib.tasks.write_relatedness_check_tsv import (
8+
WriteRelatednessCheckTsvTask,
9+
)
10+
from v03_pipeline.lib.test.mocked_dataroot_testcase import MockedDatarootTestCase
11+
12+
# Run identifier passed to the task under test.
TEST_RUN_ID = 'manual__2024-04-03'
# Callset path used both to build the task and to derive the table path.
TEST_VCF = 'v03_pipeline/var/test/callsets/1kg_30variants.vcf'
# Fixture relatedness check Hail table that the test copies into place.
TEST_RELATEDNESS_CHECK_1 = (
    'v03_pipeline/var/test/relatedness_check/test_relatedness_check_1.ht'
)
17+
18+
19+
class WriteRelatednessCheckTsvTaskTest(MockedDatarootTestCase):
    """Run WriteRelatednessCheckTsvTask against a fixture relatedness table."""

    def setUp(self) -> None:
        super().setUp()
        # Copy the fixture table to the path computed for TEST_VCF.
        # NOTE(review): presumably this makes the upstream table task count
        # as already complete, so only the TSV export runs -- confirm.
        shutil.copytree(
            TEST_RELATEDNESS_CHECK_1,
            relatedness_check_table_path(
                ReferenceGenome.GRCh38,
                DatasetType.SNV_INDEL,
                TEST_VCF,
            ),
        )

    def test_write_relatedness_check_tsv_task(
        self,
    ) -> None:
        """The exported TSV starts with the expected header and rows."""
        worker = luigi.worker.Worker()
        task = WriteRelatednessCheckTsvTask(
            reference_genome=ReferenceGenome.GRCh38,
            dataset_type=DatasetType.SNV_INDEL,
            callset_path=TEST_VCF,
            run_id=TEST_RUN_ID,
            sample_type=SampleType.WES,
        )
        worker.add(task)
        worker.run()
        self.assertTrue(task.complete())
        with task.output().open('r') as f:
            lines = f.readlines()
        expected_lines = [
            'i\tj\tibd0\tibd1\tibd2\tpi_hat\n',
            'HG00731_1\tHG00733_1\t0\t1\t0\t5.0000e-01\n',
            'HG00732_1\tHG00733_1\t0\t1\t0\t5.0000e-01\n',
        ]
        # Compare the leading slice instead of zip()-ing the two lists:
        # zip() stops at the shorter input, so an empty or truncated export
        # would otherwise pass without a single assertion running. Extra
        # trailing rows in the export are still tolerated.
        self.assertEqual(expected_lines, lines[: len(expected_lines)])

v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,16 @@
1717
from v03_pipeline.lib.misc.pedigree import parse_pedigree_ht_to_families
1818
from v03_pipeline.lib.misc.sample_ids import remap_sample_ids, subset_samples
1919
from v03_pipeline.lib.model.environment import Env
20-
from v03_pipeline.lib.paths import remapped_and_subsetted_callset_path
20+
from v03_pipeline.lib.paths import (
21+
relatedness_check_table_path,
22+
remapped_and_subsetted_callset_path,
23+
)
2124
from v03_pipeline.lib.tasks.base.base_loading_run_params import BaseLoadingRunParams
2225
from v03_pipeline.lib.tasks.base.base_write import BaseWriteTask
2326
from v03_pipeline.lib.tasks.files import GCSorLocalTarget, RawFileTask
2427
from v03_pipeline.lib.tasks.validate_callset import ValidateCallsetTask
25-
from v03_pipeline.lib.tasks.write_relatedness_check_table import (
26-
WriteRelatednessCheckTableTask,
28+
from v03_pipeline.lib.tasks.write_relatedness_check_tsv import (
29+
WriteRelatednessCheckTsvTask,
2730
)
2831
from v03_pipeline.lib.tasks.write_sex_check_table import WriteSexCheckTableTask
2932

@@ -67,7 +70,7 @@ def requires(self) -> list[luigi.Task]:
6770
):
6871
requirements = [
6972
*requirements,
70-
self.clone(WriteRelatednessCheckTableTask),
73+
self.clone(WriteRelatednessCheckTsvTask),
7174
self.clone(WriteSexCheckTableTask),
7275
]
7376
return requirements
@@ -101,7 +104,13 @@ def create_table(self) -> hl.MatrixTable:
101104
and self.dataset_type.check_sex_and_relatedness
102105
and not self.skip_check_sex_and_relatedness
103106
):
104-
relatedness_check_ht = hl.read_table(self.input()[2].path)
107+
relatedness_check_ht = hl.read_table(
108+
relatedness_check_table_path(
109+
self.reference_genome,
110+
self.dataset_type,
111+
self.callset_path,
112+
),
113+
)
105114
sex_check_ht = hl.read_table(self.input()[3].path)
106115
families_failed_relatedness_check = get_families_failed_relatedness_check(
107116
families - families_failed_missing_samples.keys(),

0 commit comments

Comments
 (0)