|
1 | 1 | import hail as hl
|
2 | 2 | import luigi
|
3 | 3 |
|
4 |
| -from v03_pipeline.lib.annotations.fields import get_fields |
5 |
| -from v03_pipeline.lib.misc.family_entries import compute_callset_family_entries_ht |
6 |
| -from v03_pipeline.lib.misc.io import import_pedigree |
7 |
| -from v03_pipeline.lib.misc.pedigree import parse_pedigree_ht_to_families |
8 |
| -from v03_pipeline.lib.misc.sample_ids import subset_samples |
9 | 4 | from v03_pipeline.lib.paths import family_table_path
|
10 | 5 | from v03_pipeline.lib.tasks.base.base_write import BaseWriteTask
|
11 | 6 | from v03_pipeline.lib.tasks.files import GCSorLocalTarget
|
12 |
| -from v03_pipeline.lib.tasks.write_remapped_and_subsetted_callset import ( |
13 |
| - WriteRemappedAndSubsettedCallsetTask, |
| 7 | +from v03_pipeline.lib.tasks.update_project_table import ( |
| 8 | + UpdateProjectTableTask, |
14 | 9 | )
|
15 | 10 |
|
16 | 11 |
|
@@ -56,50 +51,28 @@ def complete(self) -> bool:
|
56 | 51 | )
|
57 | 52 |
|
58 | 53 | def requires(self) -> luigi.Task:
|
59 |
| - return WriteRemappedAndSubsettedCallsetTask( |
| 54 | + return UpdateProjectTableTask( |
60 | 55 | self.reference_genome,
|
61 | 56 | self.dataset_type,
|
62 | 57 | self.sample_type,
|
63 |
| - self.callset_path, |
64 | 58 | self.project_guid,
|
| 59 | + self.callset_path, |
65 | 60 | self.project_remap_path,
|
66 | 61 | self.project_pedigree_path,
|
67 | 62 | self.ignore_missing_samples_when_subsetting,
|
68 | 63 | self.ignore_missing_samples_when_remapping,
|
69 | 64 | self.validate,
|
70 | 65 | False,
|
| 66 | + self.is_new_gcnv_joint_call, |
71 | 67 | )
|
72 | 68 |
|
73 | 69 | def create_table(self) -> hl.Table:
|
74 |
| - callset_mt = hl.read_matrix_table(self.input().path) |
75 |
| - pedigree_ht = import_pedigree(self.project_pedigree_path) |
76 |
| - families = parse_pedigree_ht_to_families(pedigree_ht) |
77 |
| - family = next( |
78 |
| - iter( |
79 |
| - family for family in families if family.family_guid == self.family_guid |
80 |
| - ), |
81 |
| - ) |
82 |
| - callset_mt = subset_samples( |
83 |
| - callset_mt, |
84 |
| - hl.Table.parallelize( |
85 |
| - [{'s': sample_id} for sample_id in family.samples], |
86 |
| - hl.tstruct(s=hl.dtype('str')), |
87 |
| - key='s', |
88 |
| - ), |
89 |
| - False, |
90 |
| - ) |
91 |
| - ht = compute_callset_family_entries_ht( |
92 |
| - self.dataset_type, |
93 |
| - callset_mt, |
94 |
| - get_fields( |
95 |
| - callset_mt, |
96 |
| - self.dataset_type.genotype_entry_annotation_fns, |
97 |
| - **self.param_kwargs, |
98 |
| - ), |
99 |
| - ) |
100 |
| - ht = ht.transmute( |
101 |
| - entries=hl.flatten(ht.family_entries), |
| 70 | + project_ht = hl.read_table(self.input().path) |
| 71 | + family_i = project_ht.globals.family_guids.index(self.family_guid) |
| 72 | + ht = project_ht.transmute( |
| 73 | + entries=project_ht.family_entries[family_i], |
102 | 74 | )
|
| 75 | + ht = ht.filter(hl.is_defined(ht.entries)) |
103 | 76 | return ht.select_globals(
|
104 | 77 | sample_ids=ht.family_samples[self.family_guid],
|
105 | 78 | sample_type=self.sample_type.value,
|
|
0 commit comments