6
6
join_lookup_hts ,
7
7
remove_family_guids ,
8
8
)
9
+ from v03_pipeline .lib .misc .util import callset_project_pairs
9
10
from v03_pipeline .lib .model .constants import PROJECTS_EXCLUDED_FROM_LOOKUP
10
11
from v03_pipeline .lib .paths import lookup_table_path
11
12
from v03_pipeline .lib .tasks .base .base_update_task import BaseUpdateTask
16
17
17
18
18
19
class UpdateLookupTableTask (BaseUpdateTask ):
19
- callset_path = luigi .Parameter ()
20
- project_guid = luigi .Parameter ()
21
- project_remap_path = luigi .Parameter ()
22
- project_pedigree_path = luigi .Parameter ()
20
+ callset_paths = luigi .Parameter ()
21
+ project_guids = luigi .ListParameter ()
22
+ project_remap_paths = luigi .ListParameter ()
23
+ project_pedigree_paths = luigi .ListParameter ()
23
24
ignore_missing_samples_when_subsetting = luigi .BoolParameter (
24
25
default = False ,
25
26
parsing = luigi .BoolParameter .EXPLICIT_PARSING ,
@@ -43,27 +44,58 @@ def output(self) -> luigi.Target:
43
44
44
45
def complete(self) -> bool:
    """Report whether this task's output already reflects every requested update.

    The task is complete only when the base class reports completion and
    every (callset, project_guid) pair produced by ``callset_project_pairs``
    is present in the ``updates`` field of the table at ``self.output().path``.
    """

    def _all_pairs_recorded() -> bool:
        # Only evaluated when the base-class check passes, so the output
        # table is not read before that check.
        recorded_updates = hl.read_table(self.output().path).updates
        return hl.eval(
            hl.bind(
                lambda updates: hl.all(
                    [
                        updates.contains(
                            hl.Struct(
                                callset=callset_path,
                                project_guid=project_guid,
                            ),
                        )
                        for callset_path, project_guid, _, _ in callset_project_pairs(
                            self.callset_paths,
                            self.project_guids,
                            self.project_remap_paths,
                            self.project_pedigree_paths,
                        )
                    ],
                ),
                recorded_updates,
            ),
        )

    return super().complete() and _all_pairs_recorded()
53
72
54
def requires(self) -> list[luigi.Task]:
    """Build one upstream remap/subset task per (callset, project) tuple.

    The returned list follows the iteration order of
    ``callset_project_pairs``, so position ``i`` in ``self.input()``
    corresponds to the ``i``-th tuple it yields.
    """
    upstream_tasks = []
    for pair in callset_project_pairs(
        self.callset_paths,
        self.project_guids,
        self.project_remap_paths,
        self.project_pedigree_paths,
    ):
        (
            callset_path,
            project_guid,
            project_remap_path,
            project_pedigree_path,
        ) = pair
        upstream_tasks.append(
            WriteRemappedAndSubsettedCallsetTask(
                self.reference_genome,
                self.dataset_type,
                self.sample_type,
                callset_path,
                project_guid,
                project_remap_path,
                project_pedigree_path,
                self.ignore_missing_samples_when_subsetting,
                self.ignore_missing_samples_when_remapping,
                self.validate,
            ),
        )
    return upstream_tasks
67
99
68
100
def initialize_table (self ) -> hl .Table :
69
101
key_type = self .dataset_type .table_key_type (self .reference_genome )
@@ -91,34 +123,44 @@ def initialize_table(self) -> hl.Table:
91
123
)
92
124
93
125
def update_table(self, ht: hl.Table) -> hl.Table:
    """Fold every requested (callset, project) pair into the lookup table.

    For each tuple yielded by ``callset_project_pairs``:

    * if the project is in ``PROJECTS_EXCLUDED_FROM_LOOKUP``, only the
      (callset, project_guid) pair is added to the ``updates`` global;
    * otherwise the project's remapped/subsetted callset is read, its
      families are removed from ``ht``, a callset lookup table is computed
      and joined in, and the pair is added to ``updates``.

    Args:
        ht: the current lookup table.

    Returns:
        The lookup table after all pairs have been applied.
    """
    # NB: there's a chance this many hail operations blows the DAG compute stack
    # in an unfortunate way. Please keep an eye out!
    for i, (callset_path, project_guid, _, _) in enumerate(
        callset_project_pairs(
            self.callset_paths,
            self.project_guids,
            self.project_remap_paths,
            self.project_pedigree_paths,
        ),
    ):
        # Built from the loop variables: the task no longer has singular
        # `callset_path` / `project_guid` parameters, so the per-pair values
        # must be used when recording the update.
        update = hl.Struct(callset=callset_path, project_guid=project_guid)
        if project_guid in PROJECTS_EXCLUDED_FROM_LOOKUP:
            ht = ht.annotate_globals(updates=ht.updates.add(update))
            continue
        # `i` indexes self.input() in the same order the pairs are yielded;
        # requires() builds its task list from the same call.
        callset_mt = hl.read_matrix_table(self.input()[i].path)
        ht = remove_family_guids(
            ht,
            project_guid,
            callset_mt.index_globals().family_samples.key_set(),
        )
        callset_ht = compute_callset_lookup_ht(
            self.dataset_type,
            callset_mt,
            project_guid,
        )
        ht = join_lookup_hts(
            ht,
            callset_ht,
        )
        ht = ht.select_globals(
            project_guids=ht.project_guids,
            project_families=ht.project_families,
            updates=ht.updates.add(update),
        )
    return ht
0 commit comments