Skip to content

Commit dcc8c6d

Browse files
committed
Merge branch 'benb/i_guess_this_is_sufficient' of github.com:broadinstitute/seqr-loading-pipelines into dev
2 parents 7a9094c + 33fd1d9 commit dcc8c6d

File tree

2 files changed

+22
-12
lines changed

2 files changed

+22
-12
lines changed

v03_pipeline/lib/misc/family_entries.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -98,13 +98,22 @@ def remove_family_guids(
9898
family_guids: hl.SetExpression,
9999
) -> hl.Table:
100100
# Remove families from the existing project table structure (both the entries arrays and the globals are mutated)
101-
family_indexes_to_keep = hl.array(
102-
hl.enumerate(ht.globals.family_guids)
103-
.filter(lambda item: ~family_guids.contains(item[1]))
104-
.map(lambda item: item[0]),
101+
family_indexes_to_keep = hl.eval(
102+
hl.array(
103+
hl.enumerate(ht.globals.family_guids)
104+
.filter(lambda item: ~family_guids.contains(item[1]))
105+
.map(lambda item: item[0]),
106+
),
105107
)
106108
ht = ht.annotate(
107-
family_entries=family_indexes_to_keep.map(lambda i: ht.family_entries[i]),
109+
# NB: this "should" work without the extra if statement (and does in the tests);
110+
# however, experiments on dataproc showed this statement hanging when the
111+
# indexes array was empty and left unevaluated.
112+
family_entries=hl.array(family_indexes_to_keep).map(
113+
lambda i: ht.family_entries[i],
114+
)
115+
if len(family_indexes_to_keep) > 0
116+
else hl.empty_array(ht.family_entries.dtype.element_type),
108117
)
109118
ht = ht.filter(hl.any(ht.family_entries.map(hl.is_defined)))
110119
return ht.annotate_globals(

v03_pipeline/lib/misc/lookup.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -128,22 +128,23 @@ def remove_project(
128128
existing_project_guids = hl.eval(ht.globals.project_guids)
129129
if project_guid not in existing_project_guids:
130130
return ht
131-
project_indexes_to_keep = (
131+
project_indexes_to_keep = hl.eval(
132132
hl.enumerate(existing_project_guids)
133133
.filter(lambda item: item[1] != project_guid)
134-
.map(lambda item: item[0])
134+
.map(lambda item: item[0]),
135135
)
136136
ht = ht.annotate(
137137
project_stats=(
138-
project_indexes_to_keep.map(
139-
lambda i: ht.project_stats[i],
140-
)
138+
# See "remove_family_guids" func for why this was necessary
139+
hl.array(project_indexes_to_keep).map(lambda i: ht.project_stats[i])
140+
if len(project_indexes_to_keep) > 0
141+
else hl.empty_array(ht.project_stats.dtype.element_type)
141142
),
142143
)
143144
ht = ht.filter(hl.any(ht.project_stats.map(hl.is_defined)))
144145
return ht.annotate_globals(
145-
project_guids=project_indexes_to_keep.map(
146-
lambda i: ht.project_guids[i],
146+
project_guids=ht.project_guids.filter(
147+
lambda p: p != project_guid,
147148
),
148149
project_families=hl.dict(
149150
ht.project_families.items().filter(lambda item: item[0] != project_guid),

0 commit comments

Comments
 (0)