Skip to content

Add an extra hl.eval when computing which families to preserve. #811

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 13 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 14 additions & 5 deletions v03_pipeline/lib/misc/family_entries.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,13 +98,22 @@ def remove_family_guids(
family_guids: hl.SetExpression,
) -> hl.Table:
# Remove families from the existing project table structure (both the entries arrays and the globals are mutated)
family_indexes_to_keep = hl.array(
hl.enumerate(ht.globals.family_guids)
.filter(lambda item: ~family_guids.contains(item[1]))
.map(lambda item: item[0]),
family_indexes_to_keep = hl.eval(
hl.array(
hl.enumerate(ht.globals.family_guids)
.filter(lambda item: ~family_guids.contains(item[1]))
.map(lambda item: item[0]),
),
)
ht = ht.annotate(
family_entries=family_indexes_to_keep.map(lambda i: ht.family_entries[i]),
# NB: this "should" work without the extra conditional expression (and does in
# the tests); however, experiments on Dataproc showed this statement hanging
# when the indexes array was empty and left unevaluated.
family_entries=hl.array(family_indexes_to_keep).map(
lambda i: ht.family_entries[i],
)
if len(family_indexes_to_keep) > 0
else hl.empty_array(ht.family_entries.dtype.element_type),
)
ht = ht.filter(hl.any(ht.family_entries.map(hl.is_defined)))
return ht.annotate_globals(
Expand Down
15 changes: 8 additions & 7 deletions v03_pipeline/lib/misc/lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,22 +128,23 @@ def remove_project(
existing_project_guids = hl.eval(ht.globals.project_guids)
if project_guid not in existing_project_guids:
return ht
project_indexes_to_keep = (
project_indexes_to_keep = hl.eval(
hl.enumerate(existing_project_guids)
.filter(lambda item: item[1] != project_guid)
.map(lambda item: item[0])
.map(lambda item: item[0]),
)
ht = ht.annotate(
project_stats=(
project_indexes_to_keep.map(
lambda i: ht.project_stats[i],
)
# See the "remove_family_guids" function for why this conditional is necessary.
hl.array(project_indexes_to_keep).map(lambda i: ht.project_stats[i])
if len(project_indexes_to_keep) > 0
else hl.empty_array(ht.project_stats.dtype.element_type)
),
)
ht = ht.filter(hl.any(ht.project_stats.map(hl.is_defined)))
return ht.annotate_globals(
project_guids=project_indexes_to_keep.map(
lambda i: ht.project_guids[i],
project_guids=ht.project_guids.filter(
lambda p: p != project_guid,
),
project_families=hl.dict(
ht.project_families.items().filter(lambda item: item[0] != project_guid),
Expand Down
Loading