Skip to content

Commit fb44b2b

Browse files
authored
use dataset configs in UpdateVATWithUpdatedRDC complete() take 2 (#723)
* use datasets in Globals constructors
* simpler mock
* cleaner set operations
* start to build config in test
* use dataset configs in UpdateVATWithUpdatedRDC complete() take 2
* fixes
* oops
* Delete v03_pipeline/lib/tasks/reference_data/write_cached_reference_dataset_query.py
* Delete v03_pipeline/lib/tasks/reference_data/write_cached_reference_dataset_query_test.py
* oops
* fix mocks
* Filter to paths
1 parent fe5e913 commit fb44b2b

File tree

84 files changed

+660
-49
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

84 files changed

+660
-49
lines changed

v03_pipeline/lib/reference_data/compare_globals.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -67,13 +67,7 @@ def from_ht(
6767
paths = dict(rdc_globals_struct.paths)
6868
versions = dict(rdc_globals_struct.versions)
6969
# enums are nested structs
70-
enums = {k: dict(v) for k, v in rdc_globals_struct.enums.items()}
71-
72-
for global_dict in [paths, versions, enums]:
73-
for dataset in list(global_dict.keys()):
74-
if dataset not in datasets:
75-
global_dict.pop(dataset)
76-
70+
enums = {k: dict(v) for k, v in rdc_globals_struct.enums.items() if k in paths}
7771
selects = {}
7872
for dataset in datasets:
7973
if dataset in ht.row:

v03_pipeline/lib/tasks/reference_data/update_variant_annotations_table_with_updated_reference_dataset.py

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -44,22 +44,25 @@ def complete(self) -> bool:
4444
)
4545
return False
4646

47-
for rdc in self.reference_dataset_collections:
48-
datasets = rdc.datasets(self.dataset_type)
49-
annotations_ht_globals = Globals.from_ht(
50-
hl.read_table(self.output().path),
51-
datasets,
52-
)
53-
rdc_ht_globals = Globals.from_ht(
54-
self.rdc_annotation_dependencies[f'{rdc.value}_ht'],
55-
datasets,
56-
)
57-
self._datasets_to_update.extend(
58-
get_datasets_to_update(
59-
annotations_ht_globals,
60-
rdc_ht_globals,
61-
),
62-
)
47+
datasets_to_check = [
48+
dataset
49+
for rdc in self.reference_dataset_collections
50+
for dataset in rdc.datasets(self.dataset_type)
51+
]
52+
annotations_ht_globals = Globals.from_ht(
53+
hl.read_table(self.output().path),
54+
datasets_to_check,
55+
)
56+
rdc_ht_globals = Globals.from_dataset_configs(
57+
self.reference_genome,
58+
datasets_to_check,
59+
)
60+
self._datasets_to_update.extend(
61+
get_datasets_to_update(
62+
annotations_ht_globals,
63+
rdc_ht_globals,
64+
),
65+
)
6366
logger.info(f'Datasets to update: {self._datasets_to_update}')
6467
return not self._datasets_to_update
6568

0 commit comments

Comments
 (0)