Skip to content

Commit 6f947d7

Browse files
authored
Revert "delete old crdq code" (#743)
1 parent 256c124 commit 6f947d7

File tree

1 file changed

+89
-0
lines changed

1 file changed

+89
-0
lines changed
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
#!/usr/bin/env python3
2+
import argparse
3+
4+
import hail as hl
5+
6+
from v03_pipeline.lib.misc.io import write
7+
from v03_pipeline.lib.model import (
8+
CachedReferenceDatasetQuery,
9+
DatasetType,
10+
ReferenceDatasetCollection,
11+
ReferenceGenome,
12+
)
13+
from v03_pipeline.lib.paths import (
14+
valid_cached_reference_dataset_query_path,
15+
valid_reference_dataset_collection_path,
16+
)
17+
from v03_pipeline.lib.reference_data.config import CONFIG
18+
from v03_pipeline.lib.reference_data.dataset_table_operations import (
19+
import_ht_from_config_path,
20+
)
21+
22+
23+
def get_ht(
    dataset_type: DatasetType,
    reference_genome: ReferenceGenome,
    query: CachedReferenceDatasetQuery,
) -> hl.Table:
    """Load the source table that ``query`` should be evaluated against.

    When ``query.reference_dataset`` is set, the table is imported from that
    dataset's entry in ``CONFIG`` for the given reference genome. Otherwise
    the COMBINED reference dataset collection table for the reference
    genome / dataset type pair is read.
    """
    if not query.reference_dataset:
        # No standalone dataset behind this query: use the combined collection.
        combined_path = valid_reference_dataset_collection_path(
            reference_genome,
            dataset_type,
            ReferenceDatasetCollection.COMBINED,
        )
        return hl.read_table(combined_path)

    # The query is defined over an uncombined reference dataset, so import it
    # through the combiner config.
    dataset_config = CONFIG[query.reference_dataset][reference_genome.v02_value]
    return import_ht_from_config_path(dataset_config, reference_genome)
39+
40+
41+
def run(
    dataset_type: DatasetType,
    reference_genome: ReferenceGenome,
    query: CachedReferenceDatasetQuery,
):
    """Evaluate ``query`` over its source table and write out the result.

    The source table comes from ``get_ht``; the output location comes from
    ``valid_cached_reference_dataset_query_path`` for the same
    reference genome / dataset type / query triple.
    """
    source_ht = get_ht(dataset_type, reference_genome, query)
    queried_ht = query.query(
        source_ht,
        dataset_type=dataset_type,
        reference_genome=reference_genome,
    )
    destination_path = valid_cached_reference_dataset_query_path(
        reference_genome,
        dataset_type,
        query,
    )
    print(f'Uploading ht to {destination_path}')
    write(queried_ht, destination_path)
55+
56+
57+
if __name__ == '__main__':
    # Command-line entry point: parse the reference genome, dataset type and
    # query, check that the query applies to that combination, then run it.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--reference-genome',
        type=ReferenceGenome,
        choices=list(ReferenceGenome),
        default=ReferenceGenome.GRCh38,
    )
    # NOTE(review): the help text describes a "run all datasets" mode when the
    # flag is omitted, but this script passes the resulting None straight
    # through to the validation and to run() -- confirm that None is handled
    # downstream or make this argument required.
    parser.add_argument(
        '--dataset-type',
        type=DatasetType,
        choices=list(DatasetType),
        default=None,
        help='When used, update the passed dataset, otherwise run all datasets.',
    )
    parser.add_argument(
        '--query',
        type=CachedReferenceDatasetQuery,
        choices=list(CachedReferenceDatasetQuery),
        required=True,
    )
    # Unrecognized arguments are tolerated and discarded.
    args, _ = parser.parse_known_args()
    if (
        args.query
        and args.query
        not in CachedReferenceDatasetQuery.for_reference_genome_dataset_type(
            args.reference_genome,
            args.dataset_type,
        )
    ):
        # Report the dataset type that was actually requested; the original
        # message interpolated the DatasetType class itself.
        msg = f'{args.query} is not a valid query for {args.dataset_type}'
        raise ValueError(msg)
    run(args.dataset_type, args.reference_genome, args.query)

0 commit comments

Comments
 (0)