Skip to content

Commit 680779d

Browse files
committed
Handle duplicates
1 parent ee1a157 commit 680779d

File tree

4 files changed

+10
-5
lines changed

4 files changed

+10
-5
lines changed

v03_pipeline/lib/reference_datasets/dbnsfp.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,10 @@ def get_ht(path: str, reference_genome: ReferenceGenome) -> hl.Table:
7777
**{k: predictor_parse(ht[k]) for k in PREDICTOR_FIELDS},
7878
)
7979
ht = ht.rename(rename)
80-
81-
return key_by_locus_alleles(ht, reference_genome)
80+
ht = key_by_locus_alleles(ht, reference_genome)
81+
return ht.group_by(*ht.key).aggregate(
82+
**{f: hl.agg.max(ht[f]) for f in ht.row_key},
83+
)
8284

8385

8486
def select(_: ReferenceGenome, dataset_type: DatasetType, ht: hl.Table) -> hl.Table:

v03_pipeline/lib/reference_datasets/eigen.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@
33

44
def get_ht(path: str, *_) -> hl.Table:
55
ht = hl.read_table(path)
6-
return ht.select(Eigen_phred=ht.info['Eigen-phred'])
6+
ht = ht.select(Eigen_phred=ht.info['Eigen-phred'])
7+
return ht.group_by(*ht.key).aggregate(Eigen_phred=hl.agg.max(ht.Eigen_phred))

v03_pipeline/lib/reference_datasets/hmtvar.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,5 @@ def get_ht(
2020
alleles=hl.array([ht.ref_rCRS, ht.alt]),
2121
score=ht.disease_score,
2222
)
23-
return ht.key_by('locus', 'alleles')
23+
ht = ht.key_by('locus', 'alleles')
24+
return ht.group_by(*ht.key).aggregate(score=hl.agg.max(ht.score))

v03_pipeline/lib/reference_datasets/mitimpact.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,5 @@ def get_ht(
2323
alleles=[ht.Ref, ht.Alt],
2424
score=hl.parse_float32(ht.APOGEE2_score),
2525
)
26-
return ht.key_by('locus', 'alleles')
26+
ht = ht.key_by('locus', 'alleles')
27+
return ht.group_by(*ht.key).aggregate(score=hl.agg.max(ht.score))

0 commit comments

Comments
 (0)