Skip to content

Commit bb0c43e

Browse files
committed
add distinct
1 parent 03c47d1 commit bb0c43e

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

v03_pipeline/lib/reference_datasets/splice_ai.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
def remove_duplicate_scores(ht: hl.Table):
1313
#
14-
# SpliceAI has duplicate rows of the ilk:
14+
# SpliceAI has many duplicate rows of the ilk:
1515
#
1616
# 1:861264 | ["C","A"] | NA | -1.00e+01 | NA | ["A|AL645608.1|0.00|0.00|0.00|0.00|2|27|12|1"] |
1717
# 1:861264 | ["C","A"] | NA | -1.00e+01 | NA | ["A|SAMD11|0.02|0.01|0.00|0.00|14|38|14|38"]
@@ -22,10 +22,10 @@ def remove_duplicate_scores(ht: hl.Table):
2222
non_duplicates_ht = ht.anti_join(duplicates_ht)
2323
return non_duplicates_ht.union(
2424
# Remove rows that 1) are part of duplicate variant groupings
25-
# and 2) contain dots.
25+
# and 2) contain dots. Then, of any rows still sharing a key, keep one arbitrarily with .distinct()
2626
duplicates_ht.filter(
2727
~duplicates_ht.info.SpliceAI[0].split(delim='\\|')[1].contains('.'),
28-
),
28+
).distinct(),
2929
)
3030

3131

0 commit comments

Comments
 (0)