File tree Expand file tree Collapse file tree 1 file changed +3
-3
lines changed
v03_pipeline/lib/reference_datasets Expand file tree Collapse file tree 1 file changed +3
-3
lines changed Original file line number Diff line number Diff line change 11
11
12
12
def remove_duplicate_scores (ht : hl .Table ):
13
13
#
14
- # SpliceAI has duplicate rows of the ilk:
14
+ # SpliceAI has many duplicate rows of the ilk:
15
15
#
16
16
# 1:861264 | ["C","A"] | NA | -1.00e+01 | NA | ["A|AL645608.1|0.00|0.00|0.00|0.00|2|27|12|1"] |
17
17
# 1:861264 | ["C","A"] | NA | -1.00e+01 | NA | ["A|SAMD11|0.02|0.01|0.00|0.00|14|38|14|38"]
@@ -22,10 +22,10 @@ def remove_duplicate_scores(ht: hl.Table):
22
22
non_duplicates_ht = ht .anti_join (duplicates_ht )
23
23
return non_duplicates_ht .union (
24
24
# Remove rows that 1) are part of duplicate variant groupings
25
- # and 2) contain dots.
25
+ # and 2) contain dots. Then, drop any remaining duplicates with .distinct(), which keeps one arbitrary row per key.
26
26
duplicates_ht .filter (
27
27
~ duplicates_ht .info .SpliceAI [0 ].split (delim = '\\ |' )[1 ].contains ('.' ),
28
- ),
28
+ ). distinct () ,
29
29
)
30
30
31
31
You can’t perform that action at this time.
0 commit comments