Skip to content

Commit b3927c4

Browse files
committed
Allow shuffle on smaller callsets
1 parent cc50910 commit b3927c4

File tree

1 file changed

+4
-1
lines changed
  • v03_pipeline/lib/misc

1 file changed

+4
-1
lines changed

v03_pipeline/lib/misc/io.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
BIALLELIC = 2
1212
B_PER_MB = 1 << 20 # 1024 * 1024
1313
MB_PER_PARTITION = 128
14+
MAX_SAMPLES_SPLIT_MULTI_SHUFFLE = 100
1415

1516
MALE = 'Male'
1617
FEMALE = 'Female'
@@ -54,7 +55,9 @@ def split_multi_hts(mt: hl.MatrixTable) -> hl.MatrixTable:
5455
bi = bi.filter_rows(~bi.alleles.contains('*'))
5556
bi = bi.annotate_rows(a_index=1, was_split=False)
5657
multi = mt.filter_rows(hl.len(mt.alleles) > BIALLELIC)
57-
split = hl.split_multi_hts(multi)
58+
split = hl.split_multi_hts(
59+
multi, permit_shuffle=mt.count()[1] < MAX_SAMPLES_SPLIT_MULTI_SHUFFLE
60+
)
5861
mt = split.union_rows(bi)
5962
return mt.distinct_by_row()
6063

0 commit comments

Comments
 (0)