Skip to content

Commit c126df8

Browse files
authored
Filter rows with no coverage (#763)
undefined
1 parent: fe49297 · commit: c126df8

File tree

2 files changed: 30 additions and 4 deletions.

2 files changed

+30
-4
lines changed

v03_pipeline/lib/misc/sample_ids.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -81,4 +81,5 @@ def subset_samples(
8181
else:
8282
raise MatrixTableSampleSetError(message, missing_samples)
8383
print(f'Subsetted to {subset_count} sample ids')
84-
return mt.semi_join_cols(sample_subset_ht)
84+
mt = mt.semi_join_cols(sample_subset_ht)
85+
return mt.filter_rows(hl.agg.any(hl.is_defined(mt.GT)))

v03_pipeline/lib/misc/sample_ids_test.py

Lines changed: 28 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12,9 +12,9 @@
1212
rows={'variants': [1, 2]},
1313
cols={'s': ['HG00731', 'HG00732', 'HG00733']},
1414
entries={
15-
'HL': [
16-
[0.0, hl.missing(hl.tfloat), 0.3],
17-
[0.1, 0.2, 0.3],
15+
'GT': [
16+
[hl.Call([0, 1]), hl.missing(hl.tcall), hl.Call([0, 1])],
17+
[hl.Call([0, 1]), hl.Call([0, 1]), hl.Call([1, 1])],
1818
],
1919
},
2020
).key_cols_by('s')
@@ -165,3 +165,28 @@ def test_subset_samples_missing_samples(self):
165165
sample_subset_ht,
166166
ignore_missing_samples_when_subsetting=False,
167167
)
168+
169+
def test_subset_no_defined_gt(self):
170+
mt = hl.MatrixTable.from_parts(
171+
rows={'variants': [1, 2]},
172+
cols={'s': ['HG00731', 'HG00732']},
173+
entries={
174+
'GT': [
175+
[hl.Call([1, 1]), hl.missing(hl.tcall)],
176+
[hl.Call([1, 1]), hl.Call([1, 1])],
177+
],
178+
},
179+
).key_cols_by('s')
180+
sample_subset_ht = hl.Table.parallelize(
181+
[
182+
{'s': 'HG00732'},
183+
],
184+
hl.tstruct(s=hl.tstr),
185+
key='s',
186+
)
187+
mt = subset_samples(
188+
mt,
189+
sample_subset_ht,
190+
False,
191+
)
192+
self.assertEqual(mt.count(), (1, 1))

0 commit comments

Comments
 (0)