Skip to content

Commit 6bb8ee0

Browse files
committed
added filtering in merge and sort tools (see #81)
1 parent 16e8d3d commit 6bb8ee0

File tree

2 files changed

+26
-12
lines changed

2 files changed

+26
-12
lines changed

sambamba/merge.d

Lines changed: 13 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -92,6 +92,7 @@ import core.atomic;
9292

9393
import sambamba.utils.common.progressbar;
9494
import sambamba.utils.common.overwrite;
95+
import sambamba.utils.common.filtering;
9596

9697
void printUsage() {
9798
stderr.writeln("Usage: sambamba-merge [options] <output.bam> <input1.bam> <input2.bam> [...]");
@@ -104,6 +105,8 @@ void printUsage() {
104105
stderr.writeln(" output merged header to stdout in SAM format, other options are ignored; mainly for debug purposes");
105106
stderr.writeln(" -p, --show-progress");
106107
stderr.writeln(" show progress bar in STDERR");
108+
stderr.writeln(" -F, --filter=FILTER");
109+
stderr.writeln(" keep only reads that satisfy FILTER");
107110
}
108111

109112
// these variables can be implicitly used in tasks created in writeBAM
@@ -114,6 +117,7 @@ shared(string[string][]) program_id_map;
114117
shared(string[string][]) readgroup_id_map;
115118

116119
__gshared static TaskPool task_pool;
120+
__gshared static Filter read_filter;
117121

118122
BamRead changeAlignment(Tuple!(BamRead, size_t) al_with_file_id) {
119123
auto al = al_with_file_id[0];
@@ -157,14 +161,10 @@ BamRead changeAlignment(Tuple!(BamRead, size_t) al_with_file_id) {
157161
}
158162

159163
auto modifyAlignmentRange(T)(T alignments_with_file_id) {
160-
version(serial) {
161-
return map!changeAlignment(zip(alignments_with_file_id[0],
162-
repeat(alignments_with_file_id[1])));
163-
} else {
164-
return task_pool.map!changeAlignment(zip(alignments_with_file_id[0],
165-
repeat(alignments_with_file_id[1])),
166-
8192);
167-
}
164+
return task_pool.map!changeAlignment(
165+
zip(filtered(alignments_with_file_id[0], read_filter),
166+
repeat(alignments_with_file_id[1])),
167+
8192);
168168
}
169169

170170
version(standalone) {
@@ -180,6 +180,7 @@ int merge_main(string[] args) {
180180
bool validate_headers = false;
181181
bool header_only = false;
182182
bool show_progress = false;
183+
string filter_str = null;
183184

184185
if (args.length < 4) {
185186
printUsage();
@@ -194,7 +195,10 @@ int merge_main(string[] args) {
194195
"compression-level|l", &compression_level,
195196
"validate-headers|v", &validate_headers,
196197
"header|H", &header_only,
197-
"show-progress|p", &show_progress);
198+
"show-progress|p", &show_progress,
199+
"filter|F", &filter_str);
200+
201+
read_filter = createFilterFromQuery(filter_str);
198202

199203
task_pool = new TaskPool(number_of_threads);
200204
scope(exit) task_pool.finish();

sambamba/sort.d

Lines changed: 13 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,7 @@ import std.c.string;
4747
import sambamba.utils.common.progressbar;
4848
import sambamba.utils.common.overwrite;
4949
import sambamba.utils.common.tmpdir;
50+
import sambamba.utils.common.filtering;
5051

5152
import thirdparty.mergesort;
5253

@@ -69,6 +70,8 @@ void printUsage() {
6970
stderr.writeln(" show progressbar in STDERR");
7071
stderr.writeln(" -t, --nthreads=NTHREADS");
7172
stderr.writeln(" use specified number of threads");
73+
stderr.writeln(" -F, --filter=FILTER");
74+
stderr.writeln(" keep only reads that satisfy FILTER");
7275
}
7376

7477
version(standalone) {
@@ -94,6 +97,7 @@ class Sorter {
9497
bool uncompressed_chunks = false;
9598
string output_filename = null;
9699
string filename = null;
100+
string filter_str = null;
97101

98102
struct UnsortedChunk {
99103
size_t max_sz;
@@ -181,20 +185,25 @@ class Sorter {
181185
}
182186

183187
void sort() {
188+
auto filter = createFilterFromQuery(filter_str);
189+
184190
createHeader();
185191

186192
bam.setBufferSize(16_000_000);
187193
bam.assumeSequentialProcessing();
194+
188195
if (show_progress) {
189196
stderr.writeln("Writing sorted chunks to temporary directory...");
190197
bar = new shared(ProgressBar)();
191198
auto reads = bam.readsWithProgress(
192199
(lazy float p){ bar.update(p); }
193200
);
194-
writeSortedChunks(reads);
201+
auto filtered_reads = filtered(reads, filter);
202+
writeSortedChunks(filtered_reads);
195203
bar.finish();
196204
} else {
197-
writeSortedChunks(bam.reads!withoutOffsets);
205+
auto filtered_reads = filtered(bam.reads!withoutOffsets(), filter);
206+
writeSortedChunks(filtered_reads);
198207
}
199208

200209
scope(success) {
@@ -427,7 +436,8 @@ int sort_main(string[] args) {
427436
"uncompressed-chunks|u", &sorter.uncompressed_chunks,
428437
"compression-level|l", &sorter.compression_level,
429438
"show-progress|p", &show_progress,
430-
"nthreads|t", &n_threads);
439+
"nthreads|t", &n_threads,
440+
"filter|F", &sorter.filter_str);
431441

432442
if (sorter.output_filename is null) {
433443
sorter.output_filename = setExtension(args[1], "sorted.bam");

0 commit comments

Comments
 (0)