Skip to content

Commit eeb7724

Browse files
committed
in satc_dump there was an issue when filtering was on and there were multiple bins specified in an incorrect order
1 parent 8c0a23b commit eeb7724

File tree

3 files changed

+13
-32
lines changed

3 files changed

+13
-32
lines changed

src/common/version.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#pragma once
22

3-
#define SPLASH_VER "2.11.5"
3+
#define SPLASH_VER "2.11.6"
44

55
inline void SPLASH_VER_PRINT(std::ostream& oss) {
66
oss << "splash version: " << SPLASH_VER << "\n";

src/satc_dump/satc_dump.cpp

Lines changed: 11 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -245,28 +245,6 @@ std::vector<std::string> read_bins_paths(const std::string& path, uint32_t n_bin
245245
return bin_paths;
246246
}
247247

248-
std::vector<std::vector<uint64_t>> split_anchors(const std::string& path, uint32_t n_bins) {
249-
std::vector<std::vector<uint64_t>> res;
250-
if (path == "") {
251-
return res;
252-
}
253-
res.resize(n_bins);
254-
std::ifstream in(path);
255-
if (!in) {
256-
std::cerr << "Error: cannot open file " << path << "\n";
257-
exit(1);
258-
}
259-
std::string str_anchor;
260-
ReadAnchorsFromPlainOrDSV(in, path,
261-
[&res, &n_bins](uint64_t anchor)
262-
{
263-
uint64_t bin_id = refresh::MurMur64Hash{}(anchor) % n_bins;
264-
res[bin_id].push_back(anchor);
265-
});
266-
return res;
267-
}
268-
269-
270248
template<typename writer_t>
271249
class SeparatelyOrNot {
272250
static_assert(std::is_same_v<writer_t, writer_binary> ||
@@ -323,21 +301,24 @@ void process_multibin_mode_impl(const Params& params) {
323301
std::is_same_v<separately_or_not_t, SeparatelyOrNot<writer_binary>>);
324302

325303
auto bin_paths = read_bins_paths(params.input, params.n_bins);
326-
auto bins_anchors = split_anchors(params.anchor_list_path, params.n_bins);
327-
bool accept_all = bins_anchors.empty();
328304

329305
separately_or_not_t separately_or_not(params.output, params.separately);
330306

307+
308+
//previously the filtering anchors (params.anchor_list_path) were split into bins to reduce the size of the data structure in AcceptedAnchors
309+
//I have never tested how it influences the performance
310+
//the problem with this is that if the bins in the input file are not in the order bin_id_0, bin_id_1, etc., there will be an inconsistency
311+
//and the wrong filtering set is used
312+
//this could be solved by checking the bin id based on the first anchor in a given bin and selecting the appropriate filtering anchor set
313+
//but for simplicity I will just create a single AcceptedAnchors instance for all bins
314+
//I leave this comment for future reference, in case there is a performance issue
315+
//Last commit that still split the filtering anchors per bin (with possibly wrong results): 23b32b5d85049d3f808f4e444910832d2d03d2f1
316+
317+
AcceptedAnchors accepted_anchors(params.anchor_list_path);
331318
SampleNameDecoder sample_name_decoder(params.sample_names);
332319
for (uint32_t bin_id = 0; bin_id < params.n_bins; ++bin_id) {
333320
separately_or_not.StartBin(bin_id);
334321

335-
if (!accept_all && bins_anchors[bin_id].empty()) {
336-
std::cerr << "INFO: none of specified anchors occurs in bin " << bin_id << " (" << bin_paths[bin_id]<< "). Skip reading bin content.\n";
337-
continue;
338-
}
339-
AcceptedAnchors accepted_anchors(accept_all ? std::vector<uint64_t>{} : bins_anchors[bin_id]);
340-
341322
buffered_binary_reader in(bin_paths[bin_id]);
342323
if (!in) {
343324
std::cerr << "Error: cannot open file " << bin_paths[bin_id] << "\n";

src/splash.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def _split_lines(self, text, width):
2323
# this is the RawTextHelpFormatter._split_lines
2424
return argparse.HelpFormatter._split_lines(self, text, width)
2525

26-
SPLASH_VERSION="2.11.5"
26+
SPLASH_VERSION="2.11.6"
2727

2828
parser = argparse.ArgumentParser(
2929
prog = "splash",

0 commit comments

Comments
 (0)