Skip to content

Commit ad54f0a

Browse files
committed
support duplex_only option
1 parent 47c5864 commit ad54f0a

File tree

5 files changed

+10
-3
lines changed

5 files changed

+10
-3
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,11 +192,12 @@ options:
192192
-o, --out output bam/sam file. STDOUT will be written to if it's not specified (string [=-])
193193
-r, --ref reference fasta file name (should be an uncompressed .fa/.fasta file) (string)
194194
-b, --bed bed file to specify the capturing region, none by default (string [=])
195+
-x, --duplex_only only output duplex consensus sequences; single-stranded consensus sequences will be discarded.
195196
-u, --umi_prefix the prefix for UMI, if it has one. None by default. Check the README for the details of UMI formats. (string [=auto])
196197
-s, --supporting_reads only output consensus reads/pairs that were merged from >= <supporting_reads> reads/pairs. The value should be 1~10, and the default value is 1. (int [=1])
197198
-a, --ratio_threshold if the ratio of the major base in a cluster is less than <ratio_threshold>, it will be further compared to the reference. The value should be 0.5~1.0, and the default value is 0.8 (double [=0.8])
198199
-c, --score_threshold if the score of the major base in a cluster is less than <score_threshold>, it will be further compared to the reference. The value should be 1~20, and the default value is 6 (int [=6])
199-
-d, --umi_diff_threshold if two reads with identical mapping position have UMI difference <= <umi_diff_threshold>, then they will be merged to generate a consensus read. Default value is 2. (int [=2])
200+
-d, --umi_diff_threshold if two reads with identical mapping position have UMI difference <= <umi_diff_threshold>, then they will be merged to generate a consensus read. Default value is 1. (int [=1])
200201
-D, --duplex_diff_threshold if the forward consensus and reverse consensus sequences have <= <duplex_diff_threshold> mismatches, then they will be merged to generate a duplex consensus sequence, otherwise they will be discarded. Default value is 2. (int [=2])
201202
--high_qual the threshold for a quality score to be considered as high quality. Default 30 means Q30. (int [=30])
202203
--moderate_qual the threshold for a quality score to be considered as moderate quality. Default 20 means Q20. (int [=20])

src/cluster.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ vector<Pair*> Cluster::clusterByUMI(int umiDiffThreshold, Stats* preStats, Stats
154154
}
155155
// no duplex found, treat it as sscs
156156
if(!foundDuplex) {
157-
if(p1->mMergeReads >= mOptions->clusterSizeReq) {
157+
if(!mOptions->duplexOnly && p1->mMergeReads >= mOptions->clusterSizeReq) {
158158
singleConsesusCount++;
159159
p1->writeSscsDcsTag();
160160
postStats->addSSCS();
@@ -168,7 +168,7 @@ vector<Pair*> Cluster::clusterByUMI(int umiDiffThreshold, Stats* preStats, Stats
168168
// no umi, no duplex
169169
for(int i=0;i<singleConsensusPairs.size(); i++) {
170170
Pair* p = singleConsensusPairs[i];
171-
if(p->mMergeReads >= mOptions->clusterSizeReq) {
171+
if(!mOptions->duplexOnly && p->mMergeReads >= mOptions->clusterSizeReq) {
172172
singleConsesusCount++;
173173
p->writeSscsDcsTag();
174174
postStats->addSSCS();

src/main.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ int main(int argc, char* argv[]){
3232
cmd.add<string>("out", 'o', "output bam/sam file. STDOUT will be written to if it's not specified", false, "-");
3333
cmd.add<string>("ref", 'r', "reference fasta file name (should be an uncompressed .fa/.fasta file)", true, "");
3434
cmd.add<string>("bed", 'b', "bed file to specify the capturing region, none by default", false, "");
35+
cmd.add("duplex_only", 'x', "only output duplex consensus sequences, which means single stranded consensus sequences will be discarded.");
3536

3637
// UMI
3738
cmd.add<string>("umi_prefix", 'u', "the prefix for UMI, if it has. None by default. Check the README for the defails of UMI formats.", false, "auto");
@@ -74,6 +75,7 @@ int main(int argc, char* argv[]){
7475
opt.properReadsUmiDiffThreshold = cmd.get<int>("umi_diff_threshold");
7576
opt.duplexMismatchThreshold = cmd.get<int>("duplex_diff_threshold");
7677
opt.debug = cmd.exist("debug");
78+
opt.duplexOnly = cmd.exist("duplex_only");
7779

7880
// reporting
7981
opt.jsonFile = cmd.get<string>("json");

src/options.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ Options::Options(){
3838

3939
bedCoverageStep = 10;
4040
coverageStep = 10000;
41+
42+
duplexOnly = false;
4143
}
4244

4345
bool Options::validate() {

src/options.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ class Options{
5959

6060
int coverageStep;
6161
int bedCoverageStep;
62+
63+
bool duplexOnly;
6264
};
6365

6466
#endif

0 commit comments

Comments
 (0)