Skip to content

Commit bec639c

Browse files
committed
default to not removing '*' for peptides #399
1 parent ac37424 commit bec639c

File tree

3 files changed

+5
-13
lines changed

3 files changed

+5
-13
lines changed

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
- `seqkit translate`:
99
- add options `-s/--out-subseqs` and `-m/--min-len` to write ORFs longer than `x` amino acids as individual records. [#389](https://github.com/shenwei356/seqkit/issues/389)
1010
- `seqkit sum`:
11-
- fix the logic: if `-g/--remove-gaps` is not on for proteins, we'll forcibly switch it on to remove possible '*'.
11+
- do not remove possible '*' by default. Thanks to @photocyte. [#399](https://github.com/shenwei356/seqkit/issues/399)
1212
- added a progress bar for > 1 input files.
1313
- `seqkit pair`:
1414
- remove the restriction of requiring FASTQ format, i.e., FASTA files are also supported.

doc/docs/usage.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -851,10 +851,10 @@ Flags:
851851
-a, --all show all information, including the sequences length and the number of sequences
852852
-b, --basename only output basename of files
853853
-c, --circular the file contains a single circular genome sequence
854-
-G, --gap-letters string gap letters (default "- \t.*")
854+
-G, --gap-letters string gap letters to delete with the flag -g/--remove-gaps (default "- \t.*")
855855
-h, --help help for sum
856856
-k, --kmer-size int k-mer size for processing circular genomes (default 1000)
857-
-g, --remove-gaps remove gaps
857+
-g, --remove-gaps remove gap characters set in the option -G/--gap-letters
858858
--rna2dna convert RNA to DNA
859859
-s, --single-strand only consider the positive strand of a circular genome, e.g., ssRNA virus
860860
genomes

seqkit/cmd/sum.go

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ import (
3030
"os"
3131
"path/filepath"
3232
"runtime"
33-
"strings"
3433
"sync"
3534
"time"
3635

@@ -459,13 +458,6 @@ Examples:
459458
ab = fastxReader.Alphabet()
460459
if ab == seq.Protein {
461460
seqType = "P"
462-
if !removeGaps {
463-
if !strings.Contains(gapLetters, "*") {
464-
gapLetters += "*"
465-
}
466-
removeGaps = true
467-
log.Infof(`the flag -g/--remove-gaps is switched on for removing the possible stop codon '*' character for protein sequences`)
468-
}
469461
} else if ab == seq.RNA || ab == seq.RNAredundant {
470462
seqType = "R"
471463
} else if ab == seq.DNA || ab == seq.DNAredundant {
@@ -575,8 +567,8 @@ func init() {
575567
sumCmd.Flags().BoolP("circular", "c", false, "the file contains a single cicular genome sequence")
576568
sumCmd.Flags().IntP("kmer-size", "k", 1000, "k-mer size for processing circular genomes")
577569
sumCmd.Flags().BoolP("basename", "b", false, "only output basename of files")
578-
sumCmd.Flags().BoolP("remove-gaps", "g", false, "remove gaps")
579-
sumCmd.Flags().StringP("gap-letters", "G", "- .*", "gap letters")
570+
sumCmd.Flags().BoolP("remove-gaps", "g", false, "remove gap characters set in the option -G/gap-letters")
571+
sumCmd.Flags().StringP("gap-letters", "G", "- .*", "gap letters to delete with the flag -g/--remove-gaps")
580572
sumCmd.Flags().BoolP("all", "a", false, "show all information, including the sequences length and the number of sequences")
581573
sumCmd.Flags().BoolP("rna2dna", "", false, "convert RNA to DNA")
582574
sumCmd.Flags().BoolP("single-strand", "s", false, "only consider the positive strand of a circular genome, e.g., ssRNA virus genomes")

0 commit comments

Comments
 (0)