Skip to content

Commit c34232b

Browse files
committed
Making prep_metaquast rule more fault tolerant
1 parent 9560a93 commit c34232b

File tree

1 file changed

+31
-7
lines changed

1 file changed

+31
-7
lines changed

workflow/rules/paired-end.smk

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -724,6 +724,7 @@ rule prep_metaquast:
724724
report=join(workpath,"{name}","info","{name}.reads_kraken2_report.txt"),
725725
output:
726726
txt=join(workpath,"{name}","temp","{name}_reads_class_names.txt"),
727+
tmp=temp(join(workpath,"{name}","temp","{name}.metaquast.tmp.fa")),
727728
fa=join(workpath,"{name}","temp","{name}.metaquast.fa"),
728729
params:
729730
rname='prepmetaq',
@@ -740,14 +741,37 @@ rule prep_metaquast:
740741
| uniq \\
741742
| tr ' ' '_' \\
742743
> {output.txt}
743-
744+
745+
# Subsets NCBI Viral FASTA file
746+
# to only include viruses found
747+
# in the sample in a more fault
748+
# tolerant manner
744749
paste - - < {params.ncbi_viral} \\
745-
| grep -f {output.txt} \\
746-
| tr '\\t' '\\n' \\
747-
| awk -F ',' '{{print $1}}' \\
748-
| tr '/' '_' \\
749-
| cut -d '_' -f1-5 \\
750-
> {output.fa}
750+
> {output.tmp}
751+
while read default_pattern; do
752+
# Backup pattern to search for
753+
# if default pattern does not
754+
# exist in the NCBI viral FASTA
755+
backup_pattern=$(
756+
echo "$default_pattern" \\
757+
| awk -F '_' '{{print $1}}'
758+
)
759+
match=$(
760+
grep "$default_pattern" {output.tmp} \\
761+
|| grep "$backup_pattern" {output.tmp} \\
762+
|| true;
763+
)
764+
# Check if there was a match
765+
# prior to adding to FASTA,
766+
# avoids adding an empty string
767+
if [ "$match" != "" ]; then
768+
echo "$match" \\
769+
| tr '\\t' '\\n' \\
770+
| awk -F ',' '{{print $1}}' \\
771+
| tr '/' '_' \\
772+
| cut -d '_' -f1-5
773+
fi
774+
done < {output.txt} > {output.fa}
751775
752776
mkdir -p {params.outdir}
753777
faSplit byname {output.fa} {params.outdir}/

0 commit comments

Comments
 (0)