@@ -724,6 +724,7 @@ rule prep_metaquast:
724
724
report = join (workpath ,"{name}" ,"info" ,"{name}.reads_kraken2_report.txt" ),
725
725
output :
726
726
txt = join (workpath ,"{name}" ,"temp" ,"{name}_reads_class_names.txt" ),
727
+ tmp = temp (join (workpath ,"{name}" ,"temp" ,"{name}.metaquast.tmp.fa" )),
727
728
fa = join (workpath ,"{name}" ,"temp" ,"{name}.metaquast.fa" ),
728
729
params :
729
730
rname = 'prepmetaq' ,
@@ -740,14 +741,37 @@ rule prep_metaquast:
740
741
| uniq \\
741
742
| tr ' ' '_' \\
742
743
> {output.txt}
743
-
744
+
745
+ # Subsets NCBI Viral FASTA file
746
+ # to only include viruses found
747
+ # in the sample in a more fault
748
+ # tolerant manner
744
749
paste - - < {params.ncbi_viral} \\
745
- | grep -f {output.txt} \\
746
- | tr '\\ t' '\\ n' \\
747
- | awk -F ',' '{{print $1}}' \\
748
- | tr '/' '_' \\
749
- | cut -d '_' -f1-5 \\
750
- > {output.fa}
750
+ > {output.tmp}
751
+ while read default_pattern; do
752
+ # Backup pattern to search for
753
+ # if default pattern does not
754
+ # exist in the NCBI viral FASTA
755
+ backup_pattern=$(
756
+ echo "$default_pattern" \\
757
+ | awk -F '_' '{{print $1}}'
758
+ )
759
+ match=$(
760
+ grep "$default_pattern" {output.tmp} \\
761
+ || grep "$backup_pattern" {output.tmp} \\
762
+ || true;
763
+ )
764
+ # Check if there was a match
765
+ # prior to adding to FASTA,
766
+ # avoids adding empty str
767
+ if [ "$match" != "" ]; then
768
+ echo "$match" \\
769
+ | tr '\\ t' '\\ n' \\
770
+ | awk -F ',' '{{print $1}}' \\
771
+ | tr '/' '_' \\
772
+ | cut -d '_' -f1-5
773
+ fi
774
+ done < {output.txt} > {output.fa}
751
775
752
776
mkdir -p {params.outdir}
753
777
faSplit byname {output.fa} {params.outdir}/
0 commit comments