// Run seq_qc.py on a single sequence file and capture its standard output
// as <sample_id>_seq_qc.csv.
//
// Input : a one-element tuple holding the sequence file (seq).
// Output: tuple (sample_id, <sample_id>_seq_qc.csv), emitted as `seq_qc_csv`.
// The CSV is also copied to <params.outdir>/<sample_id>/.
process seq_qc {

    tag { sample_id }

    executor 'local'

    publishDir "${params.outdir}/${sample_id}", mode: 'copy', pattern: "${sample_id}_seq_qc.csv"

    input:
    tuple path(seq)

    output:
    tuple val(sample_id), path("${sample_id}_seq_qc.csv"), emit: seq_qc_csv

    script:
    // sample_id is the input file name up to (not including) its first dot.
    // It is assigned without `def` on purpose: the tag, publishDir and output
    // declarations above all refer to it, so it must not be a script-local variable.
    sample_id = seq.getName().split('\\.')[0]
    """
    seq_qc.py -i ${seq} > ${sample_id}_seq_qc.csv
    """
}
21+
// BLAST one query file against one nucleotide database and post-process the hits.
//
// Input : tuple (query file, database directory, database name).
// Steps performed by the script:
//   1. Write a fixed CSV header, then append the blastn tabular hits converted
//      from tab-separated to comma-separated.
//   2. Collect the unique subject accessions (CSV column 2) and write their
//      FASTA header lines, fetched with blastdbcmd, to <sample_id>_seq_description.
//   3. Only when the database directory is named "2022-11-16_nt" or "refseq_its":
//      resolve the unique subject taxids (CSV column 17) with taxonkit and merge
//      the lineage table into the BLAST report with bind_taxonkit.py.
//
// Every output is declared optional, so a missing file does not by itself
// fail the task; all files matching <sample_id>* are copied to
// <params.outdir>/<sample_id>/.
process blastn {
    // errorStrategy 'ignore'

    publishDir "${params.outdir}/${sample_id}", mode: 'copy', pattern: "${sample_id}*"

    input:
    tuple path(query), path(db_dir), val(db_name)

    output:
    tuple val(sample_id), path("${sample_id}_blast.csv"), emit: blast_csv, optional: true
    tuple val(sample_id), path("${sample_id}_seq_description"), emit: seq_description, optional: true
    tuple val(sample_id), path("${sample_id}_blast_species_genus_results.csv"), emit: taxon_results, optional: true
    tuple val(sample_id), path("${sample_id}_taxon_results.txt"), emit: raw_taxon_results, optional: true

    script:
    // sample_id is the query file name up to (not including) its first dot.
    // Assigned without `def` so the publishDir and output declarations can see it.
    sample_id = query.getName().split('\\.')[0]
    """
    export BLASTDB="${db_dir}"

    echo "query_seq_id,subject_accession,subject_strand,query_length,query_start,query_end,subject_length,subject_start,subject_end,alignment_length,percent_identity,percent_coverage,num_mismatch,num_gaps,e_value,bitscore,subject_taxids,subject_names" > ${sample_id}_blast.csv

    blastn \
      -db ${db_name} \
      -num_threads ${task.cpus} \
      -perc_identity ${params.minid} \
      -qcov_hsp_perc ${params.mincov} \
      -query ${query} \
      -outfmt "6 qseqid saccver sstrand qlen qstart qend slen sstart send length pident qcovhsp mismatch gaps evalue bitscore staxids sscinames" \
    | tr '\\t' ',' >> ${sample_id}_blast.csv

    # Unique subject accessions: column 2 of the report, header line skipped.
    tail -qn+2 ${sample_id}_blast.csv | cut -d',' -f2 | sort -u > seqids
    # NOTE(review): grep exits non-zero when nothing matches; confirm that failing
    # the task is the intended outcome for a query without any hits.
    blastdbcmd -db ${db_name} -entry_batch seqids | grep '>' > ${sample_id}_seq_description

    # Taxonomy lookup is limited to these two database directory names.
    if [ "${db_dir}" == "2022-11-16_nt" ] || [ "${db_dir}" == "refseq_its" ]; then
        # Unique subject taxids: column 17 of the report, header line skipped.
        tail -qn+2 ${sample_id}_blast.csv | cut -d',' -f17 | sort -u > taxids
        taxonkit lineage -r -n taxids > ${sample_id}_taxon_results.txt
        bind_taxonkit.py -f ${sample_id}_taxon_results.txt -b ${sample_id}_blast.csv -o ${sample_id}_blast_species_genus_results.csv
    fi
    """
}
68+
69+
70+
// Run filter_best_bitscore.py on a full BLAST report and capture its standard
// output as <sample_id>_blast_best_bitscore.csv.
//
// Input : tuple (sample_id, full BLAST report file).
// Output: tuple (sample_id, <sample_id>_blast_best_bitscore.csv), emitted as
//         `blast_best_bitscore_csv`; the CSV is also copied to
//         <params.outdir>/<sample_id>/.
process filter_best_bitscore {

    tag { sample_id }

    executor 'local'

    publishDir "${params.outdir}/${sample_id}", mode: 'copy', pattern: "${sample_id}_blast_best_bitscore.csv"

    input:
    tuple val(sample_id), path(full_blast_report)

    output:
    tuple val(sample_id), path("${sample_id}_blast_best_bitscore.csv"), emit: blast_best_bitscore_csv

    script:
    """
    filter_best_bitscore.py -i ${full_blast_report} > ${sample_id}_blast_best_bitscore.csv
    """
}
0 commit comments