Skip to content

Commit 4540927

Browse files
committed
add up to bowtie
1 parent 8ff9b12 commit 4540927

File tree

9 files changed

+74
-16
lines changed

9 files changed

+74
-16
lines changed
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
#!/usr/bin/env python3
2+
3+
import sys
4+
import re
5+
6+
def gtf_to_bed(input_file, output_file):
    """
    Convert a GTF file to 6-column BED format.

    Each usable GTF record becomes one BED line holding, tab-separated:
    chrom, 0-based start, end, gene_id, a constant score of 0, and strand.
    Lines are skipped (never raised on) when they are comments, have fewer
    than nine tab-separated fields, have non-integer coordinates, or have a
    start below 1 (which would produce a negative BED start).

    Parameters:
        input_file (str): Path to input GTF file
        output_file (str): Path to output BED file
    """
    # Compiled once, outside the per-line loop. The leading \b stops attribute
    # keys that merely end in "gene_id" (e.g. ref_gene_id) from matching.
    gene_id_pattern = re.compile(r'\bgene_id "([^"]+)"')

    with open(input_file, 'r') as f_in, open(output_file, 'w') as f_out:
        for line in f_in:
            # Skip comment lines
            if line.startswith('#'):
                continue

            fields = line.strip().split('\t')
            if len(fields) < 9:  # GTF must have 9 fields
                continue

            # Only the integer conversions can fail once the field count is
            # known, so the try block is limited to them.
            try:
                start = int(fields[3]) - 1  # GTF is 1-based; BED start is 0-based
                end = int(fields[4])  # BED end is exclusive, so it equals the GTF end
            except ValueError:
                # Skip malformed lines
                continue

            # A GTF start below 1 is out of spec and would yield a negative
            # BED coordinate; treat it like any other malformed record.
            if start < 0:
                continue

            chrom = fields[0]
            strand = fields[6]

            # Extract gene_id from attributes, falling back to a placeholder
            gene_id_match = gene_id_pattern.search(fields[8])
            gene_id = gene_id_match.group(1) if gene_id_match else "unknown"

            # Write BED line
            f_out.write(f"{chrom}\t{start}\t{end}\t{gene_id}\t0\t{strand}\n")
42+
43+
if __name__ == "__main__":
    # Expect exactly two positional arguments: the input GTF and the output BED.
    if len(sys.argv) != 3:
        # Passing a string to sys.exit writes it to stderr and exits with
        # status 1, so the usage text never lands on stdout.
        sys.exit("Usage: python gtf_to_bed.py input.gtf output.bed")

    gtf_to_bed(sys.argv[1], sys.argv[2])

RNAseq/Workflow_Documentation/NF_RCP/workflow_code/conf/by_docker_image.config

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ process {
1919
container = "quay.io/biocontainers/samtools:1.21--h50ea8bc_0"
2020
}
2121

22-
withName: 'QUANTIFY_STAR_GENES|QUANTIFY_RSEM_GENES|DESEQ2_DGE|ADD_GENE_ANNOTATIONS|EXTEND_DGE_TABLE' {
22+
withName: 'QUANTIFY_STAR_GENES|QUANTIFY_RSEM_GENES|DGE_DESEQ2|ADD_GENE_ANNOTATIONS|EXTEND_DGE_TABLE' {
2323
// This image includes a collection of R libraries that support all R scripts in the workflow
2424
container = "quay.io/torres-alexis/gl_images:DESeq2_1.0.9"
2525
}
@@ -31,7 +31,7 @@ process {
3131

3232
withName: 'MULTIQC' {
3333
// MultiQC 1.26 (12/21/2024)
34-
container = "quay.io/biocontainers/multiqc:1.26--pyhdfd78af_0"
34+
container = "quay.io/nasa_genelab/dp_tools:1.3.5"
3535
}
3636

3737
withName: 'TRIMGALORE' {
@@ -60,12 +60,12 @@ process {
6060
container = "quay.io/biocontainers/rsem:1.3.3--pl526ha52163a_0"
6161
}
6262

63-
withName: 'GET_ACCESSIONS|FETCH_ISA|ISA_TO_RUNSHEET|RUNSHEET_FROM_ISA|GENERATE_MD5SUMS|SOFTWARE_VERSIONS|UPDATE_ISA_TABLES|PARSE_QC_METRICS' {
63+
withName: 'GET_ACCESSIONS|FETCH_ISA|ISA_TO_RUNSHEET|RUNSHEET_FROM_ISA|GENERATE_MD5SUMS|SOFTWARE_VERSIONS|UPDATE_ISA_TABLES|PARSE_QC_METRICS|GTF_TO_BED' {
6464
container = "quay.io/nasa_genelab/dp_tools:1.3.5"
6565
}
6666

6767
withLabel: 'VV' {
68-
// container = "quay.io/nasa_genelab/dp_tools:1.3.5"
68+
container = "quay.io/nasa_genelab/dp_tools:1.3.5"
6969
}
7070
// withName: 'QUALIMAP_BAM_QC|QUALIMAP_RNASEQ_QC' {
7171
container = "quay.io/biocontainers/qualimap:2.3--hdfd78af_0"

RNAseq/Workflow_Documentation/NF_RCP/workflow_code/conf/slurm.config

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ process {
1717
maxErrors = '-1'
1818

1919
// See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
20-
withName:'GET_ACCESSIONS|FETCH_ISA|ISA_TO_RUNSHEET|PARSE_ANNOTATIONS_TABLE|COPY_READS|GET_MAX_READ_LENGTH|ADD_GENE_ANNOTATIONS|EXTEND_DGE_TABLE|VV_RAW_READS' {
20+
withName:'GET_ACCESSIONS|FETCH_ISA|ISA_TO_RUNSHEET|PARSE_ANNOTATIONS_TABLE|COPY_READS|GET_MAX_READ_LENGTH|ADD_GENE_ANNOTATIONS|EXTEND_DGE_TABLE|VV_RAW_READS|GTF_TO_BED' {
2121
cpus = { 1 }
2222
memory = { 2.GB }
2323
}
@@ -46,6 +46,14 @@ process {
4646
cpus = { 8 * task.attempt }
4747
memory = { 32.GB * task.attempt }
4848
}
49+
withName:BUILD_BOWTIE2_INDEX {
50+
cpus = { 8 * task.attempt }
51+
memory = { 32.GB * task.attempt }
52+
}
53+
withName:ALIGN_BOWTIE2 {
54+
cpus = { 4 * task.attempt }
55+
memory = { 16.GB * task.attempt }
56+
}
4957
withName:INFER_EXPERIMENT {
5058
// To do: test 2, 1-2gb, consider qualimap alternatives for rseqc processes
5159
cpus = { 2 * task.attempt }

RNAseq/Workflow_Documentation/NF_RCP/workflow_code/modules/align_bowtie2.nf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ process ALIGN_BOWTIE2 {
2121
2222
2323
mkdir -p ${ meta.id }
24-
bowtie2 -x ${ BOWTIE2_INDEX_DIR } \
24+
bowtie2 -x ${ BOWTIE2_INDEXES } \
2525
${readArgs} \
2626
--threads ${ task.cpus } \
2727
--minins 0 \

RNAseq/Workflow_Documentation/NF_RCP/workflow_code/modules/build_bowtie2_index.nf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
process BUILD_BOWTIE2_INDEX {
22
// Builds Bowtie 2 index, this is ercc-spike-in and organism specific
3-
tag "Refs: ${ genome_fasta }, ${ genome_gtf }, Source: ${reference_source}${reference_source.toLowerCase().contains('ensembl') ? ', Version: ' + reference_version : ''}, GenomeSubsample: ' + params.genome_subsample : ''}"
3+
tag "Refs: ${ genome_fasta.baseName }, ${ genome_gtf.baseName }, Source: ${reference_source}${reference_source.toLowerCase().contains('ensembl') ? ', Version: ' + reference_version : ''}${params.genome_subsample ? ', GenomeSubsample: ' + params.genome_subsample : ''}"
44
storeDir "${ derived_store_path }/Bowtie2_Indices/${ reference_source }/${reference_source.toLowerCase().contains('ensembl') ? reference_version + '/' : ''}${ meta.organism_sci }"
55

66
input:

RNAseq/Workflow_Documentation/NF_RCP/workflow_code/modules/gtf_to_bed.nf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,6 @@ process GTF_TO_BED {
1414

1515
script:
1616
"""
17-
gtf_to_bed.py ${ genome_gtf } ${ genome_gtf.baseName }.bed
17+
python gtf_to_bed.py ${ genome_gtf } ${ genome_gtf.baseName }.bed
1818
"""
1919
}

RNAseq/Workflow_Documentation/NF_RCP/workflow_code/nextflow.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,6 @@ manifest {
141141
homePage = 'https://github.com/nasa/GeneLab_Data_Processing/tree/master/RNAseq'
142142
description = 'RNA-Seq Pipeline for Document GL-DPPD-7101-G.'
143143
mainScript = 'main.nf'
144-
nextflowVersion = '!>=24.10.3'
144+
nextflowVersion = '!>=24.04.4'
145145
version = '2.0.0'
146146
}

RNAseq/Workflow_Documentation/NF_RCP/workflow_code/workflows/rnaseq.nf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ workflow RNASEQ {
167167
// Metadata and reference files are ready. Stage the raw reads, find the max read length, and build the STAR index.
168168

169169
// Stage the raw or truncated reads.
170-
STAGE_RAW_READS( publishdir, samples )
170+
STAGE_RAW_READS( samples )
171171
raw_reads = STAGE_RAW_READS.out.raw_reads
172172
samples_txt = STAGE_RAW_READS.out.samples_txt
173173
//samples_txt | view

RNAseq/Workflow_Documentation/NF_RCP/workflow_code/workflows/rnaseq_microbes.nf

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ workflow RNASEQ_MICROBES {
150150
// Metadata and reference files are ready. Stage the raw reads, find the max read length, and build the Bowtie 2 index.
151151

152152
// Stage the raw or truncated reads.
153-
STAGE_RAW_READS( publishdir, samples )
153+
STAGE_RAW_READS( samples )
154154
raw_reads = STAGE_RAW_READS.out.raw_reads
155155
samples_txt = STAGE_RAW_READS.out.samples_txt
156156
//samples_txt | view
@@ -182,7 +182,7 @@ workflow RNASEQ_MICROBES {
182182
| set { trimmed_fastqc_zip }
183183

184184

185-
// Build Bowtie 2 genome index
185+
// // Build Bowtie 2 genome index
186186
BUILD_BOWTIE2_INDEX(derived_store_path, organism_sci, reference_source, reference_version, genome_references, ch_meta)
187187
bowtie2_index_dir = BUILD_BOWTIE2_INDEX.out.index_dir
188188

@@ -195,10 +195,10 @@ workflow RNASEQ_MICROBES {
195195
// MultiQC
196196
ch_multiqc_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config ) : Channel.fromPath("NO_FILE")
197197
RAW_READS_MULTIQC( samples_txt, raw_fastqc_zip, ch_multiqc_config )
198-
TRIMMING_MULTIQC( samples_txt, trimgalore_reports, ch_multiqc_config )
199-
TRIMMED_READS_MULTIQC( samples_txt, trimmed_fastqc_zip, ch_multiqc_config )
200-
ALIGN_MULTIQC( samples_txt, bowtie2_alignment_logs, ch_multiqc_config )
198+
// TRIMMING_MULTIQC( samples_txt, trimgalore_reports, ch_multiqc_config )
199+
// TRIMMED_READS_MULTIQC( samples_txt, trimmed_fastqc_zip, ch_multiqc_config )
200+
// ALIGN_MULTIQC( samples_txt, bowtie2_alignment_logs, ch_multiqc_config )
201201

202202
emit:
203-
PARSE_QC_METRICS.out.file
203+
RAW_FASTQC.out.fastqc
204204
}

0 commit comments

Comments
 (0)