Skip to content

Commit 72fe63e

Browse files
committed
add md5sum functionality, to do: finish implementation
1 parent d3c0b89 commit 72fe63e

File tree

6 files changed

+50
-7
lines changed

6 files changed

+50
-7
lines changed

RNAseq/Workflow_Documentation/NF_RCP/workflow_code/conf/by_docker_image.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ process {
6060
container = "quay.io/biocontainers/rsem:1.3.3--pl526ha52163a_0"
6161
}
6262

63-
withName: 'GET_ACCESSIONS|FETCH_ISA|ISA_TO_RUNSHEET|RUNSHEET_FROM_ISA|GENERATE_MD5SUMS|SOFTWARE_VERSIONS|UPDATE_ISA_TABLES|PARSE_QC_METRICS|REMOVE_RRNA_FEATURECOUNTS' {
63+
withName: 'GET_ACCESSIONS|FETCH_ISA|ISA_TO_RUNSHEET|RUNSHEET_FROM_ISA|GENERATE_MD5SUMS|SOFTWARE_VERSIONS|UPDATE_ISA_TABLES|PARSE_QC_METRICS|REMOVE_RRNA_FEATURECOUNTS|MD5SUM' {
6464
container = "quay.io/nasa_genelab/dp_tools:1.3.5"
6565
}
6666

RNAseq/Workflow_Documentation/NF_RCP/workflow_code/conf/local.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ process {
1717
maxErrors = '-1'
1818

1919
// See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
20-
withName:'GET_ACCESSIONS|FETCH_ISA|ISA_TO_RUNSHEET|PARSE_ANNOTATIONS_TABLE|COPY_READS|GET_MAX_READ_LENGTH|ADD_GENE_ANNOTATIONS|EXTEND_DGE_TABLE|VV_RAW_READS|GET_GTF_FEATURES|REMOVE_RRNA_FEATURECOUNTS' {
20+
withName:'GET_ACCESSIONS|FETCH_ISA|ISA_TO_RUNSHEET|PARSE_ANNOTATIONS_TABLE|COPY_READS|GET_MAX_READ_LENGTH|ADD_GENE_ANNOTATIONS|EXTEND_DGE_TABLE|VV_RAW_READS|GET_GTF_FEATURES|REMOVE_RRNA_FEATURECOUNTS|MD5SUM' {
2121
cpus = { 1 }
2222
memory = { 2.GB }
2323
}

RNAseq/Workflow_Documentation/NF_RCP/workflow_code/conf/slurm.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ process {
1818
cache = 'lenient'
1919

2020
// See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
21-
withName:'GET_ACCESSIONS|FETCH_ISA|ISA_TO_RUNSHEET|PARSE_ANNOTATIONS_TABLE|COPY_READS|GET_MAX_READ_LENGTH|ADD_GENE_ANNOTATIONS|EXTEND_DGE_TABLE|VV_RAW_READS|GET_GTF_FEATURES|REMOVE_RRNA_FEATURECOUNTS' {
21+
withName:'GET_ACCESSIONS|FETCH_ISA|ISA_TO_RUNSHEET|PARSE_ANNOTATIONS_TABLE|COPY_READS|GET_MAX_READ_LENGTH|ADD_GENE_ANNOTATIONS|EXTEND_DGE_TABLE|VV_RAW_READS|GET_GTF_FEATURES|REMOVE_RRNA_FEATURECOUNTS|MD5SUM' {
2222
cpus = { 1 }
2323
memory = { 2.GB }
2424
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
process MD5SUM {
2+
/**
3+
* Compute md5sums for the staged file(s), or recursively for a single staged directory
4+
*
5+
* Inputs:
6+
* files - path to file(s) or a directory
7+
* params.md5sumLabel - label used as the output file name prefix (a param, not a channel input)
8+
*
9+
* Outputs:
10+
* {md5sumLabel}_md5sum_GLbulkRNAseq.tsv - Raw md5sums
11+
*/
12+
13+
input:
14+
path files
15+
16+
output:
17+
path("${ params.md5sumLabel }_md5sum_GLbulkRNAseq.tsv"), emit: md5sums
18+
19+
script:
20+
"""
21+
# Generate raw md5sums; -L lets find descend into a directory staged as a symlink (Nextflow's default staging mode)
22+
if [ -d "${ files }" ]; then
23+
find -L "${ files }" -type f -exec md5sum {} \\; > ${ params.md5sumLabel }_md5sum_GLbulkRNAseq.tsv
24+
else
25+
md5sum ${ files } > ${ params.md5sumLabel }_md5sum_GLbulkRNAseq.tsv
26+
fi
27+
28+
"""
29+
}

RNAseq/Workflow_Documentation/NF_RCP/workflow_code/modules/software_versions.nf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,11 @@ process SOFTWARE_VERSIONS {
33
path versions_file
44

55
output:
6-
path "software_versions_GLbulkRNAseq.txt", emit: software_versions
6+
path "software_versions_GLbulkRNAseq.md", emit: software_versions
77
path "software_versions_GLbulkRNAseq.yaml", emit: software_versions_yaml
88

99
script:
1010
"""
11-
software_versions.py ${versions_file} software_versions_GLbulkRNAseq.txt --assay rnaseq
11+
software_versions.py ${versions_file} software_versions_GLbulkRNAseq.md --assay rnaseq
1212
"""
1313
}

RNAseq/Workflow_Documentation/NF_RCP/workflow_code/workflows/rnaseq_microbes.nf

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@ include { ADD_GENE_ANNOTATIONS } from '../modules/add_gene_annotations.nf'
5858

5959
include { SOFTWARE_VERSIONS } from '../modules/software_versions.nf'
6060

61+
include { MD5SUM as RAW_MD5SUM } from '../modules/md5sum.nf' addParams(md5sumLabel:"raw")
62+
include { MD5SUM as PROCESSED_MD5SUM } from '../modules/md5sum.nf' addParams(md5sumLabel:"processed")
63+
6164

6265
include { VV_RAW_READS;
6366
VV_TRIMMED_READS;
@@ -314,10 +317,10 @@ workflow RNASEQ_MICROBES {
314317
| mix(FEATURECOUNTS.out.versions)
315318
| mix(RAW_READS_MULTIQC.out.versions)
316319
| mix(DGE_DESEQ2.out.versions)
320+
| mix(ch_nextflow_version)
317321
// Process the versions:
318322
ch_software_versions
319323
| unique // Remove duplicates
320-
| mix(ch_nextflow_version) // Add Nextflow version
321324
| collectFile( // Combine all into one file
322325
name: "software_versions.txt",
323326
newLine: true,
@@ -327,6 +330,17 @@ workflow RNASEQ_MICROBES {
327330

328331
SOFTWARE_VERSIONS(ch_final_software_versions)
329332

333+
334+
// Generate md5sums for raw and processed data
335+
RAW_MD5SUM( STAGE_RAW_READS.out.ch_all_raw_reads
336+
| concat (RAW_READS_MULTIQC.out.zipped_report) // to do: reimplement zip output w/ cleaned paths
337+
| collect)
338+
339+
// PROCESSED_MD5SUM(x
340+
// | concat(y)
341+
// | collect
342+
// )
343+
330344
emit:
331-
EXTRACT_RRNA.out.rrna_ids
345+
RAW_MD5SUM.out.md5sums
332346
}

0 commit comments

Comments
 (0)