Skip to content

Commit 91114a0

Browse files
committed
add nextflow schema without runsheet schema
1 parent 72fe63e commit 91114a0

File tree

3 files changed

+286
-2
lines changed

3 files changed

+286
-2
lines changed

RNAseq/Workflow_Documentation/NF_RCP/workflow_code/nextflow.config

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,11 @@
66
----------------------------------------------------------------------------------------
77
*/
88

9+
// Plugins
10+
plugins {
11+
id 'nf-schema@2.2.0'
12+
}
13+
914
// Global default params, used in configs
1015
params {
1116
mode = 'default' // Set to 'microbes' for processing microbes (Bowtie2)
@@ -139,7 +144,7 @@ dag {
139144
manifest {
140145
name = 'NASA GeneLab Data Processing RNA-Seq Pipeline'
141146
homePage = 'https://github.com/nasa/GeneLab_Data_Processing/tree/master/RNAseq'
142-
description = 'RNA-Seq Pipeline for Document GL-DPPD-7101-G.'
147+
description = 'RNA-Seq Pipeline for Documents GL-DPPD-7101-G and GL-DPPD-7115.'
143148
mainScript = 'main.nf'
144149
nextflowVersion = '!>=24.04.4'
145150
version = '2.0.0'
Lines changed: 271 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,271 @@
1+
{
2+
"$schema": "https://json-schema.org/draft/2020-12/schema",
3+
"$id": "stub.json",
4+
"title": "NASA GeneLab Data Processing RNA-Seq Pipeline pipeline parameters",
5+
"description": "RNA-Seq Pipeline for Documents GL-DPPD-7101-G and GL-DPPD-7115.",
6+
"type": "object",
7+
"properties": {
8+
"outdir": {
9+
"type": "string",
10+
"format": "directory-path",
11+
"description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.",
12+
"fa_icon": "fas fa-folder-open",
13+
"default": "./results",
14+
"hidden": true
15+
},
16+
"api_url": {
17+
"type": "string",
18+
"default": "https://osdr.nasa.gov/osdr/data/search?ffield=Study+Assay+Technology+Type&fvalue=RNA+Sequencing+%28RNA-Seq%29&size=2000",
19+
"hidden": true
20+
}
21+
},
22+
"$defs": {
23+
"qc": {
24+
"title": "QC",
25+
"type": "object",
26+
"description": "",
27+
"default": "",
28+
"properties": {
29+
"validate_params": {
30+
"type": "boolean",
31+
"description": "Boolean whether to validate parameters against the schema at runtime",
32+
"default": true,
33+
"fa_icon": "fas fa-check-square",
34+
"hidden": true
35+
},
36+
"multiqc_config": {
37+
"type": "string",
38+
"default": "/home/alexis/git/NewRepo/GeneLab_Data_Processing/RNAseq/Workflow_Documentation/NF_RCP/workflow_code/conf/multiqc.config",
39+
"description": "Path to the MultiQC config file.",
40+
"format": "file-path"
41+
},
42+
"rseqc_sample_count": {
43+
"type": "integer",
44+
"default": 15000000,
45+
"description": "Number of read-pairs used for RSeQC infer_experiment.py and inner_distance.py"
46+
},
47+
"max_flag_code": {
48+
"type": "integer",
49+
"default": 80,
50+
"description": "TO DO: implement",
51+
"hidden": true
52+
},
53+
"skip_vv": {
54+
"type": "boolean",
55+
"description": "TO DO: Skip VV modules.",
56+
"hidden": true
57+
}
58+
},
59+
"required": [
60+
"rseqc_sample_count"
61+
]
62+
},
63+
"boilerplate": {
64+
"title": "Boilerplate",
65+
"type": "object",
66+
"description": "",
67+
"default": "",
68+
"properties": {
69+
"help": {
70+
"type": "string",
71+
"description": "Display help menu and exit."
72+
},
73+
"version": {
74+
"type": "boolean",
75+
"description": "Display version and exit.",
76+
"fa_icon": "fas fa-question-circle",
77+
"hidden": true
78+
},
79+
"email": {
80+
"type": "string",
81+
"hidden": true,
82+
"description": "TO DO: implement"
83+
}
84+
}
85+
},
86+
"local_storage": {
87+
"title": "Local Storage",
88+
"type": "object",
89+
"description": "",
90+
"default": "",
91+
"properties": {
92+
"derived_store_path": {
93+
"type": "string",
94+
"default": "./DerivedReferences",
95+
"description": "Location where the derived reference files will be stored.",
96+
"format": "directory-path"
97+
},
98+
"reference_store_path": {
99+
"type": "string",
100+
"default": "./References",
101+
"description": "Location where the reference fasta and gtf will be stored.",
102+
"format": "directory-path"
103+
}
104+
},
105+
"required": [
106+
"derived_store_path",
107+
"reference_store_path"
108+
]
109+
},
110+
"configs": {
111+
"title": "Configs",
112+
"type": "object",
113+
"description": "",
114+
"default": "",
115+
"properties": {
116+
"publish_dir_mode": {
117+
"type": "string",
118+
"default": "link",
119+
"enum": [
120+
"copy",
121+
"copyNoFollow",
122+
"link",
123+
"move",
124+
"relink",
125+
"symlink"
126+
],
127+
"description": "Nextflow publishdir mode."
128+
}
129+
},
130+
"required": [
131+
"publish_dir_mode"
132+
]
133+
},
134+
"input": {
135+
"title": "Input",
136+
"type": "object",
137+
"description": "",
138+
"default": "",
139+
"properties": {
140+
"accession": {
141+
"type": "string",
142+
"description": "Input OSD or GLDS identifier as 'OSD-#' or 'GLDS-#' if processing an OSDR dataset.",
143+
"pattern": "^(OSD|GLDS)-[0-9]+$"
144+
},
145+
"runsheet_path": {
146+
"type": "string",
147+
"description": "Path to the input runsheet.",
148+
"format": "file-path"
149+
},
150+
"isa_archive_path": {
151+
"type": "string",
152+
"description": "Path to the ISA.zip for an OSDR dataset. '--accession' must also be used.",
153+
"format": "file-path"
154+
},
155+
"mode": {
156+
"type": "string",
157+
"default": "default",
158+
"description": "Specifies whether to use the default Eukaryotes workflow or the Prokaryotes workflow with ' --mode microbes'.",
159+
"enum": [
160+
"default",
161+
"microbes"
162+
]
163+
},
164+
"output_suffix": {
165+
"type": "string",
166+
"default": "_GLbulkRNAseq",
167+
"description": "Specifies a string that should be used to label the output file names."
168+
}
169+
},
170+
"required": [
171+
"mode",
172+
"output_suffix"
173+
]
174+
},
175+
"references": {
176+
"title": "References",
177+
"type": "object",
178+
"description": "By default, reference_table points to GeneLab Reference Annotations Table GL-DPPD-7110-A_annotations.csv which defines reference FASTA and GTF files and associated GeneLab-generated gene annotations.",
179+
"default": "",
180+
"properties": {
181+
"reference_table": {
182+
"type": "string",
183+
"default": "https://raw.githubusercontent.com/nasa/GeneLab_Data_Processing/refs/heads/master/GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A_annotations.csv",
184+
"description": "GL-DPPD-7110-A_annotations.csv"
185+
},
186+
"reference_fasta": {
187+
"type": "string",
188+
"description": "Specifies an alternative reference FASTA file to use in place of the one listed in GL-DPPD-7110-A_annotations.csv."
189+
},
190+
"reference_gtf": {
191+
"type": "string",
192+
"description": "Specifies an alternative reference GTF file to use in place of the one listed in GL-DPPD-7110-A_annotations.csv."
193+
},
194+
"reference_source": {
195+
"type": "string",
196+
"description": "Specifies the source of the alternative reference files. (e.g. \"NCBI\" or \"ENSEMBL\")."
197+
},
198+
"reference_version": {
199+
"type": "string",
200+
"description": "Specifies the version of the alternative reference files used, if available."
201+
}
202+
},
203+
"required": [
204+
"reference_table"
205+
]
206+
},
207+
"debugging_options": {
208+
"title": "Debugging options",
209+
"type": "object",
210+
"description": "Parameters used to reduce the size of inputs or reference data",
211+
"default": "",
212+
"properties": {
213+
"stage_local": {
214+
"type": "boolean",
215+
"default": true,
216+
"description": "Break the workflow after pulling reads into channels."
217+
},
218+
"limit_samples_to": {
219+
"type": "string",
220+
"description": "Only use the first n samples for the analysis"
221+
},
222+
"genome_subsample": {
223+
"type": "string",
224+
"description": "Subsample the reference genome to the specified region / chromosome."
225+
},
226+
"force_single_end": {
227+
"type": "boolean",
228+
"description": "Only use Read 1 for each sample, even if the original data is paired-end."
229+
},
230+
"truncate_to": {
231+
"type": "integer",
232+
"description": "Only use the specified number of reads from each input file."
233+
},
234+
"use_dummy_gene_counts": {
235+
"type": "boolean",
236+
"description": "Generate random gene counts during DGE. This is should be enabled when using '--truncate_to'."
237+
},
238+
"dp_tools_plugin": {
239+
"type": "string",
240+
"description": "Alternative dp_tools plugin."
241+
}
242+
}
243+
}
244+
},
245+
"allOf": [
246+
{
247+
"$ref": "#/$defs/qc"
248+
},
249+
{
250+
"$ref": "#/$defs/boilerplate"
251+
},
252+
{
253+
"$ref": "#/$defs/local_storage"
254+
},
255+
{
256+
"$ref": "#/$defs/configs"
257+
},
258+
{
259+
"$ref": "#/$defs/input"
260+
},
261+
{
262+
"$ref": "#/$defs/references"
263+
},
264+
{
265+
"$ref": "#/$defs/debugging_options"
266+
}
267+
],
268+
"required": [
269+
"outdir"
270+
]
271+
}

RNAseq/Workflow_Documentation/NF_RCP/workflow_code/workflows/rnaseq_microbes.nf

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ include { SOFTWARE_VERSIONS } from '../modules/software_versions.nf'
6161
include { MD5SUM as RAW_MD5SUM } from '../modules/md5sum.nf' addParams(md5sumLabel:"raw")
6262
include { MD5SUM as PROCESSED_MD5SUM } from '../modules/md5sum.nf' addParams(md5sumLabel:"processed")
6363

64+
include { validateParameters; paramsSummaryLog; samplesheetToList } from 'plugin/nf-schema'
6465

6566
include { VV_RAW_READS;
6667
VV_TRIMMED_READS;
@@ -97,6 +98,7 @@ workflow RNASEQ_MICROBES {
9798
derived_store_path
9899
main:
99100
publishdir = "results" // default path passed to publishDir, updated below to "GLDS-#" if processing an OSDR dataset
101+
ch_isa_versions = Channel.empty() // Initialize empty channel for ISA versions
100102

101103
// Set up runsheet
102104
if ( runsheet_path == null ) {
@@ -113,6 +115,7 @@ workflow RNASEQ_MICROBES {
113115
//Convert ISA archive to runsheet
114116
ISA_TO_RUNSHEET( osd_accession, glds_accession, isa_archive, dp_tools_plugin )
115117
runsheet_path = ISA_TO_RUNSHEET.out.runsheet
118+
ch_isa_versions = ISA_TO_RUNSHEET.out.versions // Capture version if ISA_TO_RUNSHEET runs
116119
}
117120

118121
PARSE_RUNSHEET( runsheet_path )
@@ -303,7 +306,7 @@ workflow RNASEQ_MICROBES {
303306

304307
// Mix in versions from each process
305308
ch_software_versions = ch_software_versions
306-
| mix(ISA_TO_RUNSHEET.out.versions)
309+
| mix(ch_isa_versions) // Use the stored versions channel
307310
| mix(GTF_TO_PRED.out.versions)
308311
| mix(PRED_TO_BED.out.versions)
309312
| mix(RAW_FASTQC.out.versions)
@@ -336,6 +339,11 @@ workflow RNASEQ_MICROBES {
336339
| concat (RAW_READS_MULTIQC.out.zipped_report) // to do: reimplement zip output w/ cleaned paths
337340
| collect)
338341

342+
// Validate input parameters
343+
validateParameters()
344+
345+
// Print summary of supplied parameters
346+
log.info paramsSummaryLog(workflow)
339347
// PROCESSED_MD5SUM(x
340348
// | concat(y)
341349
// | collect

0 commit comments

Comments
 (0)