Skip to content

Commit a5b15a0

Browse files
committed
add draft runsheet schema check
1 parent 91114a0 commit a5b15a0

File tree

4 files changed

+74
-10
lines changed

4 files changed

+74
-10
lines changed

RNAseq/Workflow_Documentation/NF_RCP/examples/runsheet/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
| Sample Name | string | Sample Name, added as a prefix to sample-specific processed data output files. Should not include spaces or special characters. | Mmus_BAL-TAL_LRTN_BSL_Rep1_B7 |
1919
| has_ERCC | bool | Set to True if ERCC spike-ins are included in the samples. This ensures ERCC normalized DGE is performed in addition to standard DGE. | True |
2020
| paired_end | bool | Set to True if the samples were sequenced as paired-end. If set to False, samples are assumed to be single-end. | False |
21-
| organism | string | Species name used to map to the appropriate gene annotations file. Supported species can be found in the `species` column of the [GL-DPPD-7110_annotations.csv](../../../../../GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110/GL-DPPD-7110_annotations.csv) file. | Mus musculus |
21+
| organism | string | Species name used to map to the appropriate gene annotations file. Supported species can be found in the `species` column of the [GL-DPPD-7110-A_annotations.csv](../../../../../GeneLab_Reference_Annotations/Pipeline_GL-DPPD-7110_Versions/GL-DPPD-7110-A/GL-DPPD-7110-A_annotations.csv) file. | Mus musculus |
2222
| read1_path | string (url or local path) | Location of the raw reads file. For paired-end data, this specifies the forward reads fastq.gz file. | /my/data/sample_1.fastq.gz |
2323
| read2_path | string (url or local path) | Location of the raw reads file. For paired-end data, this specifies the reverse reads fastq.gz file. For single-end data, this column should be omitted. | /my/data/sample_2.fastq.gz |
2424
| Factor Value[<name, e.g. Spaceflight>] | string | A set of one or more columns specifying the experimental group the sample belongs to. In the simplest form, a column named 'Factor Value[group]' is sufficient. | Space Flight |
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
{
2+
"$schema": "https://json-schema.org/draft/2020-12/schema",
3+
"title": "RNAseq Runsheet Schema",
4+
"description": "Schema for validating RNAseq processing runsheets",
5+
"type": "array",
6+
"items": {
7+
"type": "object",
8+
"properties": {
9+
"Sample Name": {
10+
"type": "string",
11+
"description": "Sample Name, added as a prefix to sample-specific processed data output files. Should not include spaces or weird characters."
12+
},
13+
"has_ERCC": {
14+
"type": "boolean",
15+
"description": "Set to True if ERCC spike-ins are included in the samples. This ensures ERCC normalized DGE is performed in addition to standard DGE."
16+
},
17+
"paired_end": {
18+
"type": "boolean",
19+
"description": "Set to True if the samples were sequenced as paired-end. If set to False, samples are assumed to be single-end."
20+
},
21+
"organism": {
22+
"type": "string",
23+
"description": "The organism of the samples. This is used to select the appropriate annotation files."
24+
},
25+
"read1_path": {
26+
"type": "string",
27+
"description": "Location of the raw reads file. For paired-end data, this specifies the forward reads fastq.gz file."
28+
},
29+
"read2_path": {
30+
"type": "string",
31+
"description": "Location of the raw reads file. For paired-end data, this specifies the reverse reads fastq.gz file. For single-end data, this column should be omitted."
32+
},
33+
"Original Sample Name": {
34+
"type": "string",
35+
"description": "Used to map the sample name that will be used for processing to the original sample name. This is often identical except in cases where the original name includes spaces or weird characters."
36+
}
37+
},
38+
"patternProperties": {
39+
"^Factor Value\\[.*\\]$": {
40+
"type": "string",
41+
"description": "Experimental factor values. Column names should be 'Factor Value[factor_name]' where factor_name is the experimental factor (e.g., Spaceflight, Time, Treatment), and values are the corresponding conditions"
42+
}
43+
},
44+
"required": [
45+
"Sample Name",
46+
"has_ERCC",
47+
"paired_end",
48+
"organism",
49+
"read1_path"
50+
],
51+
"if": {
52+
"properties": {
53+
"paired_end": { "const": true }
54+
}
55+
},
56+
"then": {
57+
"required": ["read2_path"]
58+
}
59+
},
60+
"minItems": 2
61+
}

RNAseq/Workflow_Documentation/NF_RCP/workflow_code/nextflow_schema.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,8 @@
145145
"runsheet_path": {
146146
"type": "string",
147147
"description": "Path to the input runsheet.",
148-
"format": "file-path"
148+
"format": "file-path",
149+
"schema": "./conf/runsheet_schema.json"
149150
},
150151
"isa_archive_path": {
151152
"type": "string",

RNAseq/Workflow_Documentation/NF_RCP/workflow_code/workflows/rnaseq_microbes.nf

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,16 @@ workflow RNASEQ_MICROBES {
118118
ch_isa_versions = ISA_TO_RUNSHEET.out.versions // Capture version if ISA_TO_RUNSHEET runs
119119
}
120120

121+
// Validate input parameters
122+
validateParameters()
123+
124+
// Print summary of supplied parameters
125+
log.info paramsSummaryLog(workflow)
126+
// PROCESSED_MD5SUM(x
127+
// | concat(y)
128+
// | collect
129+
// )
130+
121131
PARSE_RUNSHEET( runsheet_path )
122132

123133
samples = PARSE_RUNSHEET.out.samples
@@ -339,15 +349,7 @@ workflow RNASEQ_MICROBES {
339349
| concat (RAW_READS_MULTIQC.out.zipped_report) // to do: reimplement zip output w/ cleaned paths
340350
| collect)
341351

342-
// Validate input parameters
343-
validateParameters()
344352

345-
// Print summary of supplied parameters
346-
log.info paramsSummaryLog(workflow)
347-
// PROCESSED_MD5SUM(x
348-
// | concat(y)
349-
// | collect
350-
// )
351353

352354
emit:
353355
RAW_MD5SUM.out.md5sums

0 commit comments

Comments
 (0)