Skip to content

Commit cc0cb5e

Browse files
committed
add vv raw reads stub
1 parent 0561c64 commit cc0cb5e

File tree

1 file changed

+137
-0
lines changed
  • RNAseq/Workflow_Documentation/NF_RCP/workflow_code/bin

1 file changed

+137
-0
lines changed
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
#!/usr/bin/env python
2+
import click
3+
from pathlib import Path
4+
import json
5+
import os
6+
7+
# Output structure config
8+
# Per-component output layout for the microbes workflow.
# A component with an "outputs" mapping translates a file-type key into a
# relative directory path; the other components currently only list
# directory names (with empty placeholders for their contents).
_MICROBES_COMPONENTS = {
    "raw_reads": {
        "outputs": {
            "raw_fastq": "00-RawData/Fastq",
            "raw_fastqc": "00-RawData/FastQC_Reports",
            # MultiQC output shares the FastQC reports directory
            "raw_multiqc": "00-RawData/FastQC_Reports",
        },
    },
    "trimmed_reads": {
        "outputs": {
            "fastq": "01-TG_Preproc/Fastq",
            "fastqc": "01-TG_Preproc/FastQC_Reports",
            "trimming_reports": "01-TG_Preproc/Trimming_Reports",
        },
    },
    "alignments": {
        "02-Bowtie2_Alignment": {
            "{sample_name}": {},  # Sample-specific subdirectories
        },
    },
    "counts": {
        "03-FeatureCounts": {},
    },
    "dge": {
        "04-DESeq2_NormCounts": {},
        "04-rRNArm_DESeq2_NormCounts": {},
        "05-rRNArm_DESeq2_DGE": {},
        "05-DESeq2_DGE": {},
    },
}

# Lookup order: assay type -> mode -> "components" -> component name
STRUCTURE = {
    "rnaseq": {
        "microbes": {
            "components": _MICROBES_COMPONENTS,
        },
    },
}
45+
46+
# Future tissue-specific structure could look like:
47+
# Sketch of a per-tissue DGE layout: each directory name carries a
# "{tissue}" placeholder and maps to an empty placeholder dict.
TISSUE_STRUCTURE = {
    "dge": {
        dirname: {}
        for dirname in (
            "04-{tissue}-DESeq2_NormCounts",
            "04-{tissue}-rRNArm_DESeq2_NormCounts",
            "05-{tissue}-rRNArm_DESeq2_DGE",
            "05-{tissue}-DESeq2_DGE",
        )
    },
}
55+
56+
@click.command()
@click.option('--assay-type', type=click.Choice(['rnaseq', 'scrna']), default='rnaseq')
@click.option('--assay-suffix', type=click.STRING, default="_GLbulkRNAseq")
@click.option('--runsheet-path', type=click.Path(exists=True), help="Path to runsheet")
@click.option('--outdir', type=click.Path(), default=Path.cwd(), help="Output directory")
@click.option('--paired-end', type=click.STRING, help="Paired end setting")
@click.option('--mode', type=click.Choice(['microbes', 'default']), default='default')
@click.option('--run-components', type=click.STRING, help="Component to validate (e.g. raw_reads)")
@click.option('--raw-fastq', type=click.Path(exists=True), help="Path to raw fastq directory")
@click.option('--raw-fastqc', type=click.Path(exists=True), help="Path to raw fastqc directory")
@click.option('--raw-multiqc', type=click.Path(exists=True), help="Path to raw multiqc directory")
def vv(
    assay_type,
    assay_suffix,
    runsheet_path,
    outdir,
    paired_end,
    mode,
    run_components,
    raw_fastq,
    raw_fastqc,
    raw_multiqc,
):
    """Organize pipeline outputs and optionally validate"""
    # The docstring above doubles as the command's --help text, so further
    # notes live in comments.
    #
    # Two independent steps:
    #   1. If any raw-reads input path was given, stage it via stage_files.
    #   2. If --run-components was given, write a stub "VV_log.tsv" into the
    #      current working directory.
    # NOTE(review): assay_suffix, runsheet_path, paired_end, mode and outdir
    # are accepted but not used by this stub yet.
    outdir = Path(outdir)

    # Keys match the raw_reads "outputs" entries in STRUCTURE
    raw_inputs = {
        'raw_fastq': raw_fastq,
        'raw_fastqc': raw_fastqc,
        'raw_multiqc': raw_multiqc,
    }
    if any(raw_inputs.values()):
        stage_files(assay_type, 'raw_reads', **raw_inputs)

    # Nothing to validate unless a component was requested
    if not run_components:
        return

    with open("VV_log.tsv", "w") as log_file:
        log_file.write(f"Stub validation log for {run_components}\n")
84+
85+
def stage_files(assay_type, section, **file_paths):
    """
    Stage the supplied input paths into the layout defined in STRUCTURE.

    Each keyword names a file type of the chosen section's "outputs"
    mapping; its value is linked into the matching directory through
    stage_to_location. Keywords whose value is falsy (option not supplied)
    are ignored.

    Args:
        assay_type (str): e.g. 'rnaseq'
        section (str): e.g. 'raw_reads'
        **file_paths: Keyword args for direct file paths (raw_fastq, raw_fastqc, etc)

    Raises:
        KeyError: If assay_type or section is missing from STRUCTURE, or if
            a supplied keyword is not a file type of that section. The
            file-type check runs before anything is staged, so a bad
            keyword never leaves a partially staged output directory.
    """
    # NOTE: only the 'microbes' layout exists in STRUCTURE, so it is used
    # regardless of the mode selected on the command line.
    structure = STRUCTURE[assay_type]['microbes']['components'][section]['outputs']

    # Only process paths that were actually provided
    provided = {file_type: path for file_type, path in file_paths.items() if path}

    # Validate everything up front instead of failing midway through staging
    unknown = [file_type for file_type in provided if file_type not in structure]
    if unknown:
        raise KeyError(
            f"Unknown file type(s) for section '{section}': {', '.join(unknown)}; "
            f"expected one of: {', '.join(structure)}"
        )

    # Direct path staging
    for file_type, path in provided.items():
        stage_to_location(path, structure[file_type])
101+
102+
def _symlink_once(src, dst):
    """Create symlink dst -> src, tolerating a link that already points at src."""
    try:
        os.symlink(src, dst)
    except FileExistsError:
        # A previous run already staged this exact file: nothing to do.
        if os.path.islink(dst) and os.path.realpath(dst) == src:
            return
        # Anything else is a genuine name conflict; surface it to the caller.
        raise


def stage_to_location(source_path, target_dir):
    """
    Helper to stage files to their target location via symlinks.

    The target directory is created if needed. When source_path is a
    directory, each of its entries is linked directly into target_dir;
    otherwise source_path itself is linked under its own base name. Links
    always point at the fully resolved source so they do not depend on
    intermediate symlinks.

    Staging is idempotent: an existing link that already resolves to the
    same source is left untouched, so the function can be re-run safely.

    Args:
        source_path (str): File or directory to stage.
        target_dir (str): Directory that receives the symlinks.

    Raises:
        FileExistsError: If a destination name already exists and is not a
            symlink resolving to the same source.
    """
    os.makedirs(target_dir, exist_ok=True)

    if os.path.isdir(source_path):
        # For directories, link their contents directly into target_dir
        sources = [
            os.path.join(source_path, item)
            for item in sorted(os.listdir(source_path))
        ]
    else:
        # For single files
        sources = [source_path]

    for source in sources:
        # Get the ultimate source by following all symlinks
        ultimate_source = os.path.realpath(source)
        destination = os.path.join(target_dir, os.path.basename(source))
        _symlink_once(ultimate_source, destination)
119+
120+
def get_target_dir(structure, file_type):
    """
    Traverse structure to find target directory for file type.

    Performs a depth-first search through nested dicts and returns the
    first string value stored under the key file_type. Entries whose value
    is a dict (for example a component name) are not treated as a match;
    the search simply continues inside them.

    Args:
        structure (dict): Nested layout mapping, e.g. STRUCTURE or any
            sub-tree of it.
        file_type (str): Key to look for, e.g. 'raw_fastq'.

    Returns:
        str or None: The directory path for file_type, or None when no
        string value is stored under that key anywhere in structure.
    """
    if not isinstance(structure, dict):
        return None

    candidate = structure.get(file_type)
    if isinstance(candidate, str):
        return candidate

    # Not at this level: descend into each child in insertion order
    for child in structure.values():
        found = get_target_dir(child, file_type)
        if found is not None:
            return found
    return None
124+
125+
if __name__ == '__main__':
    vv()

# Usage examples for stage_files. They are kept as comments because the
# paths are placeholders: as live statements they would run on every import
# of this module.
#
# Component based (not supported yet: 'components' is not a file type of
# the raw_reads outputs in STRUCTURE, so passing it raises KeyError):
#     stage_files('rnaseq', 'raw_reads',
#                 components=['raw_reads'],
#                 raw_fastq='path/to/fastq',
#                 raw_fastqc='path/to/fastqc')
#
# Direct path based:
#     stage_files('rnaseq', 'raw_reads',
#                 raw_fastq='path/to/fastq',
#                 raw_fastqc='path/to/fastqc')

0 commit comments

Comments
 (0)