@@ -328,12 +328,34 @@ class GenerateReportForExperiment(RerunnableTaskMixin, luigi.Task):
328
328
"""
329
329
330
330
def run(self):
    """Generate the MultiQC report for this experiment.

    Gathers the trimmed-read, FastQC and alignment output directories,
    writes a ``sample_names.tsv`` file mapping each FastQC archive name to
    a sample ID, and then yields the ``multiqc.GenerateReport`` task with
    that file passed as ``replace_names``.
    """
    fastqc_dir = join(cfg.OUTPUT_DIR, cfg.DATAQCDIR, self.experiment_id)
    search_dirs = [
        join(cfg.OUTPUT_DIR, 'data-trimmed', self.experiment_id),
        fastqc_dir,
        join(cfg.OUTPUT_DIR, cfg.ALIGNDIR, self.reference_id, self.experiment_id),
    ]
    self.output().makedirs()

    # The mapping file lives next to the report itself; deriving its path
    # from output() keeps the two locations from drifting apart.
    report_dir = dirname(self.output().path)
    sample_names_file = join(report_dir, 'sample_names.tsv')

    # generate sample mapping for FastQC files
    fastqc_suffix = '_fastqc.zip'
    with open(sample_names_file, 'w') as out:
        for root, dirs, files in os.walk(fastqc_dir):
            # os.walk yields entries in arbitrary order; sorting both the
            # traversal and the file names keeps the mapping reproducible.
            dirs.sort()
            for f in sorted(files):
                if not f.endswith(fastqc_suffix):
                    continue
                fastqc_sample_id = f[:-len(fastqc_suffix)]
                # The directory holding the archive names the sample.
                sample_id = os.path.basename(root)
                # To avoid sample name clashes for paired-end sequencing,
                # the mate suffix is carried over to the sample ID. In
                # single-end sequencing, fastq-dump does not produce _1/_2
                # suffixes, so the FastQC metrics appear in the same row.
                if fastqc_sample_id.endswith('_1'):
                    sample_id += '_1'
                elif fastqc_sample_id.endswith('_2'):
                    sample_id += '_2'
                # Exactly one tab between the two fields and no padding,
                # so neither name picks up stray whitespace.
                out.write(f'{fastqc_sample_id}\t{sample_id}\n')

    yield multiqc.GenerateReport(
        search_dirs,
        report_dir,
        replace_names=sample_names_file,
        force=self.rerun,
    )
337
359
338
360
def output(self):
    """Return the local target for this experiment's MultiQC HTML report."""
    report_path = join(
        cfg.OUTPUT_DIR,
        'report',
        self.reference_id,
        self.experiment_id,
        'multiqc_report.html',
    )
    return luigi.LocalTarget(report_path)
0 commit comments