From a3b254a1593c71935c2b1f6b5a25eddca5f5adc5 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Fri, 12 Jul 2024 13:09:25 -0700 Subject: [PATCH 01/27] (wip) added span-run and nsplit strategies --- src/io/readers.rs | 1 + src/io/readers/precursor_reader/tdf/dia.rs | 20 +-- src/io/readers/spectrum_reader/tdf.rs | 6 +- src/io/readers/spectrum_reader/tdf/dda.rs | 4 + src/io/readers/spectrum_reader/tdf/dia.rs | 35 +++-- .../spectrum_reader/tdf/raw_spectra.rs | 5 + src/io/readers/tdf_utils.rs | 124 ++++++++++++++++++ src/utils/vec_utils.rs | 4 +- 8 files changed, 161 insertions(+), 38 deletions(-) create mode 100644 src/io/readers/tdf_utils.rs diff --git a/src/io/readers.rs b/src/io/readers.rs index 03d5248..fd9f3ce 100644 --- a/src/io/readers.rs +++ b/src/io/readers.rs @@ -4,6 +4,7 @@ mod metadata_reader; mod precursor_reader; mod quad_settings_reader; mod spectrum_reader; +mod tdf_utils; pub use frame_reader::*; pub use metadata_reader::*; diff --git a/src/io/readers/precursor_reader/tdf/dia.rs b/src/io/readers/precursor_reader/tdf/dia.rs index d604769..46fdc37 100644 --- a/src/io/readers/precursor_reader/tdf/dia.rs +++ b/src/io/readers/precursor_reader/tdf/dia.rs @@ -1,5 +1,6 @@ use std::path::{Path, PathBuf}; +use crate::io::readers::tdf_utils::expand_quadrupole_settings; use crate::{ domain_converters::{ ConvertableDomain, Frame2RtConverter, Scan2ImConverter, @@ -34,23 +35,8 @@ impl DIATDFPrecursorReader { SqlWindowGroup::from_sql_reader(&tdf_sql_reader).unwrap(); let quadrupole_settings = QuadrupoleSettingsReader::new(tdf_sql_reader.get_path()); - let mut expanded_quadrupole_settings: Vec = vec![]; - for window_group in window_groups { - let window = window_group.window_group; - let frame = window_group.frame; - let group = &quadrupole_settings[window as usize - 1]; - for sub_window in 0..group.isolation_mz.len() { - let sub_quad_settings = QuadrupoleSettings { - index: frame, - scan_starts: vec![group.scan_starts[sub_window]], - scan_ends: vec![group.scan_ends[sub_window]], - isolation_mz: vec![group.isolation_mz[sub_window]], - isolation_width: vec![group.isolation_width[sub_window]], - collision_energy: vec![group.collision_energy[sub_window]], - }; - expanded_quadrupole_settings.push(sub_quad_settings) - } - } + let expanded_quadrupole_settings = + expand_quadrupole_settings(&window_groups, &quadrupole_settings); Self { path: path.as_ref().to_path_buf(), expanded_quadrupole_settings, diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index fe1cbcc..270102d 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -69,7 +69,11 @@ impl SpectrumReaderTrait for TDFSpectrumReader { } fn len(&self) -> usize { - self.precursor_reader.len() + debug_assert_eq!( + self.precursor_reader.len(), + self.raw_spectrum_reader.len() + ); + self.raw_spectrum_reader.len() } fn get_path(&self) -> PathBuf { diff --git a/src/io/readers/spectrum_reader/tdf/dda.rs b/src/io/readers/spectrum_reader/tdf/dda.rs index c5d9eb8..b309c69 100644 --- a/src/io/readers/spectrum_reader/tdf/dda.rs +++ b/src/io/readers/spectrum_reader/tdf/dda.rs @@ -96,4 +96,8 @@ impl RawSpectrumReaderTrait for DDARawSpectrumReader { }; raw_spectrum } + + fn len(&self) -> usize { + self.offsets.len() - 1 + } } diff --git a/src/io/readers/spectrum_reader/tdf/dia.rs b/src/io/readers/spectrum_reader/tdf/dia.rs index 493152b..24386c8 100644 --- a/src/io/readers/spectrum_reader/tdf/dia.rs +++ b/src/io/readers/spectrum_reader/tdf/dia.rs @@ -1,3 +1,4 @@ +use crate::io::readers::tdf_utils::expand_quadrupole_settings; use crate::{ io::readers::{ file_readers::sql_reader::{ @@ -19,27 +20,12 @@ pub struct DIARawSpectrumReader { impl DIARawSpectrumReader { pub fn new(tdf_sql_reader: &SqlReader, frame_reader: FrameReader) -> Self { - let window_groups = + let window_groups: Vec = SqlWindowGroup::from_sql_reader(&tdf_sql_reader).unwrap(); let quadrupole_settings = QuadrupoleSettingsReader::new(&tdf_sql_reader.get_path()); - let mut expanded_quadrupole_settings: Vec = vec![]; - for window_group in window_groups { - let window = window_group.window_group; - let frame = window_group.frame; - let group = &quadrupole_settings[window as usize - 1]; - for sub_window in 0..group.isolation_mz.len() { - let sub_quad_settings = QuadrupoleSettings { - index: frame, - scan_starts: vec![group.scan_starts[sub_window]], - scan_ends: vec![group.scan_ends[sub_window]], - isolation_mz: vec![group.isolation_mz[sub_window]], - isolation_width: vec![group.isolation_width[sub_window]], - collision_energy: vec![group.collision_energy[sub_window]], - }; - expanded_quadrupole_settings.push(sub_quad_settings) - } - } + let expanded_quadrupole_settings = + expand_quadrupole_settings(&window_groups, &quadrupole_settings); Self { expanded_quadrupole_settings, frame_reader, @@ -50,6 +36,15 @@ impl DIARawSpectrumReader { impl RawSpectrumReaderTrait for DIARawSpectrumReader { fn get(&self, index: usize) -> RawSpectrum { let quad_settings = &self.expanded_quadrupole_settings[index]; + if index < 10 { + println!("{}", index); + println!("{:?}", quad_settings); + } + if index > (self.expanded_quadrupole_settings.len() - 10) { + println!("{}", index); + println!("{:?}", quad_settings); + } + let collision_energy = quad_settings.collision_energy[0]; let isolation_mz = quad_settings.isolation_mz[0]; let isolation_width = quad_settings.isolation_width[0]; @@ -75,4 +70,8 @@ impl RawSpectrumReaderTrait for DIARawSpectrumReader { }; raw_spectrum } + + fn len(&self) -> usize { + self.expanded_quadrupole_settings.len() + } } diff --git a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs index 156b6a4..ea88d2b 100644 --- a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs +++ b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs @@ -110,8 +110,13 @@ impl RawSpectrumReader { pub fn get(&self, index: usize) -> RawSpectrum { self.raw_spectrum_reader.get(index) } + + pub fn len(&self) -> usize { + self.raw_spectrum_reader.len() + } } pub trait RawSpectrumReaderTrait: Sync { fn get(&self, index: usize) -> RawSpectrum; + fn len(&self) -> usize; } diff --git a/src/io/readers/tdf_utils.rs b/src/io/readers/tdf_utils.rs new file mode 100644 index 0000000..ff4d938 --- /dev/null +++ b/src/io/readers/tdf_utils.rs @@ -0,0 +1,124 @@ +use crate::io::readers::file_readers::sql_reader::frame_groups::SqlWindowGroup; +use crate::ms_data::QuadrupoleSettings; + +type SpanStep = (usize, usize); + +enum QuadWindowExpansionStrategy { + None, + Even(usize), + Uniform(SpanStep), +} + +fn scan_range_subsplit( + start: usize, + end: usize, + strategy: &QuadWindowExpansionStrategy, +) -> Vec<(usize, usize)> { + let out = match strategy { + QuadWindowExpansionStrategy::None => { + vec![(start, end)] + }, + QuadWindowExpansionStrategy::Even(num_splits) => { + let sub_subwindow_width = (end - start) / (num_splits + 1); + let mut out = Vec::new(); + for sub_subwindow in 0..num_splits.clone() { + let sub_subwindow_scan_start = + start + (sub_subwindow_width * sub_subwindow); + let sub_subwindow_scan_end = + start + (sub_subwindow_width * (sub_subwindow + 2)); + + out.push((sub_subwindow_scan_start, sub_subwindow_scan_end)) + } + out + }, + QuadWindowExpansionStrategy::Uniform((span, step)) => { + let mut curr_start = start.clone(); + let mut curr_end = start + span; + let mut out = Vec::new(); + while curr_end < end { + out.push((curr_start, curr_end)); + curr_start += step; + curr_end += step; + } + out + }, + }; + out +} + +pub fn expand_quadrupole_settings( + window_groups: &[SqlWindowGroup], + quadrupole_settings: &[QuadrupoleSettings], +) -> Vec { + // Read the 'NUM_SUB_SUB_SPLITS' from env variables ... default to 1 + // (for now) + + let splits = match std::env::var("NUM_SUB_SUB_SPLITS") { + Ok(s) => match s.parse::() { + Ok(n) => { + println!("Number of splits: {} from env", n); + QuadWindowExpansionStrategy::Even(n) + }, + Err(_) => { + println!("Invalid number of splits: {}", s); + QuadWindowExpansionStrategy::None + }, + }, + Err(_) => match std::env::var("SUB_SPLITS_SPAN") { + Ok(s) => match s.parse::() { + Ok(n) => { + println!("Number of scans per split: {} from env", n); + QuadWindowExpansionStrategy::Uniform((n, n / 2)) + }, + Err(_) => { + println!("Invalid number of splits: {}", s); + QuadWindowExpansionStrategy::None + }, + }, + Err(_) => QuadWindowExpansionStrategy::None, + }, + }; + + let mut expanded_quadrupole_settings: Vec = vec![]; + for window_group in window_groups { + let window = window_group.window_group; + let frame = window_group.frame; + let group = &quadrupole_settings[window as usize - 1]; + for sub_window in 0..group.isolation_mz.len() { + let subwindow_scan_start = group.scan_starts[sub_window]; + let subwindow_scan_end = group.scan_ends[sub_window]; + for (sws, swe) in scan_range_subsplit( + subwindow_scan_start, + subwindow_scan_end, + &splits, + ) { + assert!( + sws >= subwindow_scan_start, + "{} >= {} not true", + sws, + subwindow_scan_start + ); + assert!( + swe <= subwindow_scan_end, + "{} <= {} not true", + swe, + subwindow_scan_end + ); + let sub_quad_settings = QuadrupoleSettings { + index: frame, + scan_starts: vec![sws], + scan_ends: vec![swe], + isolation_mz: vec![group.isolation_mz[sub_window]], + isolation_width: vec![group.isolation_width[sub_window]], + collision_energy: vec![group.collision_energy[sub_window]], + }; + expanded_quadrupole_settings.push(sub_quad_settings) + } + } + } + println!( + "Number of expanded quad settings {}", + expanded_quadrupole_settings.len() + ); + expanded_quadrupole_settings +} diff --git a/src/utils/vec_utils.rs b/src/utils/vec_utils.rs index 724fc3c..3ee53c0 100644 --- a/src/utils/vec_utils.rs +++ b/src/utils/vec_utils.rs @@ -12,8 +12,8 @@ pub fn group_and_sum + Copy>( return (vec![], vec![]); } let order: Vec = argsort(&groups); - let mut new_groups: Vec = vec![]; - let mut new_values: Vec = vec![]; + let mut new_groups: Vec = Vec::with_capacity(order.len()); + let mut new_values: Vec = Vec::with_capacity(order.len()); let mut current_group: T = groups[order[0]]; let mut current_value: U = values[order[0]]; for &index in &order[1..] { From f5fb8d943b59683000dcc875863a04bb902db847 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Tue, 16 Jul 2024 23:42:19 -0700 Subject: [PATCH 02/27] propagated configs (untested) --- src/file_readers.rs | 18 +++- src/io/readers/frame_reader.rs | 9 +- src/io/readers/precursor_reader.rs | 18 +++- src/io/readers/precursor_reader/tdf.rs | 38 ++++++-- src/io/readers/precursor_reader/tdf/dia.rs | 25 ++++- src/io/readers/spectrum_reader.rs | 36 ++++++- src/io/readers/spectrum_reader/minitdf.rs | 2 +- src/io/readers/spectrum_reader/tdf.rs | 32 +++--- src/io/readers/spectrum_reader/tdf/dia.rs | 21 +++- src/io/readers/tdf_utils.rs | 108 +++++++++++++++++---- tests/frame_readers.rs | 9 +- tests/spectrum_readers.rs | 9 +- 12 files changed, 261 insertions(+), 64 deletions(-) diff --git a/src/file_readers.rs b/src/file_readers.rs index b14d0fd..785504f 100644 --- a/src/file_readers.rs +++ b/src/file_readers.rs @@ -1,7 +1,7 @@ use std::{fs, path::PathBuf}; use crate::io::readers::file_readers::sql_reader::frames::SqlFrame; -use crate::io::readers::SpectrumReader; +use crate::io::readers::{SpectrumReader, SpectrumReaderConfig}; use crate::{io::readers::FrameReader, Error}; use crate::ms_data::{Frame, Spectrum}; @@ -17,19 +17,27 @@ impl FileReader { // TODO refactor out // TODO proper error handling // TODO update docs - pub fn new>(path_name: T) -> Result { + pub fn new>( + path_name: T, + reader_config: SpectrumReaderConfig, + ) -> Result { let format: FileFormat = FileFormat::parse(path_name)?; let frame_reader = match &format { - FileFormat::DFolder(path) => Some(FrameReader::new(&path)), + FileFormat::DFolder(path) => Some(FrameReader::new( + &path, + reader_config.frame_splitting_params, + )), FileFormat::MS2Folder(_) => None, }; let spectrum_reader = match &format { FileFormat::DFolder(path) => { - let reader = SpectrumReader::new(path); + let reader = SpectrumReader::new(path, reader_config); // reader.calibrate(); Some(reader) }, - FileFormat::MS2Folder(path) => Some(SpectrumReader::new(path)), + FileFormat::MS2Folder(path) => { + Some(SpectrumReader::new(path, reader_config)) + }, }; Ok(Self { frame_reader, diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index e13b6be..187a221 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -19,7 +19,7 @@ use super::{ }, tdf_blob_reader::{TdfBlob, TdfBlobReader}, }, - QuadrupoleSettingsReader, + FrameWindowSplittingStrategy, QuadrupoleSettingsReader, }; #[derive(Debug)] @@ -30,10 +30,14 @@ pub struct FrameReader { acquisition: AcquisitionType, window_groups: Vec, quadrupole_settings: Vec>, + pub splitting_strategy: FrameWindowSplittingStrategy, } impl FrameReader { - pub fn new(path: impl AsRef) -> Self { + pub fn new( + path: impl AsRef, + config: FrameWindowSplittingStrategy, + ) -> Self { let sql_path = find_extension(&path, "analysis.tdf").unwrap(); let tdf_sql_reader = SqlReader::open(sql_path).unwrap(); let sql_frames = SqlFrame::from_sql_reader(&tdf_sql_reader).unwrap(); @@ -71,6 +75,7 @@ impl FrameReader { .into_iter() .map(|x| Arc::new(x)) .collect(), + splitting_strategy: config, } } diff --git a/src/io/readers/precursor_reader.rs b/src/io/readers/precursor_reader.rs index 23a804b..8c01afb 100644 --- a/src/io/readers/precursor_reader.rs +++ b/src/io/readers/precursor_reader.rs @@ -9,6 +9,8 @@ use tdf::TDFPrecursorReader; use crate::ms_data::Precursor; +use super::FrameWindowSplittingStrategy; + pub struct PrecursorReader { precursor_reader: Box, } @@ -20,11 +22,19 @@ impl fmt::Debug for PrecursorReader { } impl PrecursorReader { - pub fn new(path: impl AsRef) -> Self { + pub fn new( + path: impl AsRef, + config: Option, + ) -> Self { + let tmp = path.as_ref().extension().and_then(|e| e.to_str()); let precursor_reader: Box = - match path.as_ref().extension().and_then(|e| e.to_str()) { - Some("parquet") => Box::new(MiniTDFPrecursorReader::new(path)), - Some("tdf") => Box::new(TDFPrecursorReader::new(path)), + match (tmp, config) { + (Some("parquet"), None) => { + Box::new(MiniTDFPrecursorReader::new(path)) + }, + (Some("tdf"), strat) => { + Box::new(TDFPrecursorReader::new(path, strat)) + }, _ => panic!(), }; Self { precursor_reader } diff --git a/src/io/readers/precursor_reader/tdf.rs b/src/io/readers/precursor_reader/tdf.rs index 8619a1e..2be9a24 100644 --- a/src/io/readers/precursor_reader/tdf.rs +++ b/src/io/readers/precursor_reader/tdf.rs @@ -7,7 +7,9 @@ use dda::DDATDFPrecursorReader; use dia::DIATDFPrecursorReader; use crate::{ - io::readers::file_readers::sql_reader::SqlReader, + io::readers::{ + file_readers::sql_reader::SqlReader, FrameWindowSplittingStrategy, + }, ms_data::{AcquisitionType, Precursor}, }; @@ -18,7 +20,10 @@ pub struct TDFPrecursorReader { } impl TDFPrecursorReader { - pub fn new(path: impl AsRef) -> Self { + pub fn new( + path: impl AsRef, + splitting_strategy: Option, + ) -> Self { let sql_path = path.as_ref(); let tdf_sql_reader = SqlReader::open(sql_path).unwrap(); let sql_frames: Vec = tdf_sql_reader @@ -32,14 +37,33 @@ impl TDFPrecursorReader { AcquisitionType::Unknown }; let precursor_reader: Box = - match acquisition_type { - AcquisitionType::DDAPASEF => { + match (acquisition_type, splitting_strategy) { + (AcquisitionType::DDAPASEF, None) => { Box::new(DDATDFPrecursorReader::new(path)) }, - AcquisitionType::DIAPASEF => { - Box::new(DIATDFPrecursorReader::new(path)) + ( + AcquisitionType::DDAPASEF, + Some(FrameWindowSplittingStrategy::None), + ) => { + // Not 100% sure when this happens ... + // By this I mean generating a Some(None) + // ./tests/frame_readers.rs:60:25 generates it. + // JSPP - 2024-Jul-16 + Box::new(DDATDFPrecursorReader::new(path)) + }, + (AcquisitionType::DIAPASEF, Some(splitting_strat)) => { + Box::new(DIATDFPrecursorReader::new(path, splitting_strat)) + }, + (AcquisitionType::DIAPASEF, None) => { + Box::new(DIATDFPrecursorReader::new( + path, + FrameWindowSplittingStrategy::None, + )) }, - _ => panic!(), + _ => panic!( + "No idea how to handle {:?} - {:?}", + acquisition_type, splitting_strategy + ), }; Self { precursor_reader } } diff --git a/src/io/readers/precursor_reader/tdf/dia.rs b/src/io/readers/precursor_reader/tdf/dia.rs index 46fdc37..f480f7c 100644 --- a/src/io/readers/precursor_reader/tdf/dia.rs +++ b/src/io/readers/precursor_reader/tdf/dia.rs @@ -1,6 +1,9 @@ use std::path::{Path, PathBuf}; -use crate::io::readers::tdf_utils::expand_quadrupole_settings; +use crate::io::readers::tdf_utils::{ + expand_quadrupole_settings, expand_window_settings, +}; +use crate::io::readers::FrameWindowSplittingStrategy; use crate::{ domain_converters::{ ConvertableDomain, Frame2RtConverter, Scan2ImConverter, @@ -25,7 +28,10 @@ pub struct DIATDFPrecursorReader { } impl DIATDFPrecursorReader { - pub fn new(path: impl AsRef) -> Self { + pub fn new( + path: impl AsRef, + splitting_strat: FrameWindowSplittingStrategy, + ) -> Self { let sql_path = path.as_ref(); let tdf_sql_reader = SqlReader::open(sql_path).unwrap(); let metadata = MetadataReader::new(&path); @@ -35,8 +41,19 @@ impl DIATDFPrecursorReader { SqlWindowGroup::from_sql_reader(&tdf_sql_reader).unwrap(); let quadrupole_settings = QuadrupoleSettingsReader::new(tdf_sql_reader.get_path()); - let expanded_quadrupole_settings = - expand_quadrupole_settings(&window_groups, &quadrupole_settings); + let expanded_quadrupole_settings = match splitting_strat { + FrameWindowSplittingStrategy::None => quadrupole_settings, + FrameWindowSplittingStrategy::Quadrupole(x) => { + expand_quadrupole_settings( + &window_groups, + &quadrupole_settings, + &x, + ) + }, + FrameWindowSplittingStrategy::Window(x) => { + expand_window_settings(&window_groups, &quadrupole_settings, &x) + }, + }; Self { path: path.as_ref().to_path_buf(), expanded_quadrupole_settings, diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs index 0082ca3..bf45c97 100644 --- a/src/io/readers/spectrum_reader.rs +++ b/src/io/readers/spectrum_reader.rs @@ -7,12 +7,44 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator}; use std::path::{Path, PathBuf}; use tdf::TDFSpectrumReader; +use crate::io::readers::tdf_utils::QuadWindowExpansionStrategy; use crate::ms_data::Spectrum; pub struct SpectrumReader { spectrum_reader: Box, } +#[derive(Debug)] +pub struct SpectrumProcessingParams { + smoothing_window: u32, + centroiding_window: u32, + calibration_tolerance: f64, +} + +impl Default for SpectrumProcessingParams { + fn default() -> Self { + Self { + smoothing_window: 1, + centroiding_window: 1, + calibration_tolerance: 0.1, + } + } +} + +#[derive(Debug, Clone, Copy, Default)] +pub enum FrameWindowSplittingStrategy { + #[default] + None, + Quadrupole(QuadWindowExpansionStrategy), + Window(QuadWindowExpansionStrategy), +} + +#[derive(Debug, Default)] +pub struct SpectrumReaderConfig { + pub spectrum_processing_params: SpectrumProcessingParams, + pub frame_splitting_params: FrameWindowSplittingStrategy, +} + impl fmt::Debug for SpectrumReader { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "SpectrumReader {{ /* fields omitted */ }}") @@ -20,11 +52,11 @@ impl fmt::Debug for SpectrumReader { } impl SpectrumReader { - pub fn new(path: impl AsRef) -> Self { + pub fn new(path: impl AsRef, config: SpectrumReaderConfig) -> Self { let spectrum_reader: Box = match path.as_ref().extension().and_then(|e| e.to_str()) { Some("ms2") => Box::new(MiniTDFSpectrumReader::new(path)), - Some("d") => Box::new(TDFSpectrumReader::new(path)), + Some("d") => Box::new(TDFSpectrumReader::new(path, config)), _ => panic!(), }; Self { spectrum_reader } diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs index 84a6f4a..6681a24 100644 --- a/src/io/readers/spectrum_reader/minitdf.rs +++ b/src/io/readers/spectrum_reader/minitdf.rs @@ -28,7 +28,7 @@ impl MiniTDFSpectrumReader { pub fn new(path: impl AsRef) -> Self { let parquet_file_name = find_extension(&path, "ms2spectrum.parquet").unwrap(); - let precursor_reader = PrecursorReader::new(&parquet_file_name); + let precursor_reader = PrecursorReader::new(&parquet_file_name, None); let offsets = ParquetPrecursor::from_parquet_file(&parquet_file_name) .unwrap() .iter() diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index 270102d..c635c91 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -16,11 +16,7 @@ use crate::{ utils::find_extension, }; -use super::SpectrumReaderTrait; - -const SMOOTHING_WINDOW: u32 = 1; -const CENTROIDING_WINDOW: u32 = 1; -const CALIBRATION_TOLERANCE: f64 = 0.1; +use super::{SpectrumReaderConfig, SpectrumReaderTrait}; #[derive(Debug)] pub struct TDFSpectrumReader { @@ -28,16 +24,24 @@ pub struct TDFSpectrumReader { precursor_reader: PrecursorReader, mz_reader: Tof2MzConverter, raw_spectrum_reader: RawSpectrumReader, + config: SpectrumReaderConfig, } impl TDFSpectrumReader { - pub fn new(path_name: impl AsRef) -> Self { - let frame_reader: FrameReader = FrameReader::new(&path_name); + pub fn new( + path_name: impl AsRef, + config: SpectrumReaderConfig, + ) -> Self { + let frame_reader: FrameReader = + FrameReader::new(&path_name, config.frame_splitting_params); let sql_path = find_extension(&path_name, "analysis.tdf").unwrap(); let metadata = MetadataReader::new(&sql_path); let mz_reader: Tof2MzConverter = metadata.mz_converter; let tdf_sql_reader = SqlReader::open(&sql_path).unwrap(); - let precursor_reader = PrecursorReader::new(&sql_path); + let precursor_reader = PrecursorReader::new( + &sql_path, + Some(config.frame_splitting_params), + ); let acquisition_type = frame_reader.get_acquisition(); let raw_spectrum_reader = RawSpectrumReader::new( &tdf_sql_reader, @@ -49,14 +53,15 @@ impl TDFSpectrumReader { precursor_reader, mz_reader, raw_spectrum_reader, + config, } } pub fn read_single_raw_spectrum(&self, index: usize) -> RawSpectrum { let raw_spectrum = self.raw_spectrum_reader.get(index); raw_spectrum - .smooth(SMOOTHING_WINDOW) - .centroid(CENTROIDING_WINDOW) + .smooth(self.config.spectrum_processing_params.smoothing_window) + .centroid(self.config.spectrum_processing_params.centroiding_window) } } @@ -90,7 +95,12 @@ impl SpectrumReaderTrait for TDFSpectrumReader { let mut result: Vec<(f64, u32)> = vec![]; for &tof_index in spectrum.tof_indices.iter() { let mz = self.mz_reader.convert(tof_index); - if (mz - precursor_mz).abs() < CALIBRATION_TOLERANCE { + if (mz - precursor_mz).abs() + < self + .config + .spectrum_processing_params + .calibration_tolerance + { let hit = (precursor_mz, tof_index); result.push(hit); } diff --git a/src/io/readers/spectrum_reader/tdf/dia.rs b/src/io/readers/spectrum_reader/tdf/dia.rs index 24386c8..ad836bf 100644 --- a/src/io/readers/spectrum_reader/tdf/dia.rs +++ b/src/io/readers/spectrum_reader/tdf/dia.rs @@ -1,4 +1,7 @@ -use crate::io::readers::tdf_utils::expand_quadrupole_settings; +use crate::io::readers::tdf_utils::{ + expand_quadrupole_settings, expand_window_settings, +}; +use crate::io::readers::FrameWindowSplittingStrategy; use crate::{ io::readers::{ file_readers::sql_reader::{ @@ -24,8 +27,20 @@ impl DIARawSpectrumReader { SqlWindowGroup::from_sql_reader(&tdf_sql_reader).unwrap(); let quadrupole_settings = QuadrupoleSettingsReader::new(&tdf_sql_reader.get_path()); - let expanded_quadrupole_settings = - expand_quadrupole_settings(&window_groups, &quadrupole_settings); + let expanded_quadrupole_settings = match frame_reader.splitting_strategy + { + FrameWindowSplittingStrategy::None => quadrupole_settings, + FrameWindowSplittingStrategy::Quadrupole(x) => { + expand_quadrupole_settings( + &window_groups, + &quadrupole_settings, + &x, + ) + }, + FrameWindowSplittingStrategy::Window(x) => { + expand_window_settings(&window_groups, &quadrupole_settings, &x) + }, + }; Self { expanded_quadrupole_settings, frame_reader, diff --git a/src/io/readers/tdf_utils.rs b/src/io/readers/tdf_utils.rs index ff4d938..6850aaf 100644 --- a/src/io/readers/tdf_utils.rs +++ b/src/io/readers/tdf_utils.rs @@ -3,7 +3,8 @@ use crate::ms_data::QuadrupoleSettings; type SpanStep = (usize, usize); -enum QuadWindowExpansionStrategy { +#[derive(Debug, Copy, Clone)] +pub enum QuadWindowExpansionStrategy { None, Even(usize), Uniform(SpanStep), @@ -43,16 +44,21 @@ fn scan_range_subsplit( out }, }; + + debug_assert!( + out.iter().all(|(s, e)| s < e), + "Invalid scan range: {:?}", + out + ); + debug_assert!( + out.iter().all(|(s, e)| *s >= start && *e <= end), + "Invalid scan range: {:?}", + out + ); out } -pub fn expand_quadrupole_settings( - window_groups: &[SqlWindowGroup], - quadrupole_settings: &[QuadrupoleSettings], -) -> Vec { - // Read the 'NUM_SUB_SUB_SPLITS' from env variables ... default to 1 - // (for now) - +fn expansion_strategy_from_env() -> QuadWindowExpansionStrategy { let splits = match std::env::var("NUM_SUB_SUB_SPLITS") { Ok(s) => match s.parse::() { Ok(n) => { @@ -79,6 +85,78 @@ pub fn expand_quadrupole_settings( }, }; + splits +} + +pub fn expand_window_settings( + window_groups: &[SqlWindowGroup], + quadrupole_settings: &[QuadrupoleSettings], + strategy: &QuadWindowExpansionStrategy, +) -> Vec { + let mut expanded_quadrupole_settings: Vec = vec![]; + for window_group in window_groups { + let window = window_group.window_group; + let frame = window_group.frame; + let group = &quadrupole_settings[window as usize - 1]; + let window_group_start = + group.scan_starts.iter().min().unwrap().clone(); + let window_group_end = group.scan_ends.iter().max().unwrap().clone(); + + for (sws, swe) in + scan_range_subsplit(window_group_start, window_group_end, &strategy) + { + let mut mz_sum = 0.0; + let mut mz_min = std::f64::MAX; + let mut mz_max = std::f64::MIN; + let mut nce_sum = 0.0; + let mut num_added = 0; + + for i in 0..group.isolation_mz.len() { + // Should I be checking here for overlap instead of full containment? + if sws <= group.scan_starts[i] && swe >= group.scan_ends[i] { + mz_sum += group.isolation_mz[i]; + mz_min = mz_min.min( + group.isolation_mz[i] + - (group.isolation_width[i] / 2.0), + ); + mz_max = mz_max.max( + group.isolation_mz[i] + + (group.isolation_width[i] / 2.0), + ); + nce_sum += group.collision_energy[i]; + num_added += 1; + } + } + + let mz_mean = mz_sum / num_added as f64; + let mean_nce = nce_sum / num_added as f64; + + let sub_quad_settings = QuadrupoleSettings { + index: frame, + scan_starts: vec![sws], + scan_ends: vec![swe], + isolation_mz: vec![mz_mean], + isolation_width: vec![mz_min - mz_max], + collision_energy: vec![mean_nce], + }; + expanded_quadrupole_settings.push(sub_quad_settings) + } + } + println!( + "Number of expanded quad settings {}", + expanded_quadrupole_settings.len() + ); + expanded_quadrupole_settings +} + +pub fn expand_quadrupole_settings( + window_groups: &[SqlWindowGroup], + quadrupole_settings: &[QuadrupoleSettings], + strategy: &QuadWindowExpansionStrategy, +) -> Vec { + // Read the 'NUM_SUB_SUB_SPLITS' from env variables ... default to 1 + // (for now) + let mut expanded_quadrupole_settings: Vec = vec![]; for window_group in window_groups { let window = window_group.window_group; @@ -90,20 +168,8 @@ pub fn expand_quadrupole_settings( for (sws, swe) in scan_range_subsplit( subwindow_scan_start, subwindow_scan_end, - &splits, + &strategy, ) { - assert!( - sws >= subwindow_scan_start, - "{} >= {} not true", - sws, - subwindow_scan_start - ); - assert!( - swe <= subwindow_scan_end, - "{} <= {} not true", - swe, - subwindow_scan_end - ); let sub_quad_settings = QuadrupoleSettings { index: frame, scan_starts: vec![sws], diff --git a/tests/frame_readers.rs b/tests/frame_readers.rs index 8804a32..1b541eb 100644 --- a/tests/frame_readers.rs +++ b/tests/frame_readers.rs @@ -1,5 +1,6 @@ use std::{path::Path, sync::Arc}; use timsrust::{ + io::readers::SpectrumReaderConfig, ms_data::{AcquisitionType, Frame, MSLevel, QuadrupoleSettings}, FileReader, }; @@ -19,7 +20,9 @@ fn tdf_reader_frames1() { .unwrap() .to_string(); let frames: Vec = - FileReader::new(&file_path).unwrap().read_all_ms1_frames(); + FileReader::new(&file_path, SpectrumReaderConfig::default()) + .unwrap() + .read_all_ms1_frames(); let expected: Vec = vec![ Frame { scan_offsets: vec![0, 1, 3, 6, 10], @@ -62,7 +65,9 @@ fn tdf_reader_frames2() { .unwrap() .to_string(); let frames: Vec = - FileReader::new(&file_path).unwrap().read_all_ms2_frames(); + FileReader::new(&file_path, SpectrumReaderConfig::default()) + .unwrap() + .read_all_ms2_frames(); let expected: Vec = vec![ // Frame::default(), Frame { diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index 085013f..133f0ce 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -1,5 +1,6 @@ use std::path::Path; use timsrust::{ + io::readers::SpectrumReaderConfig, ms_data::{Precursor, Spectrum}, FileReader, }; @@ -19,7 +20,9 @@ fn minitdf_reader() { .unwrap() .to_string(); let spectra: Vec = - FileReader::new(file_path).unwrap().read_all_spectra(); + FileReader::new(file_path, SpectrumReaderConfig::default()) + .unwrap() + .read_all_spectra(); let expected: Vec = vec![ Spectrum { mz_values: vec![100.0, 200.002, 300.03, 400.4], @@ -70,7 +73,9 @@ fn tdf_reader_dda() { .unwrap() .to_string(); let spectra: Vec = - FileReader::new(file_path).unwrap().read_all_spectra(); + FileReader::new(file_path, SpectrumReaderConfig::default()) + .unwrap() + .read_all_spectra(); let expected: Vec = vec![ Spectrum { mz_values: vec![199.7633445943076], From 903ed349a675678af7aed4a22b92af9cd4529f00 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 17 Jul 2024 11:17:23 +0200 Subject: [PATCH 03/27] FIX: typo in tdfblob reader --- src/io/readers/file_readers/tdf_blob_reader.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/io/readers/file_readers/tdf_blob_reader.rs b/src/io/readers/file_readers/tdf_blob_reader.rs index 1c2fe97..d8e64fc 100644 --- a/src/io/readers/file_readers/tdf_blob_reader.rs +++ b/src/io/readers/file_readers/tdf_blob_reader.rs @@ -75,7 +75,7 @@ impl IndexedTdfBlobReader { pub fn new( file_name: impl AsRef, binary_offsets: Vec, - ) -> Result { + ) -> Result { let blob_reader = TdfBlobReader::new(file_name)?; let reader = Self { binary_offsets, From c6f96de21116acb19f4f3bb31f8d308e32a0c8e0 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 17 Jul 2024 11:20:08 +0200 Subject: [PATCH 04/27] CHORE: rename unsuoppreted acquistion for precrsor reader --- src/io/readers/precursor_reader/tdf.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/io/readers/precursor_reader/tdf.rs b/src/io/readers/precursor_reader/tdf.rs index 30e1dd5..8c02020 100644 --- a/src/io/readers/precursor_reader/tdf.rs +++ b/src/io/readers/precursor_reader/tdf.rs @@ -41,9 +41,11 @@ impl TDFPrecursorReader { Box::new(DIATDFPrecursorReader::new(path)?) }, acquisition_type => { - return Err(TDFPrecursorReaderError::UnknownPrecursorType( - format!("{:?}", acquisition_type), - )) + return Err( + TDFPrecursorReaderError::UnsupportedAcquisition( + format!("{:?}", acquisition_type), + ), + ) }, }; let reader = Self { precursor_reader }; @@ -70,5 +72,5 @@ pub enum TDFPrecursorReaderError { #[error("{0}")] DIATDFPrecursorReaderError(#[from] DIATDFPrecursorReaderError), #[error("Invalid acquistion type for precursor reader: {0}")] - UnknownPrecursorType(String), + UnsupportedAcquisition(String), } From ea8be45b8f81c6522b53a7fc8fefafb5379d180f Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 17 Jul 2024 11:27:05 +0200 Subject: [PATCH 05/27] FEAT: implemented error propagation for spectrum readers --- benches/speed_performance.rs | 6 +- src/io/readers/spectrum_reader.rs | 21 +++++-- src/io/readers/spectrum_reader/minitdf.rs | 57 +++++++++++++------ src/io/readers/spectrum_reader/tdf.rs | 46 +++++++++++---- src/io/readers/spectrum_reader/tdf/dda.rs | 24 +++++--- src/io/readers/spectrum_reader/tdf/dia.rs | 27 ++++++--- .../spectrum_reader/tdf/raw_spectra.rs | 32 ++++++++--- tests/spectrum_readers.rs | 6 +- 8 files changed, 157 insertions(+), 62 deletions(-) diff --git a/benches/speed_performance.rs b/benches/speed_performance.rs index d9bd813..daab1cc 100644 --- a/benches/speed_performance.rs +++ b/benches/speed_performance.rs @@ -34,7 +34,7 @@ fn criterion_benchmark_dda(c: &mut Criterion) { group.significance_level(0.001).sample_size(10); let d_folder_name: &str = DDA_TEST; let frame_reader = FrameReader::new(d_folder_name).unwrap(); - let spectrum_reader = SpectrumReader::new(d_folder_name); + let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap(); group.bench_function("DDA read_all_frames 6m", |b| { b.iter(|| read_all_frames(black_box(&frame_reader))) }); @@ -56,7 +56,7 @@ fn criterion_benchmark_dia(c: &mut Criterion) { group.significance_level(0.001).sample_size(10); let d_folder_name: &str = DIA_TEST; let frame_reader = FrameReader::new(d_folder_name).unwrap(); - let spectrum_reader = SpectrumReader::new(d_folder_name); + let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap(); group.bench_function("DIA read_all_frames 6m", |b| { b.iter(|| read_all_frames(black_box(&frame_reader))) }); @@ -75,7 +75,7 @@ fn criterion_benchmark_syp(c: &mut Criterion) { group.significance_level(0.001).sample_size(10); let d_folder_name: &str = SYP_TEST; let frame_reader = FrameReader::new(d_folder_name).unwrap(); - let spectrum_reader = SpectrumReader::new(d_folder_name); + let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap(); group.bench_function("SYP read_all_frames 6m", |b| { b.iter(|| read_all_frames(black_box(&frame_reader))) }); diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs index 336634a..d8851f9 100644 --- a/src/io/readers/spectrum_reader.rs +++ b/src/io/readers/spectrum_reader.rs @@ -2,10 +2,10 @@ mod minitdf; mod tdf; use core::fmt; -use minitdf::MiniTDFSpectrumReader; +use minitdf::{MiniTDFSpectrumReader, MiniTDFSpectrumReaderError}; use rayon::iter::{IntoParallelIterator, ParallelIterator}; use std::path::{Path, PathBuf}; -use tdf::TDFSpectrumReader; +use tdf::{TDFSpectrumReader, TDFSpectrumReaderError}; use crate::ms_data::Spectrum; @@ -20,14 +20,15 @@ impl fmt::Debug for SpectrumReader { } impl SpectrumReader { - pub fn new(path: impl AsRef) -> Self { + pub fn new(path: impl AsRef) -> Result { let spectrum_reader: Box = match path.as_ref().extension().and_then(|e| e.to_str()) { - Some("ms2") => Box::new(MiniTDFSpectrumReader::new(path)), - Some("d") => Box::new(TDFSpectrumReader::new(path)), + Some("ms2") => Box::new(MiniTDFSpectrumReader::new(path)?), + Some("d") => Box::new(TDFSpectrumReader::new(path)?), _ => panic!(), }; - Self { spectrum_reader } + let reader = Self { spectrum_reader }; + Ok(reader) } pub fn get(&self, index: usize) -> Spectrum { @@ -62,3 +63,11 @@ trait SpectrumReaderTrait: Sync { fn len(&self) -> usize; fn calibrate(&mut self); } + +#[derive(Debug, thiserror::Error)] +pub enum SpectrumReaderError { + #[error("{0}")] + MiniTDFSpectrumReaderError(#[from] MiniTDFSpectrumReaderError), + #[error("{0}")] + TDFSpectrumReaderError(#[from] TDFSpectrumReaderError), +} diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs index 83c50bd..9d3938b 100644 --- a/src/io/readers/spectrum_reader/minitdf.rs +++ b/src/io/readers/spectrum_reader/minitdf.rs @@ -4,11 +4,15 @@ use crate::{ io::readers::{ file_readers::{ parquet_reader::{ - precursors::ParquetPrecursor, ReadableParquetTable, + precursors::ParquetPrecursor, ParquetError, + ReadableParquetTable, + }, + sql_reader::SqlError, + tdf_blob_reader::{ + IndexedTdfBlobReader, IndexedTdfBlobReaderError, }, - tdf_blob_reader::IndexedTdfBlobReader, }, - PrecursorReader, + PrecursorReader, PrecursorReaderError, }, ms_data::Spectrum, utils::find_extension, @@ -25,31 +29,36 @@ pub struct MiniTDFSpectrumReader { } impl MiniTDFSpectrumReader { - pub fn new(path: impl AsRef) -> Self { - let parquet_file_name = - find_extension(&path, "ms2spectrum.parquet").unwrap(); - let precursor_reader = - PrecursorReader::new(&parquet_file_name).unwrap(); - let offsets = ParquetPrecursor::from_parquet_file(&parquet_file_name) - .unwrap() + pub fn new( + path: impl AsRef, + ) -> Result { + let parquet_file_name = find_extension(&path, "ms2spectrum.parquet") + .ok_or(MiniTDFSpectrumReaderError::FileNotFound( + "analysis.tdf".to_string(), + ))?; + let precursor_reader = PrecursorReader::new(&parquet_file_name)?; + let offsets = ParquetPrecursor::from_parquet_file(&parquet_file_name)? .iter() .map(|x| x.offset as usize) .collect(); let collision_energies = - ParquetPrecursor::from_parquet_file(&parquet_file_name) - .unwrap() + ParquetPrecursor::from_parquet_file(&parquet_file_name)? .iter() .map(|x| x.collision_energy) .collect(); - let bin_file_name = find_extension(&path, "bin").unwrap(); - let blob_reader = - IndexedTdfBlobReader::new(&bin_file_name, offsets).unwrap(); - Self { + let bin_file_name = find_extension(&path, "bin").ok_or( + MiniTDFSpectrumReaderError::FileNotFound( + "analysis.tdf".to_string(), + ), + )?; + let blob_reader = IndexedTdfBlobReader::new(&bin_file_name, offsets)?; + let reader = Self { path: path.as_ref().to_path_buf(), precursor_reader, blob_reader, collision_energies, - } + }; + Ok(reader) } } @@ -100,3 +109,17 @@ impl SpectrumReaderTrait for MiniTDFSpectrumReader { fn calibrate(&mut self) {} } + +#[derive(Debug, thiserror::Error)] +pub enum MiniTDFSpectrumReaderError { + #[error("{0}")] + SqlError(#[from] SqlError), + #[error("{0}")] + PrecursorReaderError(#[from] PrecursorReaderError), + #[error("{0}")] + ParquetError(#[from] ParquetError), + #[error("{0}")] + IndexedTdfBlobReaderError(#[from] IndexedTdfBlobReaderError), + #[error("{0}")] + FileNotFound(String), +} diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index 511730e..559010d 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -2,15 +2,16 @@ mod dda; mod dia; mod raw_spectra; -use raw_spectra::{RawSpectrum, RawSpectrumReader}; +use raw_spectra::{RawSpectrum, RawSpectrumReader, RawSpectrumReaderError}; use rayon::iter::{IntoParallelIterator, ParallelIterator}; use std::path::{Path, PathBuf}; use crate::{ domain_converters::{ConvertableDomain, Tof2MzConverter}, io::readers::{ - file_readers::sql_reader::SqlReader, FrameReader, MetadataReader, - PrecursorReader, + file_readers::sql_reader::{SqlError, SqlReader}, + FrameReader, FrameReaderError, MetadataReader, MetadataReaderError, + PrecursorReader, PrecursorReaderError, }, ms_data::Spectrum, utils::find_extension, @@ -31,25 +32,30 @@ pub struct TDFSpectrumReader { } impl TDFSpectrumReader { - pub fn new(path_name: impl AsRef) -> Self { - let frame_reader: FrameReader = FrameReader::new(&path_name).unwrap(); - let sql_path = find_extension(&path_name, "analysis.tdf").unwrap(); - let metadata = MetadataReader::new(&sql_path).unwrap(); + pub fn new( + path_name: impl AsRef, + ) -> Result { + let frame_reader: FrameReader = FrameReader::new(&path_name)?; + let sql_path = find_extension(&path_name, "analysis.tdf").ok_or( + TDFSpectrumReaderError::FileNotFound("analysis.tdf".to_string()), + )?; + let metadata = MetadataReader::new(&sql_path)?; let mz_reader: Tof2MzConverter = metadata.mz_converter; - let tdf_sql_reader = SqlReader::open(&sql_path).unwrap(); - let precursor_reader = PrecursorReader::new(&sql_path).unwrap(); + let tdf_sql_reader = SqlReader::open(&sql_path)?; + let precursor_reader = PrecursorReader::new(&sql_path)?; let acquisition_type = frame_reader.get_acquisition(); let raw_spectrum_reader = RawSpectrumReader::new( &tdf_sql_reader, frame_reader, acquisition_type, - ); - Self { + )?; + let reader = Self { path: path_name.as_ref().to_path_buf(), precursor_reader, mz_reader, raw_spectrum_reader, - } + }; + Ok(reader) } pub fn read_single_raw_spectrum(&self, index: usize) -> RawSpectrum { @@ -104,3 +110,19 @@ impl SpectrumReaderTrait for TDFSpectrumReader { } } } + +#[derive(Debug, thiserror::Error)] +pub enum TDFSpectrumReaderError { + #[error("{0}")] + SqlError(#[from] SqlError), + #[error("{0}")] + PrecursorReaderError(#[from] PrecursorReaderError), + #[error("{0}")] + MetadaReaderError(#[from] MetadataReaderError), + #[error("{0}")] + FrameReaderError(#[from] FrameReaderError), + #[error("{0}")] + RawSpectrumReaderError(#[from] RawSpectrumReaderError), + #[error("{0}")] + FileNotFound(String), +} diff --git a/src/io/readers/spectrum_reader/tdf/dda.rs b/src/io/readers/spectrum_reader/tdf/dda.rs index 93ab962..be674ab 100644 --- a/src/io/readers/spectrum_reader/tdf/dda.rs +++ b/src/io/readers/spectrum_reader/tdf/dda.rs @@ -1,7 +1,8 @@ use crate::{ io::readers::{ file_readers::sql_reader::{ - pasef_frame_msms::SqlPasefFrameMsMs, ReadableSqlTable, SqlReader, + pasef_frame_msms::SqlPasefFrameMsMs, ReadableSqlTable, SqlError, + SqlReader, }, FrameReader, }, @@ -19,13 +20,15 @@ pub struct DDARawSpectrumReader { } impl DDARawSpectrumReader { - pub fn new(tdf_sql_reader: &SqlReader, frame_reader: FrameReader) -> Self { - let pasef_frames = - SqlPasefFrameMsMs::from_sql_reader(&tdf_sql_reader).unwrap(); + pub fn new( + tdf_sql_reader: &SqlReader, + frame_reader: FrameReader, + ) -> Result { + let pasef_frames = SqlPasefFrameMsMs::from_sql_reader(&tdf_sql_reader)?; let pasef_precursors = &pasef_frames.iter().map(|x| x.precursor).collect(); let order: Vec = argsort(&pasef_precursors); - let max_precursor = pasef_precursors.iter().max().unwrap(); + let max_precursor = pasef_precursors.iter().max().unwrap(); // SqlReader cannot return empty vecs, so always succeeds let mut offsets: Vec = Vec::with_capacity(max_precursor + 1); offsets.push(0); for (offset, &index) in order.iter().enumerate().take(order.len() - 1) { @@ -35,12 +38,13 @@ impl DDARawSpectrumReader { } } offsets.push(order.len()); - Self { + let reader = Self { order, offsets, pasef_frames, frame_reader, - } + }; + Ok(reader) } pub fn iterate_over_pasef_frames( @@ -97,3 +101,9 @@ impl RawSpectrumReaderTrait for DDARawSpectrumReader { raw_spectrum } } + +#[derive(Debug, thiserror::Error)] +pub enum DDARawSpectrumReaderError { + #[error("{0}")] + SqlError(#[from] SqlError), +} diff --git a/src/io/readers/spectrum_reader/tdf/dia.rs b/src/io/readers/spectrum_reader/tdf/dia.rs index e0342e4..13e4a6a 100644 --- a/src/io/readers/spectrum_reader/tdf/dia.rs +++ b/src/io/readers/spectrum_reader/tdf/dia.rs @@ -1,9 +1,9 @@ use crate::{ io::readers::{ file_readers::sql_reader::{ - frame_groups::SqlWindowGroup, ReadableSqlTable, SqlReader, + frame_groups::SqlWindowGroup, ReadableSqlTable, SqlError, SqlReader, }, - FrameReader, QuadrupoleSettingsReader, + FrameReader, QuadrupoleSettingsReader, QuadrupoleSettingsReaderError, }, ms_data::QuadrupoleSettings, utils::vec_utils::group_and_sum, @@ -18,11 +18,13 @@ pub struct DIARawSpectrumReader { } impl DIARawSpectrumReader { - pub fn new(tdf_sql_reader: &SqlReader, frame_reader: FrameReader) -> Self { - let window_groups = - SqlWindowGroup::from_sql_reader(&tdf_sql_reader).unwrap(); + pub fn new( + tdf_sql_reader: &SqlReader, + frame_reader: FrameReader, + ) -> Result { + let window_groups = SqlWindowGroup::from_sql_reader(&tdf_sql_reader)?; let quadrupole_settings = - QuadrupoleSettingsReader::new(&tdf_sql_reader.get_path()).unwrap(); + QuadrupoleSettingsReader::new(&tdf_sql_reader.get_path())?; let mut expanded_quadrupole_settings: Vec = vec![]; for window_group in window_groups { let window = window_group.window_group; @@ -40,10 +42,11 @@ impl DIARawSpectrumReader { expanded_quadrupole_settings.push(sub_quad_settings) } } - Self { + let reader = Self { expanded_quadrupole_settings, frame_reader, - } + }; + Ok(reader) } } @@ -76,3 +79,11 @@ impl RawSpectrumReaderTrait for DIARawSpectrumReader { raw_spectrum } } + +#[derive(Debug, thiserror::Error)] +pub enum DIARawSpectrumReaderError { + #[error("{0}")] + SqlError(#[from] SqlError), + #[error("{0}")] + QuadrupoleSettingsReaderError(#[from] QuadrupoleSettingsReaderError), +} diff --git a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs index 7172940..8b78d65 100644 --- a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs +++ b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs @@ -7,7 +7,10 @@ use crate::{ utils::vec_utils::{filter_with_mask, find_sparse_local_maxima_mask}, }; -use super::{dda::DDARawSpectrumReader, dia::DIARawSpectrumReader}; +use super::{ + dda::{DDARawSpectrumReader, DDARawSpectrumReaderError}, + dia::{DIARawSpectrumReader, DIARawSpectrumReaderError}, +}; #[derive(Debug, PartialEq, Default, Clone)] pub(crate) struct RawSpectrum { @@ -91,20 +94,25 @@ impl RawSpectrumReader { tdf_sql_reader: &SqlReader, frame_reader: FrameReader, acquisition_type: AcquisitionType, - ) -> Self { + ) -> Result { let raw_spectrum_reader: Box = match acquisition_type { AcquisitionType::DDAPASEF => Box::new( - DDARawSpectrumReader::new(tdf_sql_reader, frame_reader), + DDARawSpectrumReader::new(tdf_sql_reader, frame_reader)?, ), AcquisitionType::DIAPASEF => Box::new( - DIARawSpectrumReader::new(tdf_sql_reader, frame_reader), + DIARawSpectrumReader::new(tdf_sql_reader, frame_reader)?, ), - _ => panic!(), + acquisition_type => { + return Err(RawSpectrumReaderError::UnsupportedAcquisition( + format!("{:?}", acquisition_type), + )) + }, }; - Self { + let reader = Self { raw_spectrum_reader, - } + }; + Ok(reader) } pub fn get(&self, index: usize) -> RawSpectrum { @@ -115,3 +123,13 @@ impl RawSpectrumReader { pub trait RawSpectrumReaderTrait: Sync { fn get(&self, index: usize) -> RawSpectrum; } + +#[derive(Debug, thiserror::Error)] +pub enum RawSpectrumReaderError { + #[error("{0}")] + DDARawSpectrumReaderError(#[from] DDARawSpectrumReaderError), + #[error("{0}")] + DIARawSpectrumReaderError(#[from] DIARawSpectrumReaderError), + #[error("Invalid acquistion type for Raw spectrum reader: {0}")] + UnsupportedAcquisition(String), +} diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index 3473d18..43ff4b8 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -18,7 +18,8 @@ fn minitdf_reader() { .to_str() .unwrap() .to_string(); - let spectra: Vec = SpectrumReader::new(file_path).get_all(); + let spectra: Vec = + SpectrumReader::new(file_path).unwrap().get_all(); let expected: Vec = vec![ Spectrum { mz_values: vec![100.0, 200.002, 300.03, 400.4], @@ -68,7 +69,8 @@ fn tdf_reader_dda() { .to_str() .unwrap() .to_string(); - let spectra: Vec = SpectrumReader::new(file_path).get_all(); + let spectra: Vec = + SpectrumReader::new(file_path).unwrap().get_all(); let expected: Vec = vec![ Spectrum { mz_values: vec![199.7633445943076], From 0b7f4b8cfdbafeb182f11c8cf7b4a530456c2e1c Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 17 Jul 2024 14:47:37 +0200 Subject: [PATCH 06/27] FEAT: added error propagation macro --- src/errors.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/errors.rs b/src/errors.rs index 44782f1..82ffef8 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -21,3 +21,16 @@ pub enum Error { // #[error("BinError: {0}")] // BinError(#[from] TdfBlobError), } + +#[macro_export] +macro_rules! propagated_error_enum { + ($name:ident, $($variant:ident),+) => { + #[derive(Debug, thiserror::Error)] + pub enum $name { + $( + #[error(transparent)] + $variant(#[from] $variant), + )+ + } + }; +} From 2adb6857d6fb3953c3e2f014a697de9094a0df84 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Thu, 18 Jul 2024 11:38:42 -0700 Subject: [PATCH 07/27] CHORE(wip): Partial addition of tests and removal of debug prints --- src/io/readers.rs | 1 + src/io/readers/tdf_utils.rs | 38 -------------- tests/frame_readers.rs | 102 +++++++++++++++++++++++++++++++++++- 3 files changed, 101 insertions(+), 40 deletions(-) diff --git a/src/io/readers.rs b/src/io/readers.rs index fd9f3ce..7e350a5 100644 --- a/src/io/readers.rs +++ b/src/io/readers.rs @@ -11,3 +11,4 @@ pub use metadata_reader::*; pub use precursor_reader::*; pub use quad_settings_reader::*; pub use spectrum_reader::*; +pub use tdf_utils::QuadWindowExpansionStrategy; diff --git a/src/io/readers/tdf_utils.rs b/src/io/readers/tdf_utils.rs index 6850aaf..93b9601 100644 --- a/src/io/readers/tdf_utils.rs +++ b/src/io/readers/tdf_utils.rs @@ -58,36 +58,6 @@ fn scan_range_subsplit( out } -fn expansion_strategy_from_env() -> QuadWindowExpansionStrategy { - let splits = match std::env::var("NUM_SUB_SUB_SPLITS") { - Ok(s) => match s.parse::() { - Ok(n) => { - println!("Number of splits: {} from env", n); - QuadWindowExpansionStrategy::Even(n) - }, - Err(_) => { - println!("Invalid number of splits: {}", s); - QuadWindowExpansionStrategy::None - }, - }, - Err(_) => match std::env::var("SUB_SPLITS_SPAN") { - Ok(s) => match s.parse::() { - Ok(n) => { - println!("Number of scans per split: {} from env", n); - QuadWindowExpansionStrategy::Uniform((n, n / 2)) - }, - Err(_) => { - println!("Invalid number of splits: {}", s); - QuadWindowExpansionStrategy::None - }, - }, - Err(_) => QuadWindowExpansionStrategy::None, - }, - }; - - splits -} - pub fn expand_window_settings( window_groups: &[SqlWindowGroup], quadrupole_settings: &[QuadrupoleSettings], @@ -142,10 +112,6 @@ pub fn expand_window_settings( expanded_quadrupole_settings.push(sub_quad_settings) } } - println!( - "Number of expanded quad settings {}", - expanded_quadrupole_settings.len() - ); expanded_quadrupole_settings } @@ -182,9 +148,5 @@ pub fn expand_quadrupole_settings( } } } - println!( - "Number of expanded quad settings {}", - expanded_quadrupole_settings.len() - ); expanded_quadrupole_settings } diff --git a/tests/frame_readers.rs b/tests/frame_readers.rs index c11d2ec..aab0dca 100644 --- a/tests/frame_readers.rs +++ b/tests/frame_readers.rs @@ -1,7 +1,8 @@ use std::{path::Path, sync::Arc}; use timsrust::{ io::readers::{ - FrameReader, FrameWindowSplittingStrategy, SpectrumReaderConfig, + FrameReader, FrameWindowSplittingStrategy, QuadWindowExpansionStrategy, + SpectrumReaderConfig, }, ms_data::{AcquisitionType, Frame, MSLevel, QuadrupoleSettings}, }; @@ -108,4 +109,101 @@ fn tdf_reader_frames2() { } } -// TODO test for DIA +#[test] +fn tdf_reader_frames_dia() { + let file_name = "dia_test.d"; + let file_path = get_local_directory() + .join(file_name) + .to_str() + .unwrap() + .to_string(); + let frames: Vec = + FrameReader::new(&file_path, FrameWindowSplittingStrategy::default()) + .unwrap() + .get_all_ms2() + .into_iter() + .map(|x| x.unwrap()) + .collect(); + + assert_eq!(frames.len(), 4); + for i in 0..frames.len() { + assert_eq!(frames[i].scan_offsets.len(), 710); + assert_eq!(frames[i].scan_offsets[0], 0); + assert_eq!( + frames[i].scan_offsets.last().unwrap(), + &frames[i].intensities.len() + ); + assert_eq!(frames[i].tof_indices.len(), frames[i].intensities.len()); + } + assert_eq!(&frames[0].tof_indices[0], &251695u32); + assert_eq!(&frames[0].intensities[0], &503392u32); + assert_eq!(&frames[0].tof_indices.len(), &754376); + assert_eq!(&frames[0].intensities.len(), &754376); + + assert_eq!(&frames[1].tof_indices[0], &1006071u32); + assert_eq!(&frames[1].intensities[0], &2012144u32); + assert_eq!(&frames[1].tof_indices.len(), &1257057); + assert_eq!(&frames[1].intensities.len(), &1257057); + + assert_eq!(&frames[2].tof_indices[0], &4022866u32); + assert_eq!(&frames[2].intensities[0], &8045734u32); + assert_eq!(&frames[2].tof_indices.len(), &2262419); + assert_eq!(&frames[2].intensities.len(), &2262419); + + assert_eq!(&frames[3].tof_indices[0], &6285285u32); + assert_eq!(&frames[3].intensities[0], &12570572u32); + assert_eq!(&frames[3].tof_indices.len(), &2765100); + assert_eq!(&frames[3].intensities.len(), &2765100); +} + +#[test] +fn test_dia_even() { + let file_name = "dia_test.d"; + let file_path = get_local_directory() + .join(file_name) + .to_str() + .unwrap() + .to_string(); + + for i in 1..3 { + let frames: Vec = FrameReader::new( + &file_path, + FrameWindowSplittingStrategy::Quadrupole( + QuadWindowExpansionStrategy::Even(i), + ), + ) + .unwrap() + .get_all_ms2() + .into_iter() + .map(|x| x.unwrap()) + .collect(); + + assert_eq!(frames.len(), 4 * i); + } +} + +#[test] +fn test_dia_uniform() { + let file_name = "dia_test.d"; + let file_path = get_local_directory() + .join(file_name) + .to_str() + .unwrap() + .to_string(); + + for i in [100, 200, 300] { + let frames: Vec = FrameReader::new( + &file_path, + FrameWindowSplittingStrategy::Quadrupole( + QuadWindowExpansionStrategy::Uniform((i, i)), + ), + ) + .unwrap() + .get_all_ms2() + .into_iter() + .map(|x| x.unwrap()) + .collect(); + + assert_eq!(frames.len(), 4 * i); + } +} From 1261967ed200cd502b921134ded43a04d679ce2a Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Thu, 18 Jul 2024 12:19:00 -0700 Subject: [PATCH 08/27] CHORE: More work towards testing splitting methods --- src/io/readers/spectrum_reader/tdf/dia.rs | 8 --- src/io/readers/tdf_utils.rs | 21 +++++++ tests/frame_readers.rs | 52 ----------------- tests/spectrum_readers.rs | 69 ++++++++++++++++++++++- 4 files changed, 88 insertions(+), 62 deletions(-) diff --git a/src/io/readers/spectrum_reader/tdf/dia.rs b/src/io/readers/spectrum_reader/tdf/dia.rs index f3f9d19..bf016f9 100644 --- a/src/io/readers/spectrum_reader/tdf/dia.rs +++ b/src/io/readers/spectrum_reader/tdf/dia.rs @@ -51,14 +51,6 @@ impl DIARawSpectrumReader { impl RawSpectrumReaderTrait for DIARawSpectrumReader { fn get(&self, index: usize) -> RawSpectrum { let quad_settings = &self.expanded_quadrupole_settings[index]; - if index < 10 { - println!("{}", index); - println!("{:?}", quad_settings); - } - if index > (self.expanded_quadrupole_settings.len() - 10) { - println!("{}", index); - println!("{:?}", quad_settings); - } let collision_energy = quad_settings.collision_energy[0]; let isolation_mz = quad_settings.isolation_mz[0]; diff --git a/src/io/readers/tdf_utils.rs b/src/io/readers/tdf_utils.rs index 93b9601..4e59968 100644 --- a/src/io/readers/tdf_utils.rs +++ b/src/io/readers/tdf_utils.rs @@ -3,6 +3,24 @@ use crate::ms_data::QuadrupoleSettings; type SpanStep = (usize, usize); +/// Strategy for expanding quadrupole settings +/// +/// This enum is used to determine how to expand quadrupole settings +/// when reading in DIA data. And exporting spectra (not frames RN). +/// +/// # Variants +/// +/// For example if we have a window with scan start 50 and end 500 +/// +/// * `None` - Do not expand quadrupole settings; use the original settings +/// * `Even(usize)` - Split the quadrupole settings into `usize` evenly spaced +/// subwindows; e.g. if `usize` is 2, the window will be split into 2 subwindows +/// of equal width. +/// * `Uniform(SpanStep)` - Split the quadrupole settings into subwindows of +/// width `SpanStep.0` and step `SpanStep.1`; e.g. if `SpanStep` is (100, 50), +/// the window will be split into subwindows of width 100 and step 50 between their +/// scan start and end. +/// #[derive(Debug, Copy, Clone)] pub enum QuadWindowExpansionStrategy { None, @@ -41,6 +59,9 @@ fn scan_range_subsplit( curr_start += step; curr_end += step; } + if curr_start < end { + out.push((curr_start, end)); + } out }, }; diff --git a/tests/frame_readers.rs b/tests/frame_readers.rs index aab0dca..400ebf7 100644 --- a/tests/frame_readers.rs +++ b/tests/frame_readers.rs @@ -155,55 +155,3 @@ fn tdf_reader_frames_dia() { assert_eq!(&frames[3].tof_indices.len(), &2765100); assert_eq!(&frames[3].intensities.len(), &2765100); } - -#[test] -fn test_dia_even() { - let file_name = "dia_test.d"; - let file_path = get_local_directory() - .join(file_name) - .to_str() - .unwrap() - .to_string(); - - for i in 1..3 { - let frames: Vec = FrameReader::new( - &file_path, - FrameWindowSplittingStrategy::Quadrupole( - QuadWindowExpansionStrategy::Even(i), - ), - ) - .unwrap() - .get_all_ms2() - .into_iter() - .map(|x| x.unwrap()) - .collect(); - - assert_eq!(frames.len(), 4 * i); - } -} - -#[test] -fn test_dia_uniform() { - let file_name = "dia_test.d"; - let file_path = get_local_directory() - .join(file_name) - .to_str() - .unwrap() - .to_string(); - - for i in [100, 200, 300] { - let frames: Vec = FrameReader::new( - &file_path, - FrameWindowSplittingStrategy::Quadrupole( - QuadWindowExpansionStrategy::Uniform((i, i)), - ), - ) - .unwrap() - .get_all_ms2() - .into_iter() - .map(|x| x.unwrap()) - .collect(); - - assert_eq!(frames.len(), 4 * i); - } -} diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index 7132d48..85488c8 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -1,7 +1,9 @@ use std::path::Path; use timsrust::{ - io::readers::SpectrumReader, - io::readers::SpectrumReaderConfig, + io::readers::{ + FrameWindowSplittingStrategy, QuadWindowExpansionStrategy, + SpectrumProcessingParams, SpectrumReader, SpectrumReaderConfig, + }, ms_data::{Precursor, Spectrum}, }; @@ -131,3 +133,66 @@ fn tdf_reader_dda() { assert_eq!(spectra[i], expected[i]); } } + +#[test] +fn test_dia_even() { + let file_name = "dia_test.d"; + let file_path = get_local_directory() + .join(file_name) + .to_str() + .unwrap() + .to_string(); + + for i in 1..3 { + let frames: Vec = SpectrumReader::new( + &file_path, + SpectrumReaderConfig { + frame_splitting_params: + FrameWindowSplittingStrategy::Quadrupole( + QuadWindowExpansionStrategy::Even(i), + ), + spectrum_processing_params: SpectrumProcessingParams::default(), + }, + ) + .get_all(); + + println!(">>>>> EVEN {:?}", frames.len()); + + // 4 frames, 2 windows in each, i splits/window + assert_eq!(frames.len(), 4 * 2 * i); + } +} + +#[test] +fn test_dia_uniform() { + let file_name = "dia_test.d"; + let file_path = get_local_directory() + .join(file_name) + .to_str() + .unwrap() + .to_string(); + + for i in [100, 200, 300] { + let frames: Vec = SpectrumReader::new( + &file_path, + SpectrumReaderConfig { + frame_splitting_params: FrameWindowSplittingStrategy::Window( + QuadWindowExpansionStrategy::Uniform((i, i)), + ), + spectrum_processing_params: SpectrumProcessingParams::default(), + }, + ) + .get_all(); + + println!(">>>>> UNIFORM {} > {:?}", i, frames.len()); + for f in frames.iter() { + println!("{:?}", f.precursor); + } + + // Not all frames have scan windows from 0 to 709 ... so ... I need to think + // on how to express this in the test + // assert_eq!(frames.len(), 4 * ((709 / i) + 1)); + assert!(frames.len() > (709 / i)); + assert!(frames.len() < 3 * ((709 / i) + 1)); + } +} From cfa0574664d4229c1cd42fd5d73310dfbb48ee8b Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Thu, 25 Jul 2024 15:29:31 +0200 Subject: [PATCH 09/27] CHORE: resolve merge conflicts --- src/io/readers.rs | 2 + src/io/readers/frame_reader.rs | 10 +- src/io/readers/precursor_reader.rs | 18 +- src/io/readers/precursor_reader/tdf.rs | 37 +++- src/io/readers/precursor_reader/tdf/dia.rs | 36 ++-- src/io/readers/spectrum_reader.rs | 39 +++- src/io/readers/spectrum_reader/minitdf.rs | 2 +- src/io/readers/spectrum_reader/tdf.rs | 34 ++-- src/io/readers/spectrum_reader/tdf/dda.rs | 4 + src/io/readers/spectrum_reader/tdf/dia.rs | 40 ++-- .../spectrum_reader/tdf/raw_spectra.rs | 5 + src/io/readers/tdf_utils.rs | 173 ++++++++++++++++++ src/utils/vec_utils.rs | 4 +- tests/frame_readers.rs | 78 ++++++-- tests/spectrum_readers.rs | 76 +++++++- 15 files changed, 478 insertions(+), 80 deletions(-) create mode 100644 src/io/readers/tdf_utils.rs diff --git a/src/io/readers.rs b/src/io/readers.rs index 03d5248..7e350a5 100644 --- a/src/io/readers.rs +++ b/src/io/readers.rs @@ -4,9 +4,11 @@ mod metadata_reader; mod precursor_reader; mod quad_settings_reader; mod spectrum_reader; +mod tdf_utils; pub use frame_reader::*; pub use metadata_reader::*; pub use precursor_reader::*; pub use quad_settings_reader::*; pub use spectrum_reader::*; +pub use tdf_utils::QuadWindowExpansionStrategy; diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index af9b713..f4b0d48 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -19,7 +19,8 @@ use super::{ }, tdf_blob_reader::{TdfBlob, TdfBlobReader, TdfBlobReaderError}, }, - QuadrupoleSettingsReader, QuadrupoleSettingsReaderError, + FrameWindowSplittingStrategy, QuadrupoleSettingsReader, + QuadrupoleSettingsReaderError, }; #[derive(Debug)] @@ -30,10 +31,14 @@ pub struct FrameReader { acquisition: AcquisitionType, window_groups: Vec, quadrupole_settings: Vec>, + pub splitting_strategy: FrameWindowSplittingStrategy, } impl FrameReader { - pub fn new(path: impl AsRef) -> Result { + pub fn new( + path: impl AsRef, + config: FrameWindowSplittingStrategy, + ) -> Result { let sql_path = find_extension(&path, "analysis.tdf").ok_or( FrameReaderError::FileNotFound("analysis.tdf".to_string()), )?; @@ -74,6 +79,7 @@ impl FrameReader { .into_iter() .map(|x| Arc::new(x)) .collect(), + splitting_strategy: config, }; Ok(reader) } diff --git a/src/io/readers/precursor_reader.rs b/src/io/readers/precursor_reader.rs index 5544dce..b755a25 100644 --- a/src/io/readers/precursor_reader.rs +++ b/src/io/readers/precursor_reader.rs @@ -9,6 +9,8 @@ use tdf::{TDFPrecursorReader, TDFPrecursorReaderError}; use crate::ms_data::Precursor; +use super::FrameWindowSplittingStrategy; + pub struct PrecursorReader { precursor_reader: Box, } @@ -20,11 +22,19 @@ impl fmt::Debug for PrecursorReader { } impl PrecursorReader { - pub fn new(path: impl AsRef) -> Result { + pub fn new( + path: impl AsRef, + config: Option, + ) -> Result { + let tmp = path.as_ref().extension().and_then(|e| e.to_str()); let precursor_reader: Box = - match path.as_ref().extension().and_then(|e| e.to_str()) { - Some("parquet") => Box::new(MiniTDFPrecursorReader::new(path)?), - Some("tdf") => Box::new(TDFPrecursorReader::new(path)?), + match (tmp, config) { + (Some("parquet"), None) => { + Box::new(MiniTDFPrecursorReader::new(path)?) + }, + (Some("tdf"), strat) => { + Box::new(TDFPrecursorReader::new(path, strat)?) + }, _ => panic!(), }; let reader = Self { precursor_reader }; diff --git a/src/io/readers/precursor_reader/tdf.rs b/src/io/readers/precursor_reader/tdf.rs index 8c02020..1795e4c 100644 --- a/src/io/readers/precursor_reader/tdf.rs +++ b/src/io/readers/precursor_reader/tdf.rs @@ -7,7 +7,10 @@ use dda::{DDATDFPrecursorReader, DDATDFPrecursorReaderError}; use dia::{DIATDFPrecursorReader, DIATDFPrecursorReaderError}; use crate::{ - io::readers::file_readers::sql_reader::{SqlError, SqlReader}, + io::readers::{ + file_readers::sql_reader::{SqlError, SqlReader}, + FrameWindowSplittingStrategy, + }, ms_data::{AcquisitionType, Precursor}, }; @@ -20,6 +23,7 @@ pub struct TDFPrecursorReader { impl TDFPrecursorReader { pub fn new( path: impl AsRef, + splitting_strategy: Option, ) -> Result { let sql_path = path.as_ref(); let tdf_sql_reader = SqlReader::open(sql_path)?; @@ -33,17 +37,36 @@ impl TDFPrecursorReader { AcquisitionType::Unknown }; let precursor_reader: Box = - match acquisition_type { - AcquisitionType::DDAPASEF => { + match (acquisition_type, splitting_strategy) { + (AcquisitionType::DDAPASEF, None) => { Box::new(DDATDFPrecursorReader::new(path)?) }, - AcquisitionType::DIAPASEF => { - Box::new(DIATDFPrecursorReader::new(path)?) + ( + AcquisitionType::DDAPASEF, + Some(FrameWindowSplittingStrategy::None), + ) => { + // Not 100% sure when this happens ... + // By this I mean generating a Some(None) + // ./tests/frame_readers.rs:60:25 generates it. + // JSPP - 2024-Jul-16 + Box::new(DDATDFPrecursorReader::new(path)?) + }, + (AcquisitionType::DIAPASEF, Some(splitting_strat)) => { + Box::new(DIATDFPrecursorReader::new(path, splitting_strat)?) + }, + (AcquisitionType::DIAPASEF, None) => { + Box::new(DIATDFPrecursorReader::new( + path, + FrameWindowSplittingStrategy::None, + )?) }, - acquisition_type => { + (acq_type, acq_config) => { return Err( TDFPrecursorReaderError::UnsupportedAcquisition( - format!("{:?}", acquisition_type), + format!( + "{:?} + {:?}", + acquisition_type, acq_config + ), ), ) }, diff --git a/src/io/readers/precursor_reader/tdf/dia.rs b/src/io/readers/precursor_reader/tdf/dia.rs index 9531fdd..2b4dae0 100644 --- a/src/io/readers/precursor_reader/tdf/dia.rs +++ b/src/io/readers/precursor_reader/tdf/dia.rs @@ -1,5 +1,9 @@ use std::path::Path; +use crate::io::readers::tdf_utils::{ + expand_quadrupole_settings, expand_window_settings, +}; +use crate::io::readers::FrameWindowSplittingStrategy; use crate::{ domain_converters::{ ConvertableDomain, Frame2RtConverter, Scan2ImConverter, @@ -26,6 +30,7 @@ pub struct DIATDFPrecursorReader { impl DIATDFPrecursorReader { pub fn new( path: impl AsRef, + splitting_strat: FrameWindowSplittingStrategy, ) -> Result { let sql_path = path.as_ref(); let tdf_sql_reader = SqlReader::open(sql_path)?; @@ -35,23 +40,20 @@ impl DIATDFPrecursorReader { let window_groups = SqlWindowGroup::from_sql_reader(&tdf_sql_reader)?; let quadrupole_settings = QuadrupoleSettingsReader::new(tdf_sql_reader.get_path())?; - let mut expanded_quadrupole_settings: Vec = vec![]; - for window_group in window_groups { - let window = window_group.window_group; - let frame = window_group.frame; - let group = &quadrupole_settings[window as usize - 1]; - for sub_window in 0..group.isolation_mz.len() { - let sub_quad_settings = QuadrupoleSettings { - index: frame, - scan_starts: vec![group.scan_starts[sub_window]], - scan_ends: vec![group.scan_ends[sub_window]], - isolation_mz: vec![group.isolation_mz[sub_window]], - isolation_width: vec![group.isolation_width[sub_window]], - collision_energy: vec![group.collision_energy[sub_window]], - }; - expanded_quadrupole_settings.push(sub_quad_settings) - } - } + let expanded_quadrupole_settings = match splitting_strat { + FrameWindowSplittingStrategy::None => quadrupole_settings, + FrameWindowSplittingStrategy::Quadrupole(x) => { + expand_quadrupole_settings( + &window_groups, + &quadrupole_settings, + &x, + ) + }, + FrameWindowSplittingStrategy::Window(x) => { + expand_window_settings(&window_groups, &quadrupole_settings, &x) + }, + }; + let reader = Self { expanded_quadrupole_settings, rt_converter, diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs index d8851f9..b158afc 100644 --- a/src/io/readers/spectrum_reader.rs +++ b/src/io/readers/spectrum_reader.rs @@ -7,12 +7,44 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator}; use std::path::{Path, PathBuf}; use tdf::{TDFSpectrumReader, TDFSpectrumReaderError}; +use crate::io::readers::tdf_utils::QuadWindowExpansionStrategy; use crate::ms_data::Spectrum; pub struct SpectrumReader { spectrum_reader: Box, } +#[derive(Debug)] +pub struct SpectrumProcessingParams { + smoothing_window: u32, + centroiding_window: u32, + calibration_tolerance: f64, +} + +impl Default for SpectrumProcessingParams { + fn default() -> Self { + Self { + smoothing_window: 1, + centroiding_window: 1, + calibration_tolerance: 0.1, + } + } +} + +#[derive(Debug, Clone, Copy, Default)] +pub enum FrameWindowSplittingStrategy { + #[default] + None, + Quadrupole(QuadWindowExpansionStrategy), + Window(QuadWindowExpansionStrategy), +} + +#[derive(Debug, Default)] +pub struct SpectrumReaderConfig { + pub spectrum_processing_params: SpectrumProcessingParams, + pub frame_splitting_params: FrameWindowSplittingStrategy, +} + impl fmt::Debug for SpectrumReader { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "SpectrumReader {{ /* fields omitted */ }}") @@ -20,11 +52,14 @@ impl fmt::Debug for SpectrumReader { } impl SpectrumReader { - pub fn new(path: impl AsRef) -> Result { + pub fn new( + path: impl AsRef, + config: SpectrumReaderConfig, + ) -> Result { let spectrum_reader: Box = match path.as_ref().extension().and_then(|e| e.to_str()) { Some("ms2") => Box::new(MiniTDFSpectrumReader::new(path)?), - Some("d") => Box::new(TDFSpectrumReader::new(path)?), + Some("d") => Box::new(TDFSpectrumReader::new(path, config)?), _ => panic!(), }; let reader = Self { spectrum_reader }; diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs index 9d3938b..5c24e77 100644 --- a/src/io/readers/spectrum_reader/minitdf.rs +++ b/src/io/readers/spectrum_reader/minitdf.rs @@ -36,7 +36,7 @@ impl MiniTDFSpectrumReader { .ok_or(MiniTDFSpectrumReaderError::FileNotFound( "analysis.tdf".to_string(), ))?; - let precursor_reader = PrecursorReader::new(&parquet_file_name)?; + let precursor_reader = PrecursorReader::new(&parquet_file_name, None)?; let offsets = ParquetPrecursor::from_parquet_file(&parquet_file_name)? .iter() .map(|x| x.offset as usize) diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index 559010d..2f18147 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -17,11 +17,7 @@ use crate::{ utils::find_extension, }; -use super::SpectrumReaderTrait; - -const SMOOTHING_WINDOW: u32 = 1; -const CENTROIDING_WINDOW: u32 = 1; -const CALIBRATION_TOLERANCE: f64 = 0.1; +use super::{SpectrumReaderConfig, SpectrumReaderTrait}; #[derive(Debug)] pub struct TDFSpectrumReader { @@ -29,20 +25,26 @@ pub struct TDFSpectrumReader { precursor_reader: PrecursorReader, mz_reader: Tof2MzConverter, raw_spectrum_reader: RawSpectrumReader, + config: SpectrumReaderConfig, } impl TDFSpectrumReader { pub fn new( path_name: impl AsRef, + config: SpectrumReaderConfig, ) -> Result { - let frame_reader: FrameReader = FrameReader::new(&path_name)?; + let frame_reader: FrameReader = + FrameReader::new(&path_name, config.frame_splitting_params)?; let sql_path = find_extension(&path_name, "analysis.tdf").ok_or( TDFSpectrumReaderError::FileNotFound("analysis.tdf".to_string()), )?; let metadata = MetadataReader::new(&sql_path)?; let mz_reader: Tof2MzConverter = metadata.mz_converter; let tdf_sql_reader = SqlReader::open(&sql_path)?; - let precursor_reader = PrecursorReader::new(&sql_path)?; + let precursor_reader = PrecursorReader::new( + &sql_path, + Some(config.frame_splitting_params), + )?; let acquisition_type = frame_reader.get_acquisition(); let raw_spectrum_reader = RawSpectrumReader::new( &tdf_sql_reader, @@ -54,6 +56,7 @@ impl TDFSpectrumReader { precursor_reader, mz_reader, raw_spectrum_reader, + config, }; Ok(reader) } @@ -61,8 +64,8 @@ impl TDFSpectrumReader { pub fn read_single_raw_spectrum(&self, index: usize) -> RawSpectrum { let raw_spectrum = self.raw_spectrum_reader.get(index); raw_spectrum - .smooth(SMOOTHING_WINDOW) - .centroid(CENTROIDING_WINDOW) + .smooth(self.config.spectrum_processing_params.smoothing_window) + .centroid(self.config.spectrum_processing_params.centroiding_window) } } @@ -77,7 +80,11 @@ impl SpectrumReaderTrait for TDFSpectrumReader { } fn len(&self) -> usize { - self.precursor_reader.len() + debug_assert_eq!( + self.precursor_reader.len(), + self.raw_spectrum_reader.len() + ); + self.raw_spectrum_reader.len() } fn get_path(&self) -> PathBuf { @@ -94,7 +101,12 @@ impl SpectrumReaderTrait for TDFSpectrumReader { let mut result: Vec<(f64, u32)> = vec![]; for &tof_index in spectrum.tof_indices.iter() { let mz = self.mz_reader.convert(tof_index); - if (mz - precursor_mz).abs() < CALIBRATION_TOLERANCE { + if (mz - precursor_mz).abs() + < self + .config + .spectrum_processing_params + .calibration_tolerance + { let hit = (precursor_mz, tof_index); result.push(hit); } diff --git a/src/io/readers/spectrum_reader/tdf/dda.rs b/src/io/readers/spectrum_reader/tdf/dda.rs index be674ab..be27104 100644 --- a/src/io/readers/spectrum_reader/tdf/dda.rs +++ b/src/io/readers/spectrum_reader/tdf/dda.rs @@ -100,6 +100,10 @@ impl RawSpectrumReaderTrait for DDARawSpectrumReader { }; raw_spectrum } + + fn len(&self) -> usize { + self.offsets.len() - 1 + } } #[derive(Debug, thiserror::Error)] diff --git a/src/io/readers/spectrum_reader/tdf/dia.rs b/src/io/readers/spectrum_reader/tdf/dia.rs index 13e4a6a..5962aa0 100644 --- a/src/io/readers/spectrum_reader/tdf/dia.rs +++ b/src/io/readers/spectrum_reader/tdf/dia.rs @@ -1,3 +1,7 @@ +use crate::io::readers::tdf_utils::{ + expand_quadrupole_settings, expand_window_settings, +}; +use crate::io::readers::FrameWindowSplittingStrategy; use crate::{ io::readers::{ file_readers::sql_reader::{ @@ -25,23 +29,20 @@ impl DIARawSpectrumReader { let window_groups = SqlWindowGroup::from_sql_reader(&tdf_sql_reader)?; let quadrupole_settings = QuadrupoleSettingsReader::new(&tdf_sql_reader.get_path())?; - let mut expanded_quadrupole_settings: Vec = vec![]; - for window_group in window_groups { - let window = window_group.window_group; - let frame = window_group.frame; - let group = &quadrupole_settings[window as usize - 1]; - for sub_window in 0..group.isolation_mz.len() { - let sub_quad_settings = QuadrupoleSettings { - index: frame, - scan_starts: vec![group.scan_starts[sub_window]], - scan_ends: vec![group.scan_ends[sub_window]], - isolation_mz: vec![group.isolation_mz[sub_window]], - isolation_width: vec![group.isolation_width[sub_window]], - collision_energy: vec![group.collision_energy[sub_window]], - }; - expanded_quadrupole_settings.push(sub_quad_settings) - } - } + let expanded_quadrupole_settings = match frame_reader.splitting_strategy + { + FrameWindowSplittingStrategy::None => quadrupole_settings, + FrameWindowSplittingStrategy::Quadrupole(x) => { + expand_quadrupole_settings( + &window_groups, + &quadrupole_settings, + &x, + ) + }, + FrameWindowSplittingStrategy::Window(x) => { + expand_window_settings(&window_groups, &quadrupole_settings, &x) + }, + }; let reader = Self { expanded_quadrupole_settings, frame_reader, @@ -53,6 +54,7 @@ impl DIARawSpectrumReader { impl RawSpectrumReaderTrait for DIARawSpectrumReader { fn get(&self, index: usize) -> RawSpectrum { let quad_settings = &self.expanded_quadrupole_settings[index]; + let collision_energy = quad_settings.collision_energy[0]; let isolation_mz = quad_settings.isolation_mz[0]; let isolation_width = quad_settings.isolation_width[0]; @@ -78,6 +80,10 @@ impl RawSpectrumReaderTrait for DIARawSpectrumReader { }; raw_spectrum } + + fn len(&self) -> usize { + self.expanded_quadrupole_settings.len() + } } #[derive(Debug, thiserror::Error)] diff --git a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs index 8b78d65..d2239b0 100644 --- a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs +++ b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs @@ -118,10 +118,15 @@ impl RawSpectrumReader { pub fn get(&self, index: usize) -> RawSpectrum { self.raw_spectrum_reader.get(index) } + + pub fn len(&self) -> usize { + self.raw_spectrum_reader.len() + } } pub trait RawSpectrumReaderTrait: Sync { fn get(&self, index: usize) -> RawSpectrum; + fn len(&self) -> usize; } #[derive(Debug, thiserror::Error)] diff --git a/src/io/readers/tdf_utils.rs b/src/io/readers/tdf_utils.rs new file mode 100644 index 0000000..4e59968 --- /dev/null +++ b/src/io/readers/tdf_utils.rs @@ -0,0 +1,173 @@ +use crate::io::readers::file_readers::sql_reader::frame_groups::SqlWindowGroup; +use crate::ms_data::QuadrupoleSettings; + +type SpanStep = (usize, usize); + +/// Strategy for expanding quadrupole settings +/// +/// This enum is used to determine how to expand quadrupole settings +/// when reading in DIA data. And exporting spectra (not frames RN). +/// +/// # Variants +/// +/// For example if we have a window with scan start 50 and end 500 +/// +/// * `None` - Do not expand quadrupole settings; use the original settings +/// * `Even(usize)` - Split the quadrupole settings into `usize` evenly spaced +/// subwindows; e.g. if `usize` is 2, the window will be split into 2 subwindows +/// of equal width. +/// * `Uniform(SpanStep)` - Split the quadrupole settings into subwindows of +/// width `SpanStep.0` and step `SpanStep.1`; e.g. if `SpanStep` is (100, 50), +/// the window will be split into subwindows of width 100 and step 50 between their +/// scan start and end. +/// +#[derive(Debug, Copy, Clone)] +pub enum QuadWindowExpansionStrategy { + None, + Even(usize), + Uniform(SpanStep), +} + +fn scan_range_subsplit( + start: usize, + end: usize, + strategy: &QuadWindowExpansionStrategy, +) -> Vec<(usize, usize)> { + let out = match strategy { + QuadWindowExpansionStrategy::None => { + vec![(start, end)] + }, + QuadWindowExpansionStrategy::Even(num_splits) => { + let sub_subwindow_width = (end - start) / (num_splits + 1); + let mut out = Vec::new(); + for sub_subwindow in 0..num_splits.clone() { + let sub_subwindow_scan_start = + start + (sub_subwindow_width * sub_subwindow); + let sub_subwindow_scan_end = + start + (sub_subwindow_width * (sub_subwindow + 2)); + + out.push((sub_subwindow_scan_start, sub_subwindow_scan_end)) + } + out + }, + QuadWindowExpansionStrategy::Uniform((span, step)) => { + let mut curr_start = start.clone(); + let mut curr_end = start + span; + let mut out = Vec::new(); + while curr_end < end { + out.push((curr_start, curr_end)); + curr_start += step; + curr_end += step; + } + if curr_start < end { + out.push((curr_start, end)); + } + out + }, + }; + + debug_assert!( + out.iter().all(|(s, e)| s < e), + "Invalid scan range: {:?}", + out + ); + debug_assert!( + out.iter().all(|(s, e)| *s >= start && *e <= end), + "Invalid scan range: {:?}", + out + ); + out +} + +pub fn expand_window_settings( + window_groups: &[SqlWindowGroup], + quadrupole_settings: &[QuadrupoleSettings], + strategy: &QuadWindowExpansionStrategy, +) -> Vec { + let mut expanded_quadrupole_settings: Vec = vec![]; + for window_group in window_groups { + let window = window_group.window_group; + let frame = window_group.frame; + let group = &quadrupole_settings[window as usize - 1]; + let window_group_start = + group.scan_starts.iter().min().unwrap().clone(); + let window_group_end = group.scan_ends.iter().max().unwrap().clone(); + + for (sws, swe) in + scan_range_subsplit(window_group_start, window_group_end, &strategy) + { + let mut mz_sum = 0.0; + let mut mz_min = std::f64::MAX; + let mut mz_max = std::f64::MIN; + let mut nce_sum = 0.0; + let mut num_added = 0; + + for i in 0..group.isolation_mz.len() { + // Should I be checking here for overlap instead of full containment? + if sws <= group.scan_starts[i] && swe >= group.scan_ends[i] { + mz_sum += group.isolation_mz[i]; + mz_min = mz_min.min( + group.isolation_mz[i] + - (group.isolation_width[i] / 2.0), + ); + mz_max = mz_max.max( + group.isolation_mz[i] + + (group.isolation_width[i] / 2.0), + ); + nce_sum += group.collision_energy[i]; + num_added += 1; + } + } + + let mz_mean = mz_sum / num_added as f64; + let mean_nce = nce_sum / num_added as f64; + + let sub_quad_settings = QuadrupoleSettings { + index: frame, + scan_starts: vec![sws], + scan_ends: vec![swe], + isolation_mz: vec![mz_mean], + isolation_width: vec![mz_min - mz_max], + collision_energy: vec![mean_nce], + }; + expanded_quadrupole_settings.push(sub_quad_settings) + } + } + expanded_quadrupole_settings +} + +pub fn expand_quadrupole_settings( + window_groups: &[SqlWindowGroup], + quadrupole_settings: &[QuadrupoleSettings], + strategy: &QuadWindowExpansionStrategy, +) -> Vec { + // Read the 'NUM_SUB_SUB_SPLITS' from env variables ... default to 1 + // (for now) + + let mut expanded_quadrupole_settings: Vec = vec![]; + for window_group in window_groups { + let window = window_group.window_group; + let frame = window_group.frame; + let group = &quadrupole_settings[window as usize - 1]; + for sub_window in 0..group.isolation_mz.len() { + let subwindow_scan_start = group.scan_starts[sub_window]; + let subwindow_scan_end = group.scan_ends[sub_window]; + for (sws, swe) in scan_range_subsplit( + subwindow_scan_start, + subwindow_scan_end, + &strategy, + ) { + let sub_quad_settings = QuadrupoleSettings { + index: frame, + scan_starts: vec![sws], + scan_ends: vec![swe], + isolation_mz: vec![group.isolation_mz[sub_window]], + isolation_width: vec![group.isolation_width[sub_window]], + collision_energy: vec![group.collision_energy[sub_window]], + }; + expanded_quadrupole_settings.push(sub_quad_settings) + } + } + } + expanded_quadrupole_settings +} diff --git a/src/utils/vec_utils.rs b/src/utils/vec_utils.rs index 724fc3c..3ee53c0 100644 --- a/src/utils/vec_utils.rs +++ b/src/utils/vec_utils.rs @@ -12,8 +12,8 @@ pub fn group_and_sum + Copy>( return (vec![], vec![]); } let order: Vec = argsort(&groups); - let mut new_groups: Vec = vec![]; - let mut new_values: Vec = vec![]; + let mut new_groups: Vec = Vec::with_capacity(order.len()); + let mut new_values: Vec = Vec::with_capacity(order.len()); let mut current_group: T = groups[order[0]]; let mut current_value: U = values[order[0]]; for &index in &order[1..] { diff --git a/tests/frame_readers.rs b/tests/frame_readers.rs index c67d7ab..400ebf7 100644 --- a/tests/frame_readers.rs +++ b/tests/frame_readers.rs @@ -1,6 +1,9 @@ use std::{path::Path, sync::Arc}; use timsrust::{ - io::readers::FrameReader, + io::readers::{ + FrameReader, FrameWindowSplittingStrategy, QuadWindowExpansionStrategy, + SpectrumReaderConfig, + }, ms_data::{AcquisitionType, Frame, MSLevel, QuadrupoleSettings}, }; @@ -18,12 +21,13 @@ fn tdf_reader_frames1() { .to_str() .unwrap() .to_string(); - let frames: Vec = FrameReader::new(&file_path) - .unwrap() - .get_all_ms1() - .into_iter() - .map(|x| x.unwrap()) - .collect(); + let frames: Vec = + FrameReader::new(&file_path, FrameWindowSplittingStrategy::default()) + .unwrap() + .get_all_ms1() + .into_iter() + .map(|x| x.unwrap()) + .collect(); let expected: Vec = vec![ Frame { scan_offsets: vec![0, 1, 3, 6, 10], @@ -65,12 +69,13 @@ fn tdf_reader_frames2() { .to_str() .unwrap() .to_string(); - let frames: Vec = FrameReader::new(&file_path) - .unwrap() - .get_all_ms2() - .into_iter() - .map(|x| x.unwrap()) - .collect(); + let frames: Vec = + FrameReader::new(&file_path, FrameWindowSplittingStrategy::default()) + .unwrap() + .get_all_ms2() + .into_iter() + .map(|x| x.unwrap()) + .collect(); let expected: Vec = vec![ // Frame::default(), Frame { @@ -104,4 +109,49 @@ fn tdf_reader_frames2() { } } -// TODO test for DIA +#[test] +fn tdf_reader_frames_dia() { + let file_name = "dia_test.d"; + let file_path = get_local_directory() + .join(file_name) + .to_str() + .unwrap() + .to_string(); + let frames: Vec = + FrameReader::new(&file_path, FrameWindowSplittingStrategy::default()) + .unwrap() + .get_all_ms2() + .into_iter() + .map(|x| x.unwrap()) + .collect(); + + assert_eq!(frames.len(), 4); + for i in 0..frames.len() { + assert_eq!(frames[i].scan_offsets.len(), 710); + assert_eq!(frames[i].scan_offsets[0], 0); + assert_eq!( + frames[i].scan_offsets.last().unwrap(), + &frames[i].intensities.len() + ); + assert_eq!(frames[i].tof_indices.len(), frames[i].intensities.len()); + } + assert_eq!(&frames[0].tof_indices[0], &251695u32); + assert_eq!(&frames[0].intensities[0], &503392u32); + assert_eq!(&frames[0].tof_indices.len(), &754376); + assert_eq!(&frames[0].intensities.len(), &754376); + + assert_eq!(&frames[1].tof_indices[0], &1006071u32); + assert_eq!(&frames[1].intensities[0], &2012144u32); + assert_eq!(&frames[1].tof_indices.len(), &1257057); + assert_eq!(&frames[1].intensities.len(), &1257057); + + assert_eq!(&frames[2].tof_indices[0], &4022866u32); + assert_eq!(&frames[2].intensities[0], &8045734u32); + assert_eq!(&frames[2].tof_indices.len(), &2262419); + assert_eq!(&frames[2].intensities.len(), &2262419); + + assert_eq!(&frames[3].tof_indices[0], &6285285u32); + assert_eq!(&frames[3].intensities[0], &12570572u32); + assert_eq!(&frames[3].tof_indices.len(), &2765100); + assert_eq!(&frames[3].intensities.len(), &2765100); +} diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index 43ff4b8..0586059 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -1,6 +1,9 @@ use std::path::Path; use timsrust::{ - io::readers::SpectrumReader, + io::readers::{ + FrameWindowSplittingStrategy, QuadWindowExpansionStrategy, + SpectrumProcessingParams, SpectrumReader, SpectrumReaderConfig, + }, ms_data::{Precursor, Spectrum}, }; @@ -19,7 +22,9 @@ fn minitdf_reader() { .unwrap() .to_string(); let spectra: Vec = - SpectrumReader::new(file_path).unwrap().get_all(); + SpectrumReader::new(file_path, SpectrumReaderConfig::default()) + .unwrap() + .get_all(); let expected: Vec = vec![ Spectrum { mz_values: vec![100.0, 200.002, 300.03, 400.4], @@ -70,7 +75,9 @@ fn tdf_reader_dda() { .unwrap() .to_string(); let spectra: Vec = - SpectrumReader::new(file_path).unwrap().get_all(); + SpectrumReader::new(file_path, SpectrumReaderConfig::default()) + .unwrap() + .get_all(); let expected: Vec = vec![ Spectrum { mz_values: vec![199.7633445943076], @@ -128,3 +135,66 @@ fn tdf_reader_dda() { assert_eq!(spectra[i], expected[i]); } } + +#[test] +fn test_dia_even() { + let file_name = "dia_test.d"; + let file_path = get_local_directory() + .join(file_name) + .to_str() + .unwrap() + .to_string(); + + for i in 1..3 { + let frames: Vec = SpectrumReader::new( + &file_path, + SpectrumReaderConfig { + frame_splitting_params: + FrameWindowSplittingStrategy::Quadrupole( + QuadWindowExpansionStrategy::Even(i), + ), + spectrum_processing_params: SpectrumProcessingParams::default(), + }, + ) + .get_all(); + + println!(">>>>> EVEN {:?}", frames.len()); + + // 4 frames, 2 windows in each, i splits/window + assert_eq!(frames.len(), 4 * 2 * i); + } +} + +#[test] +fn test_dia_uniform() { + let file_name = "dia_test.d"; + let file_path = get_local_directory() + .join(file_name) + .to_str() + .unwrap() + .to_string(); + + for i in [100, 200, 300] { + let frames: Vec = SpectrumReader::new( + &file_path, + SpectrumReaderConfig { + frame_splitting_params: FrameWindowSplittingStrategy::Window( + QuadWindowExpansionStrategy::Uniform((i, i)), + ), + spectrum_processing_params: SpectrumProcessingParams::default(), + }, + ) + .get_all(); + + println!(">>>>> UNIFORM {} > {:?}", i, frames.len()); + for f in frames.iter() { + println!("{:?}", f.precursor); + } + + // Not all frames have scan windows from 0 to 709 ... so ... I need to think + // on how to express this in the test + // assert_eq!(frames.len(), 4 * ((709 / i) + 1)); + assert!(frames.len() > (709 / i)); + assert!(frames.len() < 3 * ((709 / i) + 1)); + } +} From a5fd90d59013af5b04a5616f2e6271c137a4702f Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Thu, 25 Jul 2024 15:30:00 +0200 Subject: [PATCH 10/27] FIX: resolve test and bench conflicts --- benches/speed_performance.rs | 29 ++++++++++++++++++++++------- tests/spectrum_readers.rs | 2 ++ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/benches/speed_performance.rs b/benches/speed_performance.rs index daab1cc..e3f1b5a 100644 --- a/benches/speed_performance.rs +++ b/benches/speed_performance.rs @@ -1,7 +1,10 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use rayon::iter::ParallelIterator; use timsrust::{ - io::readers::{FrameReader, SpectrumReader}, + io::readers::{ + FrameReader, FrameWindowSplittingStrategy, SpectrumReader, + SpectrumReaderConfig, + }, ms_data::Frame, }; @@ -33,8 +36,12 @@ fn criterion_benchmark_dda(c: &mut Criterion) { let mut group = c.benchmark_group("sample-size-example"); group.significance_level(0.001).sample_size(10); let d_folder_name: &str = DDA_TEST; - let frame_reader = FrameReader::new(d_folder_name).unwrap(); - let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap(); + let frame_reader = + FrameReader::new(d_folder_name, FrameWindowSplittingStrategy::None) + .unwrap(); + let spectrum_reader = + SpectrumReader::new(d_folder_name, SpectrumReaderConfig::default()) + .unwrap(); group.bench_function("DDA read_all_frames 6m", |b| { b.iter(|| read_all_frames(black_box(&frame_reader))) }); @@ -55,8 +62,12 @@ fn criterion_benchmark_dia(c: &mut Criterion) { let mut group = c.benchmark_group("sample-size-example"); group.significance_level(0.001).sample_size(10); let d_folder_name: &str = DIA_TEST; - let frame_reader = FrameReader::new(d_folder_name).unwrap(); - let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap(); + let frame_reader = + FrameReader::new(d_folder_name, FrameWindowSplittingStrategy::None) + .unwrap(); + let spectrum_reader = + SpectrumReader::new(d_folder_name, SpectrumReaderConfig::default()) + .unwrap(); group.bench_function("DIA read_all_frames 6m", |b| { b.iter(|| read_all_frames(black_box(&frame_reader))) }); @@ -74,8 +85,12 @@ fn criterion_benchmark_syp(c: &mut Criterion) { let mut group = c.benchmark_group("sample-size-example"); group.significance_level(0.001).sample_size(10); let d_folder_name: &str = SYP_TEST; - let frame_reader = FrameReader::new(d_folder_name).unwrap(); - let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap(); + let frame_reader = + FrameReader::new(d_folder_name, FrameWindowSplittingStrategy::None) + .unwrap(); + let spectrum_reader = + SpectrumReader::new(d_folder_name, SpectrumReaderConfig::default()) + .unwrap(); group.bench_function("SYP read_all_frames 6m", |b| { b.iter(|| read_all_frames(black_box(&frame_reader))) }); diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index 0586059..4d44e31 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -156,6 +156,7 @@ fn test_dia_even() { spectrum_processing_params: SpectrumProcessingParams::default(), }, ) + .unwrap() .get_all(); println!(">>>>> EVEN {:?}", frames.len()); @@ -184,6 +185,7 @@ fn test_dia_uniform() { spectrum_processing_params: SpectrumProcessingParams::default(), }, ) + .unwrap() .get_all(); println!(">>>>> UNIFORM {} > {:?}", i, frames.len()); From db6a217d7b75241703bd024154a32005a8fdf66b Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Thu, 25 Jul 2024 16:18:41 +0200 Subject: [PATCH 11/27] CHORE: compartmentalize how to build a spectrumreader --- src/io/readers/spectrum_reader.rs | 36 ++++++++++++++++++++++-- tests/spectrum_readers.rs | 46 ++++++++++++++++--------------- 2 files changed, 58 insertions(+), 24 deletions(-) diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs index b158afc..8ba2c89 100644 --- a/src/io/readers/spectrum_reader.rs +++ b/src/io/readers/spectrum_reader.rs @@ -14,7 +14,7 @@ pub struct SpectrumReader { spectrum_reader: Box, } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct SpectrumProcessingParams { smoothing_window: u32, centroiding_window: u32, @@ -39,7 +39,7 @@ pub enum FrameWindowSplittingStrategy { Window(QuadWindowExpansionStrategy), } -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone)] pub struct SpectrumReaderConfig { pub spectrum_processing_params: SpectrumProcessingParams, pub frame_splitting_params: FrameWindowSplittingStrategy, @@ -51,7 +51,39 @@ impl fmt::Debug for SpectrumReader { } } +#[derive(Debug, Default, Clone)] +pub struct SpectrumReaderBuilder { + path: PathBuf, + config: SpectrumReaderConfig, +} + +impl SpectrumReaderBuilder { + pub fn with_path(&self, path: impl AsRef) -> Self { + Self { + path: path.as_ref().to_path_buf(), + config: self.config.clone(), + } + } + + pub fn with_config(&self, config: SpectrumReaderConfig) -> Self { + Self { + path: self.path.clone(), + config: config, + } + } + + pub fn finalize(&self) -> Result { + let reader = + SpectrumReader::new(self.path.clone(), self.config.clone())?; + Ok(reader) + } +} + impl SpectrumReader { + pub fn build() -> SpectrumReaderBuilder { + SpectrumReaderBuilder::default() + } + pub fn new( path: impl AsRef, config: SpectrumReaderConfig, diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index 4d44e31..e931b78 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -21,10 +21,11 @@ fn minitdf_reader() { .to_str() .unwrap() .to_string(); - let spectra: Vec = - SpectrumReader::new(file_path, SpectrumReaderConfig::default()) - .unwrap() - .get_all(); + let spectra: Vec = SpectrumReader::build() + .with_path(file_path) + .finalize() + .unwrap() + .get_all(); let expected: Vec = vec![ Spectrum { mz_values: vec![100.0, 200.002, 300.03, 400.4], @@ -74,10 +75,11 @@ fn tdf_reader_dda() { .to_str() .unwrap() .to_string(); - let spectra: Vec = - SpectrumReader::new(file_path, SpectrumReaderConfig::default()) - .unwrap() - .get_all(); + let spectra: Vec = SpectrumReader::build() + .with_path(file_path) + .finalize() + .unwrap() + .get_all(); let expected: Vec = vec![ Spectrum { mz_values: vec![199.7633445943076], @@ -146,18 +148,18 @@ fn test_dia_even() { .to_string(); for i in 1..3 { - let frames: Vec = SpectrumReader::new( - &file_path, - SpectrumReaderConfig { + let frames: Vec = SpectrumReader::build() + .with_path(&file_path) + .with_config(SpectrumReaderConfig { frame_splitting_params: FrameWindowSplittingStrategy::Quadrupole( QuadWindowExpansionStrategy::Even(i), ), spectrum_processing_params: SpectrumProcessingParams::default(), - }, - ) - .unwrap() - .get_all(); + }) + .finalize() + .unwrap() + .get_all(); println!(">>>>> EVEN {:?}", frames.len()); @@ -176,17 +178,17 @@ fn test_dia_uniform() { .to_string(); for i in [100, 200, 300] { - let frames: Vec = SpectrumReader::new( - &file_path, - SpectrumReaderConfig { + let frames: Vec = SpectrumReader::build() + .with_path(&file_path) + .with_config(SpectrumReaderConfig { frame_splitting_params: FrameWindowSplittingStrategy::Window( QuadWindowExpansionStrategy::Uniform((i, i)), ), spectrum_processing_params: SpectrumProcessingParams::default(), - }, - ) - .unwrap() - .get_all(); + }) + .finalize() + .unwrap() + .get_all(); println!(">>>>> UNIFORM {} > {:?}", i, frames.len()); for f in frames.iter() { From c0039d8a50ce353c75eedb93389165953fd39552 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Thu, 25 Jul 2024 16:24:37 +0200 Subject: [PATCH 12/27] FEAT: simplified frame reader creation --- src/io/readers/frame_reader.rs | 14 +++++--- src/io/readers/precursor_reader/tdf.rs | 5 +-- src/io/readers/spectrum_reader/tdf.rs | 4 +-- tests/frame_readers.rs | 44 +++++++++++--------------- 4 files changed, 31 insertions(+), 36 deletions(-) diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index f4b0d48..a5d09a0 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -35,10 +35,7 @@ pub struct FrameReader { } impl FrameReader { - pub fn new( - path: impl AsRef, - config: FrameWindowSplittingStrategy, - ) -> Result { + pub fn new(path: impl AsRef) -> Result { let sql_path = find_extension(&path, "analysis.tdf").ok_or( FrameReaderError::FileNotFound("analysis.tdf".to_string()), )?; @@ -79,11 +76,18 @@ impl FrameReader { .into_iter() .map(|x| Arc::new(x)) .collect(), - splitting_strategy: config, + splitting_strategy: FrameWindowSplittingStrategy::default(), }; Ok(reader) } + pub fn set_splitting_strategy( + &mut self, + config: &FrameWindowSplittingStrategy, + ) { + self.splitting_strategy = *config; + } + pub fn parallel_filter<'a, F: Fn(&SqlFrame) -> bool + Sync + Send + 'a>( &'a self, predicate: F, diff --git a/src/io/readers/precursor_reader/tdf.rs b/src/io/readers/precursor_reader/tdf.rs index 1795e4c..901c320 100644 --- a/src/io/readers/precursor_reader/tdf.rs +++ b/src/io/readers/precursor_reader/tdf.rs @@ -63,10 +63,7 @@ impl TDFPrecursorReader { (acq_type, acq_config) => { return Err( TDFPrecursorReaderError::UnsupportedAcquisition( - format!( - "{:?} + {:?}", - acquisition_type, acq_config - ), + format!("{:?} + {:?}", acq_type, acq_config), ), ) }, diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index 2f18147..b569f33 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -33,8 +33,8 @@ impl TDFSpectrumReader { path_name: impl AsRef, config: SpectrumReaderConfig, ) -> Result { - let frame_reader: FrameReader = - FrameReader::new(&path_name, config.frame_splitting_params)?; + let mut frame_reader: FrameReader = FrameReader::new(&path_name)?; + frame_reader.set_splitting_strategy(&config.frame_splitting_params); let sql_path = find_extension(&path_name, "analysis.tdf").ok_or( TDFSpectrumReaderError::FileNotFound("analysis.tdf".to_string()), )?; diff --git a/tests/frame_readers.rs b/tests/frame_readers.rs index 400ebf7..b6fa001 100644 --- a/tests/frame_readers.rs +++ b/tests/frame_readers.rs @@ -1,9 +1,6 @@ use std::{path::Path, sync::Arc}; use timsrust::{ - io::readers::{ - FrameReader, FrameWindowSplittingStrategy, QuadWindowExpansionStrategy, - SpectrumReaderConfig, - }, + io::readers::FrameReader, ms_data::{AcquisitionType, Frame, MSLevel, QuadrupoleSettings}, }; @@ -21,13 +18,12 @@ fn tdf_reader_frames1() { .to_str() .unwrap() .to_string(); - let frames: Vec = - FrameReader::new(&file_path, FrameWindowSplittingStrategy::default()) - .unwrap() - .get_all_ms1() - .into_iter() - .map(|x| x.unwrap()) - .collect(); + let frames: Vec = FrameReader::new(&file_path) + .unwrap() + .get_all_ms1() + .into_iter() + .map(|x| x.unwrap()) + .collect(); let expected: Vec = vec![ Frame { scan_offsets: vec![0, 1, 3, 6, 10], @@ -69,13 +65,12 @@ fn tdf_reader_frames2() { .to_str() .unwrap() .to_string(); - let frames: Vec = - FrameReader::new(&file_path, FrameWindowSplittingStrategy::default()) - .unwrap() - .get_all_ms2() - .into_iter() - .map(|x| x.unwrap()) - .collect(); + let frames: Vec = FrameReader::new(&file_path) + .unwrap() + .get_all_ms2() + .into_iter() + .map(|x| x.unwrap()) + .collect(); let expected: Vec = vec![ // Frame::default(), Frame { @@ -117,13 +112,12 @@ fn tdf_reader_frames_dia() { .to_str() .unwrap() .to_string(); - let frames: Vec = - FrameReader::new(&file_path, FrameWindowSplittingStrategy::default()) - .unwrap() - .get_all_ms2() - .into_iter() - .map(|x| x.unwrap()) - .collect(); + let frames: Vec = FrameReader::new(&file_path) + .unwrap() + .get_all_ms2() + .into_iter() + .map(|x| x.unwrap()) + .collect(); assert_eq!(frames.len(), 4); for i in 0..frames.len() { From e5a69f1d076cc9646b17c55ae5e2b3d82fcf7827 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Thu, 25 Jul 2024 16:26:10 +0200 Subject: [PATCH 13/27] FIX: rebuild bench tests --- benches/speed_performance.rs | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/benches/speed_performance.rs b/benches/speed_performance.rs index e3f1b5a..e1e6533 100644 --- a/benches/speed_performance.rs +++ b/benches/speed_performance.rs @@ -1,11 +1,7 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use rayon::iter::ParallelIterator; -use timsrust::{ - io::readers::{ - FrameReader, FrameWindowSplittingStrategy, SpectrumReader, - SpectrumReaderConfig, - }, - ms_data::Frame, +use timsrust::io::readers::{ + FrameReader, SpectrumReader, SpectrumReaderConfig, }; const DDA_TEST: &str = @@ -36,9 +32,7 @@ fn criterion_benchmark_dda(c: &mut Criterion) { let mut group = c.benchmark_group("sample-size-example"); group.significance_level(0.001).sample_size(10); let d_folder_name: &str = DDA_TEST; - let frame_reader = - FrameReader::new(d_folder_name, FrameWindowSplittingStrategy::None) - .unwrap(); + let frame_reader = FrameReader::new(d_folder_name).unwrap(); let spectrum_reader = SpectrumReader::new(d_folder_name, SpectrumReaderConfig::default()) .unwrap(); @@ -62,9 +56,7 @@ fn criterion_benchmark_dia(c: &mut Criterion) { let mut group = c.benchmark_group("sample-size-example"); group.significance_level(0.001).sample_size(10); let d_folder_name: &str = DIA_TEST; - let frame_reader = - FrameReader::new(d_folder_name, FrameWindowSplittingStrategy::None) - .unwrap(); + let frame_reader = FrameReader::new(d_folder_name).unwrap(); let spectrum_reader = SpectrumReader::new(d_folder_name, SpectrumReaderConfig::default()) .unwrap(); @@ -85,9 +77,7 @@ fn criterion_benchmark_syp(c: &mut Criterion) { let mut group = c.benchmark_group("sample-size-example"); group.significance_level(0.001).sample_size(10); let d_folder_name: &str = SYP_TEST; - let frame_reader = - FrameReader::new(d_folder_name, FrameWindowSplittingStrategy::None) - .unwrap(); + let frame_reader = FrameReader::new(d_folder_name).unwrap(); let spectrum_reader = SpectrumReader::new(d_folder_name, SpectrumReaderConfig::default()) .unwrap(); From e53b95807433ef1a6059f3282272d0c2932f4692 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 26 Jul 2024 11:17:34 +0200 Subject: [PATCH 14/27] FEAT: implemented domain inverters --- src/domain_converters.rs | 2 ++ src/domain_converters/frame_to_rt.rs | 16 ++++++++++++++++ src/domain_converters/scan_to_im.rs | 9 +++++++-- src/domain_converters/tof_to_mz.rs | 8 ++++++-- 4 files changed, 31 insertions(+), 4 deletions(-) diff --git a/src/domain_converters.rs b/src/domain_converters.rs index 11c1387..e38bbf4 100644 --- a/src/domain_converters.rs +++ b/src/domain_converters.rs @@ -10,4 +10,6 @@ pub use tof_to_mz::Tof2MzConverter; /// Convert from one domain (e.g. Time of Flight) to another (m/z). pub trait ConvertableDomain { fn convert + Copy>(&self, value: T) -> f64; + + fn invert + Copy>(&self, value: T) -> f64; } diff --git a/src/domain_converters/frame_to_rt.rs b/src/domain_converters/frame_to_rt.rs index eb7d1d1..090b8b4 100644 --- a/src/domain_converters/frame_to_rt.rs +++ b/src/domain_converters/frame_to_rt.rs @@ -16,4 +16,20 @@ impl super::ConvertableDomain for Frame2RtConverter { let upper_value: f64 = self.rt_values[value.into().ceil() as usize]; (lower_value + upper_value) / 2. } + fn invert + Copy>(&self, value: T) -> f64 { + let rt_value = value.into(); + match self.rt_values.binary_search_by(|probe| { + probe.partial_cmp(&rt_value).expect("Cannot handle NaNs") + }) { + Ok(index) => index as f64, + Err(index) => match index { + _ if (index > 0) && (index < self.rt_values.len()) => { + let start = self.rt_values[index - 1]; + let end = self.rt_values[index]; + index as f64 + (rt_value - start) / (end - start) + }, + _ => index as f64, + }, + } + } } diff --git a/src/domain_converters/scan_to_im.rs b/src/domain_converters/scan_to_im.rs index e7390ff..2a9d8bd 100644 --- a/src/domain_converters/scan_to_im.rs +++ b/src/domain_converters/scan_to_im.rs @@ -22,7 +22,12 @@ impl Scan2ImConverter { impl super::ConvertableDomain for Scan2ImConverter { fn convert + Copy>(&self, value: T) -> f64 { - let scan_index_f64: f64 = value.into(); - self.scan_intercept + self.scan_slope * scan_index_f64 + let scan_index: f64 = value.into(); + self.scan_intercept + self.scan_slope * scan_index + } + + fn invert + Copy>(&self, value: T) -> f64 { + let im_value: f64 = value.into(); + (im_value - self.scan_intercept) / self.scan_slope } } diff --git a/src/domain_converters/tof_to_mz.rs b/src/domain_converters/tof_to_mz.rs index c9a3abc..c42078b 100644 --- a/src/domain_converters/tof_to_mz.rs +++ b/src/domain_converters/tof_to_mz.rs @@ -36,7 +36,11 @@ impl Tof2MzConverter { impl super::ConvertableDomain for Tof2MzConverter { fn convert + Copy>(&self, value: T) -> f64 { - let tof_index_f64: f64 = value.into(); - (self.tof_intercept + self.tof_slope * tof_index_f64).powi(2) + let tof_index: f64 = value.into(); + (self.tof_intercept + self.tof_slope * tof_index).powi(2) + } + fn invert + Copy>(&self, value: T) -> f64 { + let mz_value: f64 = value.into(); + (mz_value.sqrt() - self.tof_intercept) / self.tof_slope } } From b5879eb92a4d092f81adefe2ea9d8b8326a21d43 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 26 Jul 2024 11:27:09 +0200 Subject: [PATCH 15/27] DOCS: add future plans to readme --- README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.md b/README.md index 83300ee..1283485 100644 --- a/README.md +++ b/README.md @@ -43,3 +43,12 @@ Two file formats are supported: ## Python bindings The [timsrust_pyo3](https://github.com/jspaezp/timsrust_pyo3) package is an example of how the performance of TimsRust can be utilized in Python + +## Planned changes for future versions + +* Improve docs +* Improve tests +* Pase CompressionType1 +* Error propagation for SpectrumReader(Trait).get +* Make Path of TimsTOF data into special type +* ... From 64f1cad8f837ecc5003436b6fe397e0bf3650b31 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 26 Jul 2024 11:29:52 +0200 Subject: [PATCH 16/27] FEAT: MAde spectrum reader more user friendly to create and set proper defaults for dia data --- src/io/readers/precursor_reader.rs | 13 +++------ src/io/readers/precursor_reader/tdf.rs | 32 ++++++---------------- src/io/readers/precursor_reader/tdf/dia.rs | 1 - src/io/readers/spectrum_reader.rs | 10 +++++-- src/io/readers/spectrum_reader/minitdf.rs | 7 +++-- src/io/readers/spectrum_reader/tdf.rs | 6 ++-- src/io/readers/spectrum_reader/tdf/dia.rs | 1 - 7 files changed, 26 insertions(+), 44 deletions(-) diff --git a/src/io/readers/precursor_reader.rs b/src/io/readers/precursor_reader.rs index b755a25..b5e4aef 100644 --- a/src/io/readers/precursor_reader.rs +++ b/src/io/readers/precursor_reader.rs @@ -24,17 +24,12 @@ impl fmt::Debug for PrecursorReader { impl PrecursorReader { pub fn new( path: impl AsRef, - config: Option, + config: FrameWindowSplittingStrategy, ) -> Result { - let tmp = path.as_ref().extension().and_then(|e| e.to_str()); let precursor_reader: Box = - match (tmp, config) { - (Some("parquet"), None) => { - Box::new(MiniTDFPrecursorReader::new(path)?) - }, - (Some("tdf"), strat) => { - Box::new(TDFPrecursorReader::new(path, strat)?) - }, + match path.as_ref().extension().and_then(|e| e.to_str()) { + Some("parquet") => Box::new(MiniTDFPrecursorReader::new(path)?), + Some("tdf") => Box::new(TDFPrecursorReader::new(path, config)?), _ => panic!(), }; let reader = Self { precursor_reader }; diff --git a/src/io/readers/precursor_reader/tdf.rs b/src/io/readers/precursor_reader/tdf.rs index 901c320..2f92a78 100644 --- a/src/io/readers/precursor_reader/tdf.rs +++ b/src/io/readers/precursor_reader/tdf.rs @@ -23,7 +23,7 @@ pub struct TDFPrecursorReader { impl TDFPrecursorReader { pub fn new( path: impl AsRef, - splitting_strategy: Option, + splitting_strategy: FrameWindowSplittingStrategy, ) -> Result { let sql_path = path.as_ref(); let tdf_sql_reader = SqlReader::open(sql_path)?; @@ -37,33 +37,17 @@ impl TDFPrecursorReader { AcquisitionType::Unknown }; let precursor_reader: Box = - match (acquisition_type, splitting_strategy) { - (AcquisitionType::DDAPASEF, None) => { + match acquisition_type { + AcquisitionType::DDAPASEF => { Box::new(DDATDFPrecursorReader::new(path)?) }, - ( - AcquisitionType::DDAPASEF, - Some(FrameWindowSplittingStrategy::None), - ) => { - // Not 100% sure when this happens ... - // By this I mean generating a Some(None) - // ./tests/frame_readers.rs:60:25 generates it. - // JSPP - 2024-Jul-16 - Box::new(DDATDFPrecursorReader::new(path)?) - }, - (AcquisitionType::DIAPASEF, Some(splitting_strat)) => { - Box::new(DIATDFPrecursorReader::new(path, splitting_strat)?) - }, - (AcquisitionType::DIAPASEF, None) => { - Box::new(DIATDFPrecursorReader::new( - path, - FrameWindowSplittingStrategy::None, - )?) - }, - (acq_type, acq_config) => { + AcquisitionType::DIAPASEF => Box::new( + DIATDFPrecursorReader::new(path, splitting_strategy)?, + ), + acquisition_type => { return Err( TDFPrecursorReaderError::UnsupportedAcquisition( - format!("{:?} + {:?}", acq_type, acq_config), + format!("{:?}", acquisition_type), ), ) }, diff --git a/src/io/readers/precursor_reader/tdf/dia.rs b/src/io/readers/precursor_reader/tdf/dia.rs index 2b4dae0..c7b23df 100644 --- a/src/io/readers/precursor_reader/tdf/dia.rs +++ b/src/io/readers/precursor_reader/tdf/dia.rs @@ -41,7 +41,6 @@ impl DIATDFPrecursorReader { let quadrupole_settings = QuadrupoleSettingsReader::new(tdf_sql_reader.get_path())?; let expanded_quadrupole_settings = match splitting_strat { - FrameWindowSplittingStrategy::None => quadrupole_settings, FrameWindowSplittingStrategy::Quadrupole(x) => { expand_quadrupole_settings( &window_groups, diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs index 8ba2c89..0920e52 100644 --- a/src/io/readers/spectrum_reader.rs +++ b/src/io/readers/spectrum_reader.rs @@ -31,14 +31,18 @@ impl Default for SpectrumProcessingParams { } } -#[derive(Debug, Clone, Copy, Default)] +#[derive(Debug, Clone, Copy)] pub enum FrameWindowSplittingStrategy { - #[default] - None, Quadrupole(QuadWindowExpansionStrategy), Window(QuadWindowExpansionStrategy), } +impl Default for FrameWindowSplittingStrategy { + fn default() -> Self { + Self::Quadrupole(QuadWindowExpansionStrategy::Even(1)) + } +} + #[derive(Debug, Default, Clone)] pub struct SpectrumReaderConfig { pub spectrum_processing_params: SpectrumProcessingParams, diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs index 5c24e77..16f5b39 100644 --- a/src/io/readers/spectrum_reader/minitdf.rs +++ b/src/io/readers/spectrum_reader/minitdf.rs @@ -18,7 +18,7 @@ use crate::{ utils::find_extension, }; -use super::SpectrumReaderTrait; +use super::{FrameWindowSplittingStrategy, SpectrumReaderTrait}; #[derive(Debug)] pub struct MiniTDFSpectrumReader { @@ -36,7 +36,10 @@ impl MiniTDFSpectrumReader { .ok_or(MiniTDFSpectrumReaderError::FileNotFound( "analysis.tdf".to_string(), ))?; - let precursor_reader = PrecursorReader::new(&parquet_file_name, None)?; + let precursor_reader = PrecursorReader::new( + &parquet_file_name, + FrameWindowSplittingStrategy::default(), + )?; let offsets = ParquetPrecursor::from_parquet_file(&parquet_file_name)? .iter() .map(|x| x.offset as usize) diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index b569f33..36437bd 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -41,10 +41,8 @@ impl TDFSpectrumReader { let metadata = MetadataReader::new(&sql_path)?; let mz_reader: Tof2MzConverter = metadata.mz_converter; let tdf_sql_reader = SqlReader::open(&sql_path)?; - let precursor_reader = PrecursorReader::new( - &sql_path, - Some(config.frame_splitting_params), - )?; + let precursor_reader = + PrecursorReader::new(&sql_path, config.frame_splitting_params)?; let acquisition_type = frame_reader.get_acquisition(); let raw_spectrum_reader = RawSpectrumReader::new( &tdf_sql_reader, diff --git a/src/io/readers/spectrum_reader/tdf/dia.rs b/src/io/readers/spectrum_reader/tdf/dia.rs index 5962aa0..1f0c9d0 100644 --- a/src/io/readers/spectrum_reader/tdf/dia.rs +++ b/src/io/readers/spectrum_reader/tdf/dia.rs @@ -31,7 +31,6 @@ impl DIARawSpectrumReader { QuadrupoleSettingsReader::new(&tdf_sql_reader.get_path())?; let expanded_quadrupole_settings = match frame_reader.splitting_strategy { - FrameWindowSplittingStrategy::None => quadrupole_settings, FrameWindowSplittingStrategy::Quadrupole(x) => { expand_quadrupole_settings( &window_groups, From 1f950232eca5f91fa70e978079eb05c156f36e5b Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 26 Jul 2024 11:55:08 +0200 Subject: [PATCH 17/27] FEAT: simplified spectrumreader new and build --- benches/speed_performance.rs | 12 +-- src/io/readers/spectrum_reader.rs | 150 +++++++++++++++--------------- 2 files changed, 78 insertions(+), 84 deletions(-) diff --git a/benches/speed_performance.rs b/benches/speed_performance.rs index e1e6533..3beeeac 100644 --- a/benches/speed_performance.rs +++ b/benches/speed_performance.rs @@ -33,9 +33,7 @@ fn criterion_benchmark_dda(c: &mut Criterion) { group.significance_level(0.001).sample_size(10); let d_folder_name: &str = DDA_TEST; let frame_reader = FrameReader::new(d_folder_name).unwrap(); - let spectrum_reader = - SpectrumReader::new(d_folder_name, SpectrumReaderConfig::default()) - .unwrap(); + let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap(); group.bench_function("DDA read_all_frames 6m", |b| { b.iter(|| read_all_frames(black_box(&frame_reader))) }); @@ -57,9 +55,7 @@ fn criterion_benchmark_dia(c: &mut Criterion) { group.significance_level(0.001).sample_size(10); let d_folder_name: &str = DIA_TEST; let frame_reader = FrameReader::new(d_folder_name).unwrap(); - let spectrum_reader = - SpectrumReader::new(d_folder_name, SpectrumReaderConfig::default()) - .unwrap(); + let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap(); group.bench_function("DIA read_all_frames 6m", |b| { b.iter(|| read_all_frames(black_box(&frame_reader))) }); @@ -78,9 +74,7 @@ fn criterion_benchmark_syp(c: &mut Criterion) { group.significance_level(0.001).sample_size(10); let d_folder_name: &str = SYP_TEST; let frame_reader = FrameReader::new(d_folder_name).unwrap(); - let spectrum_reader = - SpectrumReader::new(d_folder_name, SpectrumReaderConfig::default()) - .unwrap(); + let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap(); group.bench_function("SYP read_all_frames 6m", |b| { b.iter(|| read_all_frames(black_box(&frame_reader))) }); diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs index 0920e52..d7e8f34 100644 --- a/src/io/readers/spectrum_reader.rs +++ b/src/io/readers/spectrum_reader.rs @@ -14,92 +14,19 @@ pub struct SpectrumReader { spectrum_reader: Box, } -#[derive(Debug, Clone)] -pub struct SpectrumProcessingParams { - smoothing_window: u32, - centroiding_window: u32, - calibration_tolerance: f64, -} - -impl Default for SpectrumProcessingParams { - fn default() -> Self { - Self { - smoothing_window: 1, - centroiding_window: 1, - calibration_tolerance: 0.1, - } - } -} - -#[derive(Debug, Clone, Copy)] -pub enum FrameWindowSplittingStrategy { - Quadrupole(QuadWindowExpansionStrategy), - Window(QuadWindowExpansionStrategy), -} - -impl Default for FrameWindowSplittingStrategy { - fn default() -> Self { - Self::Quadrupole(QuadWindowExpansionStrategy::Even(1)) - } -} - -#[derive(Debug, Default, Clone)] -pub struct SpectrumReaderConfig { - pub spectrum_processing_params: SpectrumProcessingParams, - pub frame_splitting_params: FrameWindowSplittingStrategy, -} - impl fmt::Debug for SpectrumReader { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "SpectrumReader {{ /* fields omitted */ }}") } } -#[derive(Debug, Default, Clone)] -pub struct SpectrumReaderBuilder { - path: PathBuf, - config: SpectrumReaderConfig, -} - -impl SpectrumReaderBuilder { - pub fn with_path(&self, path: impl AsRef) -> Self { - Self { - path: path.as_ref().to_path_buf(), - config: self.config.clone(), - } - } - - pub fn with_config(&self, config: SpectrumReaderConfig) -> Self { - Self { - path: self.path.clone(), - config: config, - } - } - - pub fn finalize(&self) -> Result { - let reader = - SpectrumReader::new(self.path.clone(), self.config.clone())?; - Ok(reader) - } -} - impl SpectrumReader { pub fn build() -> SpectrumReaderBuilder { SpectrumReaderBuilder::default() } - pub fn new( - path: impl AsRef, - config: SpectrumReaderConfig, - ) -> Result { - let spectrum_reader: Box = - match path.as_ref().extension().and_then(|e| e.to_str()) { - Some("ms2") => Box::new(MiniTDFSpectrumReader::new(path)?), - Some("d") => Box::new(TDFSpectrumReader::new(path, config)?), - _ => panic!(), - }; - let reader = Self { spectrum_reader }; - Ok(reader) + pub fn new(path: impl AsRef) -> Result { + Ok(Self::build().with_path(path).finalize()?) } pub fn get(&self, index: usize) -> Spectrum { @@ -128,6 +55,44 @@ impl SpectrumReader { } } +#[derive(Debug, Default, Clone)] +pub struct SpectrumReaderBuilder { + path: PathBuf, + config: SpectrumReaderConfig, +} + +impl SpectrumReaderBuilder { + pub fn with_path(&self, path: impl AsRef) -> Self { + Self { + path: path.as_ref().to_path_buf(), + ..self.clone() + } + } + + pub fn with_config(&self, config: SpectrumReaderConfig) -> Self { + Self { + config: config, + ..self.clone() + } + } + + pub fn finalize(&self) -> Result { + let spectrum_reader: Box = + match self.path.extension().and_then(|e| e.to_str()) { + Some("ms2") => { + Box::new(MiniTDFSpectrumReader::new(self.path.clone())?) + }, + Some("d") => Box::new(TDFSpectrumReader::new( + self.path.clone(), + self.config.clone(), + )?), + _ => panic!(), + }; + let reader = SpectrumReader { spectrum_reader }; + Ok(reader) + } +} + trait SpectrumReaderTrait: Sync { fn get(&self, index: usize) -> Spectrum; fn get_path(&self) -> PathBuf; @@ -142,3 +107,38 @@ pub enum SpectrumReaderError { #[error("{0}")] TDFSpectrumReaderError(#[from] TDFSpectrumReaderError), } + +#[derive(Debug, Clone)] +pub struct SpectrumProcessingParams { + smoothing_window: u32, + centroiding_window: u32, + calibration_tolerance: f64, +} + +impl Default for SpectrumProcessingParams { + fn default() -> Self { + Self { + smoothing_window: 1, + centroiding_window: 1, + calibration_tolerance: 0.1, + } + } +} + +#[derive(Debug, Clone, Copy)] +pub enum FrameWindowSplittingStrategy { + Quadrupole(QuadWindowExpansionStrategy), + Window(QuadWindowExpansionStrategy), +} + +impl Default for FrameWindowSplittingStrategy { + fn default() -> Self { + Self::Quadrupole(QuadWindowExpansionStrategy::Even(1)) + } +} + +#[derive(Debug, Default, Clone)] +pub struct SpectrumReaderConfig { + pub spectrum_processing_params: SpectrumProcessingParams, + pub frame_splitting_params: FrameWindowSplittingStrategy, +} From 9c75d2a22034215824352a8262f4191f12a644ae Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 26 Jul 2024 12:12:24 +0200 Subject: [PATCH 18/27] FEAT: cleaned up precursorreader --- src/io/readers/precursor_reader.rs | 64 ++++++++++++++++++----- src/io/readers/spectrum_reader/minitdf.rs | 9 ++-- src/io/readers/spectrum_reader/tdf.rs | 6 ++- 3 files changed, 59 insertions(+), 20 deletions(-) diff --git a/src/io/readers/precursor_reader.rs b/src/io/readers/precursor_reader.rs index b5e4aef..4cb3357 100644 --- a/src/io/readers/precursor_reader.rs +++ b/src/io/readers/precursor_reader.rs @@ -2,7 +2,7 @@ mod minitdf; mod tdf; use core::fmt; -use std::path::Path; +use std::path::{Path, PathBuf}; use minitdf::{MiniTDFPrecursorReader, MiniTDFPrecursorReaderError}; use tdf::{TDFPrecursorReader, TDFPrecursorReaderError}; @@ -22,18 +22,12 @@ impl fmt::Debug for PrecursorReader { } impl PrecursorReader { - pub fn new( - path: impl AsRef, - config: FrameWindowSplittingStrategy, - ) -> Result { - let precursor_reader: Box = - match path.as_ref().extension().and_then(|e| e.to_str()) { - Some("parquet") => Box::new(MiniTDFPrecursorReader::new(path)?), - Some("tdf") => Box::new(TDFPrecursorReader::new(path, config)?), - _ => panic!(), - }; - let reader = Self { precursor_reader }; - Ok(reader) + pub fn build() -> PrecursorReaderBuilder { + PrecursorReaderBuilder::default() + } + + pub fn new(path: impl AsRef) -> Result { + Ok(Self::build().with_path(path).finalize()?) } pub fn get(&self, index: usize) -> Option { @@ -45,6 +39,48 @@ impl PrecursorReader { } } +#[derive(Debug, Default, Clone)] +pub struct PrecursorReaderBuilder { + path: PathBuf, + config: FrameWindowSplittingStrategy, +} + +impl PrecursorReaderBuilder { + pub fn with_path(&self, path: impl AsRef) -> Self { + Self { + path: path.as_ref().to_path_buf(), + ..self.clone() + } + } + + pub fn with_config(&self, config: FrameWindowSplittingStrategy) -> Self { + Self { + config: config, + ..self.clone() + } + } + + pub fn finalize(&self) -> Result { + let precursor_reader: Box = + match self.path.extension().and_then(|e| e.to_str()) { + Some("parquet") => { + Box::new(MiniTDFPrecursorReader::new(self.path.clone())?) + }, + Some("tdf") => Box::new(TDFPrecursorReader::new( + self.path.clone(), + self.config.clone(), + )?), + _ => { + return Err(PrecursorReaderError::PrecursorReaderFileError( + self.path.clone(), + )) + }, + }; + let reader = PrecursorReader { precursor_reader }; + Ok(reader) + } +} + trait PrecursorReaderTrait: Sync { fn get(&self, index: usize) -> Option; fn len(&self) -> usize; @@ -56,4 +92,6 @@ pub enum PrecursorReaderError { MiniTDFPrecursorReaderError(#[from] MiniTDFPrecursorReaderError), #[error("{0}")] TDFPrecursorReaderError(#[from] TDFPrecursorReaderError), + #[error("File {0} not valid")] + PrecursorReaderFileError(PathBuf), } diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs index 16f5b39..6cbccaf 100644 --- a/src/io/readers/spectrum_reader/minitdf.rs +++ b/src/io/readers/spectrum_reader/minitdf.rs @@ -18,7 +18,7 @@ use crate::{ utils::find_extension, }; -use super::{FrameWindowSplittingStrategy, SpectrumReaderTrait}; +use super::SpectrumReaderTrait; #[derive(Debug)] pub struct MiniTDFSpectrumReader { @@ -36,10 +36,9 @@ impl MiniTDFSpectrumReader { .ok_or(MiniTDFSpectrumReaderError::FileNotFound( "analysis.tdf".to_string(), ))?; - let precursor_reader = PrecursorReader::new( - &parquet_file_name, - FrameWindowSplittingStrategy::default(), - )?; + let precursor_reader = PrecursorReader::build() + .with_path(&parquet_file_name) + .finalize()?; let offsets = ParquetPrecursor::from_parquet_file(&parquet_file_name)? .iter() .map(|x| x.offset as usize) diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index 36437bd..ac56eff 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -41,8 +41,10 @@ impl TDFSpectrumReader { let metadata = MetadataReader::new(&sql_path)?; let mz_reader: Tof2MzConverter = metadata.mz_converter; let tdf_sql_reader = SqlReader::open(&sql_path)?; - let precursor_reader = - PrecursorReader::new(&sql_path, config.frame_splitting_params)?; + let precursor_reader = PrecursorReader::build() + .with_path(&sql_path) + .with_config(config.frame_splitting_params) + .finalize()?; let acquisition_type = frame_reader.get_acquisition(); let raw_spectrum_reader = RawSpectrumReader::new( &tdf_sql_reader, From a404bcd0937973d52cb591c5d3874797359be3a2 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 26 Jul 2024 17:10:05 +0200 Subject: [PATCH 19/27] FEAT: propagating spectrum reader errrors (pt1) --- src/io/readers/spectrum_reader.rs | 21 ++++++++++------ src/io/readers/spectrum_reader/minitdf.rs | 6 ++--- src/io/readers/spectrum_reader/tdf.rs | 25 ++++++++++++------- src/io/readers/spectrum_reader/tdf/dda.rs | 8 +++--- src/io/readers/spectrum_reader/tdf/dia.rs | 8 +++--- .../spectrum_reader/tdf/raw_spectra.rs | 7 ++++-- tests/spectrum_readers.rs | 20 ++++++++++++--- 7 files changed, 64 insertions(+), 31 deletions(-) diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs index d7e8f34..d0c4249 100644 --- a/src/io/readers/spectrum_reader.rs +++ b/src/io/readers/spectrum_reader.rs @@ -26,10 +26,10 @@ impl SpectrumReader { } pub fn new(path: impl AsRef) -> Result { - Ok(Self::build().with_path(path).finalize()?) + Self::build().with_path(path).finalize() } - pub fn get(&self, index: usize) -> Spectrum { + pub fn get(&self, index: usize) -> Result { self.spectrum_reader.get(index) } @@ -41,12 +41,13 @@ impl SpectrumReader { self.spectrum_reader.len() } - pub fn get_all(&self) -> Vec { - let mut spectra: Vec = (0..self.len()) + pub fn get_all(&self) -> Vec> { + let mut spectra: Vec> = (0..self + .len()) .into_par_iter() .map(|index| self.get(index)) .collect(); - spectra.sort_by_key(|x| x.precursor.unwrap().index); + spectra.sort_by_key(|x| x.as_ref().unwrap().precursor.unwrap().index); spectra } @@ -86,7 +87,11 @@ impl SpectrumReaderBuilder { self.path.clone(), self.config.clone(), )?), - _ => panic!(), + _ => { + return Err(SpectrumReaderError::SpectrumReaderFileError( + self.path.clone(), + )) + }, }; let reader = SpectrumReader { spectrum_reader }; Ok(reader) @@ -94,7 +99,7 @@ impl SpectrumReaderBuilder { } trait SpectrumReaderTrait: Sync { - fn get(&self, index: usize) -> Spectrum; + fn get(&self, index: usize) -> Result; fn get_path(&self) -> PathBuf; fn len(&self) -> usize; fn calibrate(&mut self); @@ -106,6 +111,8 @@ pub enum SpectrumReaderError { MiniTDFSpectrumReaderError(#[from] MiniTDFSpectrumReaderError), #[error("{0}")] TDFSpectrumReaderError(#[from] TDFSpectrumReaderError), + #[error("File {0} not valid")] + SpectrumReaderFileError(PathBuf), } #[derive(Debug, Clone)] diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs index 6cbccaf..03adce4 100644 --- a/src/io/readers/spectrum_reader/minitdf.rs +++ b/src/io/readers/spectrum_reader/minitdf.rs @@ -18,7 +18,7 @@ use crate::{ utils::find_extension, }; -use super::SpectrumReaderTrait; +use super::{SpectrumReaderError, SpectrumReaderTrait}; #[derive(Debug)] pub struct MiniTDFSpectrumReader { @@ -65,7 +65,7 @@ impl MiniTDFSpectrumReader { } impl SpectrumReaderTrait for MiniTDFSpectrumReader { - fn get(&self, index: usize) -> Spectrum { + fn get(&self, index: usize) -> Result { let mut spectrum = Spectrum::default(); spectrum.index = index; let blob = self.blob_reader.get(index).unwrap(); @@ -98,7 +98,7 @@ impl SpectrumReaderTrait for MiniTDFSpectrumReader { } else { 2.0 + (precursor.mz - 700.0) / 100.0 }; //FIX? - spectrum + Ok(spectrum) } fn len(&self) -> usize { diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index ac56eff..9ee78c5 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -17,7 +17,7 @@ use crate::{ utils::find_extension, }; -use super::{SpectrumReaderConfig, SpectrumReaderTrait}; +use super::{SpectrumReaderConfig, SpectrumReaderError, SpectrumReaderTrait}; #[derive(Debug)] pub struct TDFSpectrumReader { @@ -61,22 +61,29 @@ impl TDFSpectrumReader { Ok(reader) } - pub fn read_single_raw_spectrum(&self, index: usize) -> RawSpectrum { - let raw_spectrum = self.raw_spectrum_reader.get(index); - raw_spectrum + pub fn read_single_raw_spectrum( + &self, + index: usize, + ) -> Result { + let raw_spectrum = self + .raw_spectrum_reader + .get(index)? .smooth(self.config.spectrum_processing_params.smoothing_window) - .centroid(self.config.spectrum_processing_params.centroiding_window) + .centroid( + self.config.spectrum_processing_params.centroiding_window, + ); + Ok(raw_spectrum) } } impl SpectrumReaderTrait for TDFSpectrumReader { - fn get(&self, index: usize) -> Spectrum { - let raw_spectrum = self.read_single_raw_spectrum(index); + fn get(&self, index: usize) -> Result { + let raw_spectrum = self.read_single_raw_spectrum(index).unwrap(); let spectrum = raw_spectrum.finalize( self.precursor_reader.get(index).unwrap(), &self.mz_reader, ); - spectrum + Ok(spectrum) } fn len(&self) -> usize { @@ -95,7 +102,7 @@ impl SpectrumReaderTrait for TDFSpectrumReader { let hits: Vec<(f64, u32)> = (0..self.precursor_reader.len()) .into_par_iter() .map(|index| { - let spectrum = self.read_single_raw_spectrum(index); + let spectrum = self.read_single_raw_spectrum(index).unwrap(); let precursor = self.precursor_reader.get(index).unwrap(); let precursor_mz: f64 = precursor.mz; let mut result: Vec<(f64, u32)> = vec![]; diff --git a/src/io/readers/spectrum_reader/tdf/dda.rs b/src/io/readers/spectrum_reader/tdf/dda.rs index be27104..15cb08f 100644 --- a/src/io/readers/spectrum_reader/tdf/dda.rs +++ b/src/io/readers/spectrum_reader/tdf/dda.rs @@ -9,7 +9,9 @@ use crate::{ utils::vec_utils::{argsort, group_and_sum}, }; -use super::raw_spectra::{RawSpectrum, RawSpectrumReaderTrait}; +use super::raw_spectra::{ + RawSpectrum, RawSpectrumReaderError, RawSpectrumReaderTrait, +}; #[derive(Debug)] pub struct DDARawSpectrumReader { @@ -60,7 +62,7 @@ impl DDARawSpectrumReader { } impl RawSpectrumReaderTrait for DDARawSpectrumReader { - fn get(&self, index: usize) -> RawSpectrum { + fn get(&self, index: usize) -> Result { let mut collision_energy = 0.0; let mut isolation_mz = 0.0; let mut isolation_width = 0.0; @@ -98,7 +100,7 @@ impl RawSpectrumReaderTrait for DDARawSpectrumReader { isolation_mz, isolation_width, }; - raw_spectrum + Ok(raw_spectrum) } fn len(&self) -> usize { diff --git a/src/io/readers/spectrum_reader/tdf/dia.rs b/src/io/readers/spectrum_reader/tdf/dia.rs index 1f0c9d0..2934442 100644 --- a/src/io/readers/spectrum_reader/tdf/dia.rs +++ b/src/io/readers/spectrum_reader/tdf/dia.rs @@ -13,7 +13,9 @@ use crate::{ utils::vec_utils::group_and_sum, }; -use super::raw_spectra::{RawSpectrum, RawSpectrumReaderTrait}; +use super::raw_spectra::{ + RawSpectrum, RawSpectrumReaderError, RawSpectrumReaderTrait, +}; #[derive(Debug)] pub struct DIARawSpectrumReader { @@ -51,7 +53,7 @@ impl DIARawSpectrumReader { } impl RawSpectrumReaderTrait for DIARawSpectrumReader { - fn get(&self, index: usize) -> RawSpectrum { + fn get(&self, index: usize) -> Result { let quad_settings = &self.expanded_quadrupole_settings[index]; let collision_energy = quad_settings.collision_energy[0]; @@ -77,7 +79,7 @@ impl RawSpectrumReaderTrait for DIARawSpectrumReader { isolation_mz, isolation_width, }; - raw_spectrum + Ok(raw_spectrum) } fn len(&self) -> usize { diff --git a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs index d2239b0..ac7e441 100644 --- a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs +++ b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs @@ -115,7 +115,10 @@ impl RawSpectrumReader { Ok(reader) } - pub fn get(&self, index: usize) -> RawSpectrum { + pub fn get( + &self, + index: usize, + ) -> Result { self.raw_spectrum_reader.get(index) } @@ -125,7 +128,7 @@ impl RawSpectrumReader { } pub trait RawSpectrumReaderTrait: Sync { - fn get(&self, index: usize) -> RawSpectrum; + fn get(&self, index: usize) -> Result; fn len(&self) -> usize; } diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index e931b78..32ae31a 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -25,7 +25,10 @@ fn minitdf_reader() { .with_path(file_path) .finalize() .unwrap() - .get_all(); + .get_all() + .into_iter() + .map(|x| x.unwrap()) + .collect(); let expected: Vec = vec![ Spectrum { mz_values: vec![100.0, 200.002, 300.03, 400.4], @@ -79,7 +82,10 @@ fn tdf_reader_dda() { .with_path(file_path) .finalize() .unwrap() - .get_all(); + .get_all() + .into_iter() + .map(|x| x.unwrap()) + .collect(); let expected: Vec = vec![ Spectrum { mz_values: vec![199.7633445943076], @@ -159,7 +165,10 @@ fn test_dia_even() { }) .finalize() .unwrap() - .get_all(); + .get_all() + .into_iter() + .map(|x| x.unwrap()) + .collect(); println!(">>>>> EVEN {:?}", frames.len()); @@ -188,7 +197,10 @@ fn test_dia_uniform() { }) .finalize() .unwrap() - .get_all(); + .get_all() + .into_iter() + .map(|x| x.unwrap()) + .collect(); println!(">>>>> UNIFORM {} > {:?}", i, frames.len()); for f in frames.iter() { From 7f4ab57d54521e91e56cc81f8e6107e0ef0fc8d0 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 26 Jul 2024 17:41:39 +0200 Subject: [PATCH 20/27] FIX: More error propagation for spectrumreaders --- src/io/readers/spectrum_reader/minitdf.rs | 22 +++++++++++++++++----- src/io/readers/spectrum_reader/tdf.rs | 18 +++++++++++++----- src/io/readers/spectrum_reader/tdf/dda.rs | 19 ++++++++++++++----- src/io/readers/spectrum_reader/tdf/dia.rs | 19 ++++++++++++++----- 4 files changed, 58 insertions(+), 20 deletions(-) diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs index 03adce4..80246b3 100644 --- a/src/io/readers/spectrum_reader/minitdf.rs +++ b/src/io/readers/spectrum_reader/minitdf.rs @@ -62,13 +62,14 @@ impl MiniTDFSpectrumReader { }; Ok(reader) } -} -impl SpectrumReaderTrait for MiniTDFSpectrumReader { - fn get(&self, index: usize) -> Result { + fn _get( + &self, + index: usize, + ) -> Result { let mut spectrum = Spectrum::default(); spectrum.index = index; - let blob = self.blob_reader.get(index).unwrap(); + let blob = self.blob_reader.get(index)?; if !blob.is_empty() { let size: usize = blob.len(); let spectrum_data: Vec = @@ -86,7 +87,10 @@ impl SpectrumReaderTrait for MiniTDFSpectrumReader { intensity_values.iter().map(|&x| x as f64).collect(); spectrum.mz_values = mz_values.to_vec(); } - let precursor = self.precursor_reader.get(index).unwrap(); + let precursor = self + .precursor_reader + .get(index) + .ok_or(MiniTDFSpectrumReaderError::NoPrecursor)?; spectrum.precursor = Some(precursor); spectrum.index = precursor.index; spectrum.collision_energy = self.collision_energies[index]; @@ -100,6 +104,12 @@ impl SpectrumReaderTrait for MiniTDFSpectrumReader { }; //FIX? Ok(spectrum) } +} + +impl SpectrumReaderTrait for MiniTDFSpectrumReader { + fn get(&self, index: usize) -> Result { + Ok(self._get(index)?) + } fn len(&self) -> usize { self.precursor_reader.len() @@ -124,4 +134,6 @@ pub enum MiniTDFSpectrumReaderError { IndexedTdfBlobReaderError(#[from] IndexedTdfBlobReaderError), #[error("{0}")] FileNotFound(String), + #[error("No precursor")] + NoPrecursor, } diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index 9ee78c5..06cffb7 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -74,17 +74,23 @@ impl TDFSpectrumReader { ); Ok(raw_spectrum) } -} -impl SpectrumReaderTrait for TDFSpectrumReader { - fn get(&self, index: usize) -> Result { - let raw_spectrum = self.read_single_raw_spectrum(index).unwrap(); + fn _get(&self, index: usize) -> Result { + let raw_spectrum = self.read_single_raw_spectrum(index)?; let spectrum = raw_spectrum.finalize( - self.precursor_reader.get(index).unwrap(), + self.precursor_reader + .get(index) + .ok_or(TDFSpectrumReaderError::NoPrecursor)?, &self.mz_reader, ); Ok(spectrum) } +} + +impl SpectrumReaderTrait for TDFSpectrumReader { + fn get(&self, index: usize) -> Result { + Ok(self._get(index)?) + } fn len(&self) -> usize { debug_assert_eq!( @@ -144,4 +150,6 @@ pub enum TDFSpectrumReaderError { RawSpectrumReaderError(#[from] RawSpectrumReaderError), #[error("{0}")] FileNotFound(String), + #[error("No precursor")] + NoPrecursor, } diff --git a/src/io/readers/spectrum_reader/tdf/dda.rs b/src/io/readers/spectrum_reader/tdf/dda.rs index 15cb08f..2434192 100644 --- a/src/io/readers/spectrum_reader/tdf/dda.rs +++ b/src/io/readers/spectrum_reader/tdf/dda.rs @@ -4,7 +4,7 @@ use crate::{ pasef_frame_msms::SqlPasefFrameMsMs, ReadableSqlTable, SqlError, SqlReader, }, - FrameReader, + FrameReader, FrameReaderError, }, utils::vec_utils::{argsort, group_and_sum}, }; @@ -59,10 +59,11 @@ impl DDARawSpectrumReader { .iter() .map(|&x| &self.pasef_frames[x]) } -} -impl RawSpectrumReaderTrait for DDARawSpectrumReader { - fn get(&self, index: usize) -> Result { + fn _get( + &self, + index: usize, + ) -> Result { let mut collision_energy = 0.0; let mut isolation_mz = 0.0; let mut isolation_width = 0.0; @@ -73,7 +74,7 @@ impl RawSpectrumReaderTrait for DDARawSpectrumReader { isolation_mz = pasef_frame.isolation_mz; isolation_width = pasef_frame.isolation_width; let frame_index: usize = pasef_frame.frame - 1; - let frame = self.frame_reader.get(frame_index).unwrap(); + let frame = self.frame_reader.get(frame_index)?; if frame.intensities.len() == 0 { continue; } @@ -102,6 +103,12 @@ impl RawSpectrumReaderTrait for DDARawSpectrumReader { }; Ok(raw_spectrum) } +} + +impl RawSpectrumReaderTrait for DDARawSpectrumReader { + fn get(&self, index: usize) -> Result { + Ok(self._get(index)?) + } fn len(&self) -> usize { self.offsets.len() - 1 @@ -112,4 +119,6 @@ impl RawSpectrumReaderTrait for DDARawSpectrumReader { pub enum DDARawSpectrumReaderError { #[error("{0}")] SqlError(#[from] SqlError), + #[error("{0}")] + FrameReaderError(#[from] FrameReaderError), } diff --git a/src/io/readers/spectrum_reader/tdf/dia.rs b/src/io/readers/spectrum_reader/tdf/dia.rs index 2934442..7fd9ded 100644 --- a/src/io/readers/spectrum_reader/tdf/dia.rs +++ b/src/io/readers/spectrum_reader/tdf/dia.rs @@ -1,7 +1,7 @@ use crate::io::readers::tdf_utils::{ expand_quadrupole_settings, expand_window_settings, }; -use crate::io::readers::FrameWindowSplittingStrategy; +use crate::io::readers::{FrameReaderError, FrameWindowSplittingStrategy}; use crate::{ io::readers::{ file_readers::sql_reader::{ @@ -50,10 +50,11 @@ impl DIARawSpectrumReader { }; Ok(reader) } -} -impl RawSpectrumReaderTrait for DIARawSpectrumReader { - fn get(&self, index: usize) -> Result { + fn _get( + &self, + index: usize, + ) -> Result { let quad_settings = &self.expanded_quadrupole_settings[index]; let collision_energy = quad_settings.collision_energy[0]; @@ -62,7 +63,7 @@ impl RawSpectrumReaderTrait for DIARawSpectrumReader { let scan_start = quad_settings.scan_starts[0]; let scan_end = quad_settings.scan_ends[0]; let frame_index = quad_settings.index - 1; - let frame = self.frame_reader.get(frame_index).unwrap(); + let frame = self.frame_reader.get(frame_index)?; let offset_start = frame.scan_offsets[scan_start] as usize; let offset_end = frame.scan_offsets[scan_end] as usize; let tof_indices = &frame.tof_indices[offset_start..offset_end]; @@ -81,6 +82,12 @@ impl RawSpectrumReaderTrait for DIARawSpectrumReader { }; Ok(raw_spectrum) } +} + +impl RawSpectrumReaderTrait for DIARawSpectrumReader { + fn get(&self, index: usize) -> Result { + Ok(self._get(index)?) + } fn len(&self) -> usize { self.expanded_quadrupole_settings.len() @@ -93,4 +100,6 @@ pub enum DIARawSpectrumReaderError { SqlError(#[from] SqlError), #[error("{0}")] QuadrupoleSettingsReaderError(#[from] QuadrupoleSettingsReaderError), + #[error("{0}")] + FrameReaderError(#[from] FrameReaderError), } From ed57539fef9a3a281c1bfc8c368df01c7926f267 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 29 Jul 2024 13:17:25 +0200 Subject: [PATCH 21/27] FIX: More error propagation for SpectrumReaders --- src/io/readers/spectrum_reader.rs | 11 +++++------ src/io/readers/spectrum_reader/minitdf.rs | 9 +++++++-- tests/spectrum_readers.rs | 16 ++++------------ 3 files changed, 16 insertions(+), 20 deletions(-) diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs index d0c4249..f0b1d6a 100644 --- a/src/io/readers/spectrum_reader.rs +++ b/src/io/readers/spectrum_reader.rs @@ -41,14 +41,13 @@ impl SpectrumReader { self.spectrum_reader.len() } - pub fn get_all(&self) -> Vec> { - let mut spectra: Vec> = (0..self - .len()) + pub fn get_all(&self) -> Result, SpectrumReaderError> { + let mut spectra: Vec = (0..self.len()) .into_par_iter() .map(|index| self.get(index)) - .collect(); - spectra.sort_by_key(|x| x.as_ref().unwrap().precursor.unwrap().index); - spectra + .collect::, _>>()?; + spectra.sort_by_key(|x| x.precursor.unwrap_or_default().index); + Ok(spectra) } pub fn calibrate(&mut self) { diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs index 80246b3..e5cc23c 100644 --- a/src/io/readers/spectrum_reader/minitdf.rs +++ b/src/io/readers/spectrum_reader/minitdf.rs @@ -72,8 +72,11 @@ impl MiniTDFSpectrumReader { let blob = self.blob_reader.get(index)?; if !blob.is_empty() { let size: usize = blob.len(); - let spectrum_data: Vec = - (0..size).map(|i| blob.get(i).unwrap()).collect(); + let spectrum_data: Vec = (0..size) + .map(|i| { + blob.get(i).ok_or(MiniTDFSpectrumReaderError::BlobError) + }) + .collect::, _>>()?; let scan_count: usize = blob.len() / 3; let tof_indices_bytes: &[u32] = &spectrum_data[..scan_count as usize * 2]; @@ -136,4 +139,6 @@ pub enum MiniTDFSpectrumReaderError { FileNotFound(String), #[error("No precursor")] NoPrecursor, + #[error("BlobError")] + BlobError, } diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index 32ae31a..8d6966e 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -26,9 +26,7 @@ fn minitdf_reader() { .finalize() .unwrap() .get_all() - .into_iter() - .map(|x| x.unwrap()) - .collect(); + .unwrap(); let expected: Vec = vec![ Spectrum { mz_values: vec![100.0, 200.002, 300.03, 400.4], @@ -83,9 +81,7 @@ fn tdf_reader_dda() { .finalize() .unwrap() .get_all() - .into_iter() - .map(|x| x.unwrap()) - .collect(); + .unwrap(); let expected: Vec = vec![ Spectrum { mz_values: vec![199.7633445943076], @@ -166,9 +162,7 @@ fn test_dia_even() { .finalize() .unwrap() .get_all() - .into_iter() - .map(|x| x.unwrap()) - .collect(); + .unwrap(); println!(">>>>> EVEN {:?}", frames.len()); @@ -198,9 +192,7 @@ fn test_dia_uniform() { .finalize() .unwrap() .get_all() - .into_iter() - .map(|x| x.unwrap()) - .collect(); + .unwrap(); println!(">>>>> UNIFORM {} > {:?}", i, frames.len()); for f in frames.iter() { From bea23c250799d22fd37584ab58f5d85b3bd14849 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 29 Jul 2024 13:17:53 +0200 Subject: [PATCH 22/27] CHORE: flagged todo items for error propagation --- src/io/readers/spectrum_reader/tdf.rs | 1 + src/io/readers/tdf_utils.rs | 4 ++-- src/io/writers/mgf.rs | 1 + src/ms_data/spectra.rs | 1 + 4 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index 06cffb7..0891b8c 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -108,6 +108,7 @@ impl SpectrumReaderTrait for TDFSpectrumReader { let hits: Vec<(f64, u32)> = (0..self.precursor_reader.len()) .into_par_iter() .map(|index| { + // TODO let spectrum = self.read_single_raw_spectrum(index).unwrap(); let precursor = self.precursor_reader.get(index).unwrap(); let precursor_mz: f64 = precursor.mz; diff --git a/src/io/readers/tdf_utils.rs b/src/io/readers/tdf_utils.rs index 4e59968..5ea7064 100644 --- a/src/io/readers/tdf_utils.rs +++ b/src/io/readers/tdf_utils.rs @@ -90,8 +90,8 @@ pub fn expand_window_settings( let frame = window_group.frame; let group = &quadrupole_settings[window as usize - 1]; let window_group_start = - group.scan_starts.iter().min().unwrap().clone(); - let window_group_end = group.scan_ends.iter().max().unwrap().clone(); + group.scan_starts.iter().min().unwrap().clone(); // SqlReader cannot return empty vecs, so always succeeds + let window_group_end = group.scan_ends.iter().max().unwrap().clone(); // SqlReader cannot return empty vecs, so always succeeds for (sws, swe) in scan_range_subsplit(window_group_start, window_group_end, &strategy) diff --git a/src/io/writers/mgf.rs b/src/io/writers/mgf.rs index 0ad5ef0..715a5ed 100644 --- a/src/io/writers/mgf.rs +++ b/src/io/writers/mgf.rs @@ -30,6 +30,7 @@ pub struct MGFEntry; impl MGFEntry { pub fn write_header(spectrum: &Spectrum) -> String { + // TODO let precursor = spectrum.precursor.unwrap(); let title = precursor.index; let intensity = precursor.intensity.unwrap_or(0.0); diff --git a/src/ms_data/spectra.rs b/src/ms_data/spectra.rs index 7ffb9f7..36e2c33 100644 --- a/src/ms_data/spectra.rs +++ b/src/ms_data/spectra.rs @@ -17,6 +17,7 @@ impl Spectrum { let top_n = if n == 0 { self.len() } else { n }; let mut indexed: Vec<(f64, usize)> = self.intensities.iter().cloned().zip(0..).collect(); + // TODO indexed.sort_by(|a, b| { b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal) }); From 3fbfca37fc7925fb87340160190378a9e34415a7 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 29 Jul 2024 13:24:10 +0200 Subject: [PATCH 23/27] FEAT: added calibration option to spectrumreaderconfig --- src/io/readers/spectrum_reader.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs index f0b1d6a..53bfa1f 100644 --- a/src/io/readers/spectrum_reader.rs +++ b/src/io/readers/spectrum_reader.rs @@ -92,7 +92,10 @@ impl SpectrumReaderBuilder { )) }, }; - let reader = SpectrumReader { spectrum_reader }; + let mut reader = SpectrumReader { spectrum_reader }; + if self.config.spectrum_processing_params.calibrate { + reader.calibrate(); + } Ok(reader) } } @@ -119,6 +122,7 @@ pub struct SpectrumProcessingParams { smoothing_window: u32, centroiding_window: u32, calibration_tolerance: f64, + calibrate: bool, } impl Default for SpectrumProcessingParams { @@ -127,6 +131,7 @@ impl Default for SpectrumProcessingParams { smoothing_window: 1, centroiding_window: 1, calibration_tolerance: 0.1, + calibrate: false, } } } From b4dc3bacdc4e21760dd2bfd4dc111f6c2bc9fb21 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 29 Jul 2024 13:24:31 +0200 Subject: [PATCH 24/27] FEAT: improved error options --- src/errors.rs | 45 ++++++++------------------ src/io/readers/quad_settings_reader.rs | 4 --- 2 files changed, 14 insertions(+), 35 deletions(-) diff --git a/src/errors.rs b/src/errors.rs index 82ffef8..f8c713b 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -1,36 +1,19 @@ -#[derive(thiserror::Error, Debug)] -pub enum FileFormatError { - #[error("DirectoryDoesNotExist")] - DirectoryDoesNotExist, - #[error("NoParentWithBrukerExtension")] - NoParentWithBrukerExtension, - #[error("BinaryFilesAreMissing")] - BinaryFilesAreMissing, - #[error("MetadataFilesAreMissing")] - MetadataFilesAreMissing, -} +use crate::io::readers::{ + FrameReaderError, MetadataReaderError, PrecursorReaderError, + QuadrupoleSettingsReaderError, SpectrumReaderError, +}; /// An error that is produced by timsrust (uses [thiserror]). #[derive(thiserror::Error, Debug)] pub enum Error { - /// An error to indicate a path is not a Bruker File Format. - #[error("FileFormatError: {0}")] - FileFormatError(#[from] FileFormatError), - // #[error("SqlError: {0}")] - // SqlError(#[from] SqlError), - // #[error("BinError: {0}")] - // BinError(#[from] TdfBlobError), -} - -#[macro_export] -macro_rules! propagated_error_enum { - ($name:ident, $($variant:ident),+) => { - #[derive(Debug, thiserror::Error)] - pub enum $name { - $( - #[error(transparent)] - $variant(#[from] $variant), - )+ - } - }; + #[error("{0}")] + FrameReaderError(#[from] FrameReaderError), + #[error("{0}")] + SpectrumReaderError(#[from] SpectrumReaderError), + #[error("{0}")] + MetadataReaderError(#[from] MetadataReaderError), + #[error("{0}")] + PrecursorReaderError(#[from] PrecursorReaderError), + #[error("{0}")] + QuadrupoleSettingsReaderError(#[from] QuadrupoleSettingsReaderError), } diff --git a/src/io/readers/quad_settings_reader.rs b/src/io/readers/quad_settings_reader.rs index 8a62d3c..8f71932 100644 --- a/src/io/readers/quad_settings_reader.rs +++ b/src/io/readers/quad_settings_reader.rs @@ -86,10 +86,6 @@ impl QuadrupoleSettingsReader { #[derive(Debug, thiserror::Error)] pub enum QuadrupoleSettingsReaderError { - // #[error("{0}")] - // MiniTDFPrecursorReaderError(#[from] MiniTDFPrecursorReaderError), - // #[error("{0}")] - // TDFPrecursorReaderError(#[from] TDFPrecursorReaderError), #[error("{0}")] SqlError(#[from] SqlError), } From d9e98c12dfa73925971cfda5e58696c80703ba89 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 29 Jul 2024 16:52:29 +0200 Subject: [PATCH 25/27] FEAT: moved tdf_utils to quadrupole_reader and cleaned up --- src/io/readers.rs | 2 - src/io/readers/frame_reader.rs | 12 +- src/io/readers/precursor_reader.rs | 2 +- src/io/readers/precursor_reader/tdf.rs | 2 +- src/io/readers/precursor_reader/tdf/dia.rs | 32 +-- src/io/readers/quad_settings_reader.rs | 203 +++++++++++++++++- src/io/readers/spectrum_reader.rs | 15 +- src/io/readers/spectrum_reader/tdf.rs | 4 +- src/io/readers/spectrum_reader/tdf/dia.rs | 32 +-- .../spectrum_reader/tdf/raw_spectra.rs | 16 +- src/io/readers/tdf_utils.rs | 173 --------------- src/ms_data/quadrupole.rs | 6 + 12 files changed, 244 insertions(+), 255 deletions(-) delete mode 100644 src/io/readers/tdf_utils.rs diff --git a/src/io/readers.rs b/src/io/readers.rs index 7e350a5..03d5248 100644 --- a/src/io/readers.rs +++ b/src/io/readers.rs @@ -4,11 +4,9 @@ mod metadata_reader; mod precursor_reader; mod quad_settings_reader; mod spectrum_reader; -mod tdf_utils; pub use frame_reader::*; pub use metadata_reader::*; pub use precursor_reader::*; pub use quad_settings_reader::*; pub use spectrum_reader::*; -pub use tdf_utils::QuadWindowExpansionStrategy; diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index a5d09a0..af9b713 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -19,8 +19,7 @@ use super::{ }, tdf_blob_reader::{TdfBlob, TdfBlobReader, TdfBlobReaderError}, }, - FrameWindowSplittingStrategy, QuadrupoleSettingsReader, - QuadrupoleSettingsReaderError, + QuadrupoleSettingsReader, QuadrupoleSettingsReaderError, }; #[derive(Debug)] @@ -31,7 +30,6 @@ pub struct FrameReader { acquisition: AcquisitionType, window_groups: Vec, quadrupole_settings: Vec>, - pub splitting_strategy: FrameWindowSplittingStrategy, } impl FrameReader { @@ -76,18 +74,10 @@ impl FrameReader { .into_iter() .map(|x| Arc::new(x)) .collect(), - splitting_strategy: FrameWindowSplittingStrategy::default(), }; Ok(reader) } - pub fn set_splitting_strategy( - &mut self, - config: &FrameWindowSplittingStrategy, - ) { - self.splitting_strategy = *config; - } - pub fn parallel_filter<'a, F: Fn(&SqlFrame) -> bool + Sync + Send + 'a>( &'a self, predicate: F, diff --git a/src/io/readers/precursor_reader.rs b/src/io/readers/precursor_reader.rs index c91d615..e4750c5 100644 --- a/src/io/readers/precursor_reader.rs +++ b/src/io/readers/precursor_reader.rs @@ -9,7 +9,7 @@ use tdf::{TDFPrecursorReader, TDFPrecursorReaderError}; use crate::ms_data::Precursor; -use super::FrameWindowSplittingStrategy; +use super::quad_settings_reader::FrameWindowSplittingStrategy; pub struct PrecursorReader { precursor_reader: Box, diff --git a/src/io/readers/precursor_reader/tdf.rs b/src/io/readers/precursor_reader/tdf.rs index 2f92a78..34efef1 100644 --- a/src/io/readers/precursor_reader/tdf.rs +++ b/src/io/readers/precursor_reader/tdf.rs @@ -9,7 +9,7 @@ use dia::{DIATDFPrecursorReader, DIATDFPrecursorReaderError}; use crate::{ io::readers::{ file_readers::sql_reader::{SqlError, SqlReader}, - FrameWindowSplittingStrategy, + quad_settings_reader::FrameWindowSplittingStrategy, }, ms_data::{AcquisitionType, Precursor}, }; diff --git a/src/io/readers/precursor_reader/tdf/dia.rs b/src/io/readers/precursor_reader/tdf/dia.rs index c7b23df..9eacaa7 100644 --- a/src/io/readers/precursor_reader/tdf/dia.rs +++ b/src/io/readers/precursor_reader/tdf/dia.rs @@ -1,17 +1,12 @@ use std::path::Path; -use crate::io::readers::tdf_utils::{ - expand_quadrupole_settings, expand_window_settings, -}; -use crate::io::readers::FrameWindowSplittingStrategy; +use crate::io::readers::quad_settings_reader::FrameWindowSplittingStrategy; use crate::{ domain_converters::{ ConvertableDomain, Frame2RtConverter, Scan2ImConverter, }, io::readers::{ - file_readers::sql_reader::{ - frame_groups::SqlWindowGroup, ReadableSqlTable, SqlError, SqlReader, - }, + file_readers::sql_reader::{SqlError, SqlReader}, MetadataReader, MetadataReaderError, QuadrupoleSettingsReader, QuadrupoleSettingsReaderError, }, @@ -30,29 +25,18 @@ pub struct DIATDFPrecursorReader { impl DIATDFPrecursorReader { pub fn new( path: impl AsRef, - splitting_strat: FrameWindowSplittingStrategy, + splitting_strategy: FrameWindowSplittingStrategy, ) -> Result { let sql_path = path.as_ref(); let tdf_sql_reader = SqlReader::open(sql_path)?; let metadata = MetadataReader::new(&path)?; let rt_converter: Frame2RtConverter = metadata.rt_converter; let im_converter: Scan2ImConverter = metadata.im_converter; - let window_groups = SqlWindowGroup::from_sql_reader(&tdf_sql_reader)?; - let quadrupole_settings = - QuadrupoleSettingsReader::new(tdf_sql_reader.get_path())?; - let expanded_quadrupole_settings = match splitting_strat { - FrameWindowSplittingStrategy::Quadrupole(x) => { - expand_quadrupole_settings( - &window_groups, - &quadrupole_settings, - &x, - ) - }, - FrameWindowSplittingStrategy::Window(x) => { - expand_window_settings(&window_groups, &quadrupole_settings, &x) - }, - }; - + let expanded_quadrupole_settings = + QuadrupoleSettingsReader::from_splitting( + tdf_sql_reader.get_path(), + splitting_strategy, + )?; let reader = Self { expanded_quadrupole_settings, rt_converter, diff --git a/src/io/readers/quad_settings_reader.rs b/src/io/readers/quad_settings_reader.rs index 8f71932..6f5f398 100644 --- a/src/io/readers/quad_settings_reader.rs +++ b/src/io/readers/quad_settings_reader.rs @@ -3,7 +3,8 @@ use std::path::Path; use crate::{ms_data::QuadrupoleSettings, utils::vec_utils::argsort}; use super::file_readers::sql_reader::{ - quad_settings::SqlQuadSettings, ReadableSqlTable, SqlError, SqlReader, + frame_groups::SqlWindowGroup, quad_settings::SqlQuadSettings, + ReadableSqlTable, SqlError, SqlReader, }; pub struct QuadrupoleSettingsReader { @@ -17,6 +18,12 @@ impl QuadrupoleSettingsReader { ) -> Result, QuadrupoleSettingsReaderError> { let sql_path = path.as_ref(); let tdf_sql_reader = SqlReader::open(&sql_path)?; + Self::from_sql_settings(&tdf_sql_reader) + } + + pub fn from_sql_settings( + tdf_sql_reader: &SqlReader, + ) -> Result, QuadrupoleSettingsReaderError> { let sql_quadrupole_settings = SqlQuadSettings::from_sql_reader(&tdf_sql_reader)?; let window_group_count = sql_quadrupole_settings @@ -40,6 +47,29 @@ impl QuadrupoleSettingsReader { Ok(quad_reader.quadrupole_settings) } + pub fn from_splitting( + path: impl AsRef, + splitting_strat: FrameWindowSplittingStrategy, + ) -> Result, QuadrupoleSettingsReaderError> { + let sql_path = path.as_ref(); + let tdf_sql_reader = SqlReader::open(&sql_path)?; + let quadrupole_settings = Self::from_sql_settings(&tdf_sql_reader)?; + let window_groups = SqlWindowGroup::from_sql_reader(&tdf_sql_reader)?; + let expanded_quadrupole_settings = match splitting_strat { + FrameWindowSplittingStrategy::Quadrupole(x) => { + expand_quadrupole_settings( + &window_groups, + &quadrupole_settings, + &x, + ) + }, + FrameWindowSplittingStrategy::Window(x) => { + expand_window_settings(&window_groups, &quadrupole_settings, &x) + }, + }; + Ok(expanded_quadrupole_settings) + } + fn update_from_sql_quadrupole_settings(&mut self) { for window_group in self.sql_quadrupole_settings.iter() { let group = window_group.window_group - 1; @@ -89,3 +119,174 @@ pub enum QuadrupoleSettingsReaderError { #[error("{0}")] SqlError(#[from] SqlError), } + +type SpanStep = (usize, usize); + +/// Strategy for expanding quadrupole settings +/// +/// This enum is used to determine how to expand quadrupole settings +/// when reading in DIA data. And exporting spectra (not frames RN). +/// +/// # Variants +/// +/// For example if we have a window with scan start 50 and end 500 +/// +/// * `None` - Do not expand quadrupole settings; use the original settings +/// * `Even(usize)` - Split the quadrupole settings into `usize` evenly spaced +/// subwindows; e.g. if `usize` is 2, the window will be split into 2 subwindows +/// of equal width. +/// * `Uniform(SpanStep)` - Split the quadrupole settings into subwindows of +/// width `SpanStep.0` and step `SpanStep.1`; e.g. if `SpanStep` is (100, 50), +/// the window will be split into subwindows of width 100 and step 50 between their +/// scan start and end. +/// +#[derive(Debug, Copy, Clone)] +pub enum QuadWindowExpansionStrategy { + None, + Even(usize), + Uniform(SpanStep), +} + +#[derive(Debug, Clone, Copy)] +pub enum FrameWindowSplittingStrategy { + Quadrupole(QuadWindowExpansionStrategy), + Window(QuadWindowExpansionStrategy), +} + +impl Default for FrameWindowSplittingStrategy { + fn default() -> Self { + Self::Quadrupole(QuadWindowExpansionStrategy::Even(1)) + } +} + +fn scan_range_subsplit( + start: usize, + end: usize, + strategy: &QuadWindowExpansionStrategy, +) -> Vec<(usize, usize)> { + let out = match strategy { + QuadWindowExpansionStrategy::None => { + vec![(start, end)] + }, + QuadWindowExpansionStrategy::Even(num_splits) => { + let sub_subwindow_width = (end - start) / (num_splits + 1); + let mut out = Vec::new(); + for sub_subwindow in 0..num_splits.clone() { + let sub_subwindow_scan_start = + start + (sub_subwindow_width * sub_subwindow); + let sub_subwindow_scan_end = + start + (sub_subwindow_width * (sub_subwindow + 2)); + + out.push((sub_subwindow_scan_start, sub_subwindow_scan_end)) + } + out + }, + QuadWindowExpansionStrategy::Uniform((span, step)) => { + let mut curr_start = start.clone(); + let mut curr_end = start + span; + let mut out = Vec::new(); + while curr_end < end { + out.push((curr_start, curr_end)); + curr_start += step; + curr_end += step; + } + if curr_start < end { + out.push((curr_start, end)); + } + out + }, + }; + + debug_assert!( + out.iter().all(|(s, e)| s < e), + "Invalid scan range: {:?}", + out + ); + debug_assert!( + out.iter().all(|(s, e)| *s >= start && *e <= end), + "Invalid scan range: {:?}", + out + ); + out +} + +fn expand_window_settings( + window_groups: &[SqlWindowGroup], + quadrupole_settings: &[QuadrupoleSettings], + strategy: &QuadWindowExpansionStrategy, +) -> Vec { + let mut expanded_quadrupole_settings: Vec = vec![]; + for window_group in window_groups { + let window = window_group.window_group; + let frame = window_group.frame; + let group = &quadrupole_settings[window as usize - 1]; + let window_group_start = + group.scan_starts.iter().min().unwrap().clone(); // SqlReader cannot return empty vecs, so always succeeds + let window_group_end = group.scan_ends.iter().max().unwrap().clone(); // SqlReader cannot return empty vecs, so always succeeds + for (sws, swe) in + scan_range_subsplit(window_group_start, window_group_end, &strategy) + { + let mut mz_min = std::f64::MAX; + let mut mz_max = std::f64::MIN; + let mut nce_sum = 0.0; + let mut total_scan_width = 0.0; + for i in 0..group.len() { + let gss = group.scan_starts[i]; + let gse = group.scan_ends[i]; + if (swe <= gse) || (gss <= sws) { + continue; + } + let half_isolation_width = group.isolation_width[i] / 2.0; + let isolation_mz = group.isolation_mz[i]; + mz_min = mz_min.min(isolation_mz - half_isolation_width); + mz_max = mz_max.max(isolation_mz + half_isolation_width); + let scan_width = (gse.min(swe) - gss.max(sws)) as f64; + nce_sum += group.collision_energy[i] * scan_width; + total_scan_width += scan_width + } + let sub_quad_settings = QuadrupoleSettings { + index: frame, + scan_starts: vec![sws], + scan_ends: vec![swe], + isolation_mz: vec![(mz_min + mz_max) / 2.0], + isolation_width: vec![mz_min - mz_max], + collision_energy: vec![nce_sum / total_scan_width], + }; + expanded_quadrupole_settings.push(sub_quad_settings) + } + } + expanded_quadrupole_settings +} + +fn expand_quadrupole_settings( + window_groups: &[SqlWindowGroup], + quadrupole_settings: &[QuadrupoleSettings], + strategy: &QuadWindowExpansionStrategy, +) -> Vec { + let mut expanded_quadrupole_settings: Vec = vec![]; + for window_group in window_groups { + let window = window_group.window_group; + let frame = window_group.frame; + let group = &quadrupole_settings[window as usize - 1]; + for sub_window in 0..group.isolation_mz.len() { + let subwindow_scan_start = group.scan_starts[sub_window]; + let subwindow_scan_end = group.scan_ends[sub_window]; + for (sws, swe) in scan_range_subsplit( + subwindow_scan_start, + subwindow_scan_end, + &strategy, + ) { + let sub_quad_settings = QuadrupoleSettings { + index: frame, + scan_starts: vec![sws], + scan_ends: vec![swe], + isolation_mz: vec![group.isolation_mz[sub_window]], + isolation_width: vec![group.isolation_width[sub_window]], + collision_energy: vec![group.collision_energy[sub_window]], + }; + expanded_quadrupole_settings.push(sub_quad_settings) + } + } + } + expanded_quadrupole_settings +} diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs index 53bfa1f..ef5f070 100644 --- a/src/io/readers/spectrum_reader.rs +++ b/src/io/readers/spectrum_reader.rs @@ -7,9 +7,10 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator}; use std::path::{Path, PathBuf}; use tdf::{TDFSpectrumReader, TDFSpectrumReaderError}; -use crate::io::readers::tdf_utils::QuadWindowExpansionStrategy; use crate::ms_data::Spectrum; +use super::FrameWindowSplittingStrategy; + pub struct SpectrumReader { spectrum_reader: Box, } @@ -136,18 +137,6 @@ impl Default for SpectrumProcessingParams { } } -#[derive(Debug, Clone, Copy)] -pub enum FrameWindowSplittingStrategy { - Quadrupole(QuadWindowExpansionStrategy), - Window(QuadWindowExpansionStrategy), -} - -impl Default for FrameWindowSplittingStrategy { - fn default() -> Self { - Self::Quadrupole(QuadWindowExpansionStrategy::Even(1)) - } -} - #[derive(Debug, Default, Clone)] pub struct SpectrumReaderConfig { pub spectrum_processing_params: SpectrumProcessingParams, diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index 0891b8c..7f29748 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -33,8 +33,7 @@ impl TDFSpectrumReader { path_name: impl AsRef, config: SpectrumReaderConfig, ) -> Result { - let mut frame_reader: FrameReader = FrameReader::new(&path_name)?; - frame_reader.set_splitting_strategy(&config.frame_splitting_params); + let frame_reader: FrameReader = FrameReader::new(&path_name)?; let sql_path = find_extension(&path_name, "analysis.tdf").ok_or( TDFSpectrumReaderError::FileNotFound("analysis.tdf".to_string()), )?; @@ -50,6 +49,7 @@ impl TDFSpectrumReader { &tdf_sql_reader, frame_reader, acquisition_type, + config.frame_splitting_params, )?; let reader = Self { path: path_name.as_ref().to_path_buf(), diff --git a/src/io/readers/spectrum_reader/tdf/dia.rs b/src/io/readers/spectrum_reader/tdf/dia.rs index 7fd9ded..3313acf 100644 --- a/src/io/readers/spectrum_reader/tdf/dia.rs +++ b/src/io/readers/spectrum_reader/tdf/dia.rs @@ -1,12 +1,8 @@ -use crate::io::readers::tdf_utils::{ - expand_quadrupole_settings, expand_window_settings, -}; -use crate::io::readers::{FrameReaderError, FrameWindowSplittingStrategy}; +use crate::io::readers::quad_settings_reader::FrameWindowSplittingStrategy; +use crate::io::readers::FrameReaderError; use crate::{ io::readers::{ - file_readers::sql_reader::{ - frame_groups::SqlWindowGroup, ReadableSqlTable, SqlError, SqlReader, - }, + file_readers::sql_reader::{SqlError, SqlReader}, FrameReader, QuadrupoleSettingsReader, QuadrupoleSettingsReaderError, }, ms_data::QuadrupoleSettings, @@ -27,23 +23,13 @@ impl DIARawSpectrumReader { pub fn new( tdf_sql_reader: &SqlReader, frame_reader: FrameReader, + splitting_strategy: FrameWindowSplittingStrategy, ) -> Result { - let window_groups = SqlWindowGroup::from_sql_reader(&tdf_sql_reader)?; - let quadrupole_settings = - QuadrupoleSettingsReader::new(&tdf_sql_reader.get_path())?; - let expanded_quadrupole_settings = match frame_reader.splitting_strategy - { - FrameWindowSplittingStrategy::Quadrupole(x) => { - expand_quadrupole_settings( - &window_groups, - &quadrupole_settings, - &x, - ) - }, - FrameWindowSplittingStrategy::Window(x) => { - expand_window_settings(&window_groups, &quadrupole_settings, &x) - }, - }; + let expanded_quadrupole_settings = + QuadrupoleSettingsReader::from_splitting( + tdf_sql_reader.get_path(), + splitting_strategy, + )?; let reader = Self { expanded_quadrupole_settings, frame_reader, diff --git a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs index ac7e441..95b4a46 100644 --- a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs +++ b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs @@ -2,7 +2,10 @@ use core::fmt; use crate::{ domain_converters::{ConvertableDomain, Tof2MzConverter}, - io::readers::{file_readers::sql_reader::SqlReader, FrameReader}, + io::readers::{ + file_readers::sql_reader::SqlReader, + quad_settings_reader::FrameWindowSplittingStrategy, FrameReader, + }, ms_data::{AcquisitionType, Precursor, Spectrum}, utils::vec_utils::{filter_with_mask, find_sparse_local_maxima_mask}, }; @@ -94,15 +97,20 @@ impl RawSpectrumReader { tdf_sql_reader: &SqlReader, frame_reader: FrameReader, acquisition_type: AcquisitionType, + splitting_strategy: FrameWindowSplittingStrategy, ) -> Result { let raw_spectrum_reader: Box = match acquisition_type { AcquisitionType::DDAPASEF => Box::new( DDARawSpectrumReader::new(tdf_sql_reader, frame_reader)?, ), - AcquisitionType::DIAPASEF => Box::new( - DIARawSpectrumReader::new(tdf_sql_reader, frame_reader)?, - ), + AcquisitionType::DIAPASEF => { + Box::new(DIARawSpectrumReader::new( + tdf_sql_reader, + frame_reader, + splitting_strategy, + )?) + }, acquisition_type => { return Err(RawSpectrumReaderError::UnsupportedAcquisition( format!("{:?}", acquisition_type), diff --git a/src/io/readers/tdf_utils.rs b/src/io/readers/tdf_utils.rs deleted file mode 100644 index 5ea7064..0000000 --- a/src/io/readers/tdf_utils.rs +++ /dev/null @@ -1,173 +0,0 @@ -use crate::io::readers::file_readers::sql_reader::frame_groups::SqlWindowGroup; -use crate::ms_data::QuadrupoleSettings; - -type SpanStep = (usize, usize); - -/// Strategy for expanding quadrupole settings -/// -/// This enum is used to determine how to expand quadrupole settings -/// when reading in DIA data. And exporting spectra (not frames RN). -/// -/// # Variants -/// -/// For example if we have a window with scan start 50 and end 500 -/// -/// * `None` - Do not expand quadrupole settings; use the original settings -/// * `Even(usize)` - Split the quadrupole settings into `usize` evenly spaced -/// subwindows; e.g. if `usize` is 2, the window will be split into 2 subwindows -/// of equal width. -/// * `Uniform(SpanStep)` - Split the quadrupole settings into subwindows of -/// width `SpanStep.0` and step `SpanStep.1`; e.g. if `SpanStep` is (100, 50), -/// the window will be split into subwindows of width 100 and step 50 between their -/// scan start and end. -/// -#[derive(Debug, Copy, Clone)] -pub enum QuadWindowExpansionStrategy { - None, - Even(usize), - Uniform(SpanStep), -} - -fn scan_range_subsplit( - start: usize, - end: usize, - strategy: &QuadWindowExpansionStrategy, -) -> Vec<(usize, usize)> { - let out = match strategy { - QuadWindowExpansionStrategy::None => { - vec![(start, end)] - }, - QuadWindowExpansionStrategy::Even(num_splits) => { - let sub_subwindow_width = (end - start) / (num_splits + 1); - let mut out = Vec::new(); - for sub_subwindow in 0..num_splits.clone() { - let sub_subwindow_scan_start = - start + (sub_subwindow_width * sub_subwindow); - let sub_subwindow_scan_end = - start + (sub_subwindow_width * (sub_subwindow + 2)); - - out.push((sub_subwindow_scan_start, sub_subwindow_scan_end)) - } - out - }, - QuadWindowExpansionStrategy::Uniform((span, step)) => { - let mut curr_start = start.clone(); - let mut curr_end = start + span; - let mut out = Vec::new(); - while curr_end < end { - out.push((curr_start, curr_end)); - curr_start += step; - curr_end += step; - } - if curr_start < end { - out.push((curr_start, end)); - } - out - }, - }; - - debug_assert!( - out.iter().all(|(s, e)| s < e), - "Invalid scan range: {:?}", - out - ); - debug_assert!( - out.iter().all(|(s, e)| *s >= start && *e <= end), - "Invalid scan range: {:?}", - out - ); - out -} - -pub fn expand_window_settings( - window_groups: &[SqlWindowGroup], - quadrupole_settings: &[QuadrupoleSettings], - strategy: &QuadWindowExpansionStrategy, -) -> Vec { - let mut expanded_quadrupole_settings: Vec = vec![]; - for window_group in window_groups { - let window = window_group.window_group; - let frame = window_group.frame; - let group = &quadrupole_settings[window as usize - 1]; - let window_group_start = - group.scan_starts.iter().min().unwrap().clone(); // SqlReader cannot return empty vecs, so always succeeds - let window_group_end = group.scan_ends.iter().max().unwrap().clone(); // SqlReader cannot return empty vecs, so always succeeds - - for (sws, swe) in - scan_range_subsplit(window_group_start, window_group_end, &strategy) - { - let mut mz_sum = 0.0; - let mut mz_min = std::f64::MAX; - let mut mz_max = std::f64::MIN; - let mut nce_sum = 0.0; - let mut num_added = 0; - - for i in 0..group.isolation_mz.len() { - // Should I be checking here for overlap instead of full containment? - if sws <= group.scan_starts[i] && swe >= group.scan_ends[i] { - mz_sum += group.isolation_mz[i]; - mz_min = mz_min.min( - group.isolation_mz[i] - - (group.isolation_width[i] / 2.0), - ); - mz_max = mz_max.max( - group.isolation_mz[i] - + (group.isolation_width[i] / 2.0), - ); - nce_sum += group.collision_energy[i]; - num_added += 1; - } - } - - let mz_mean = mz_sum / num_added as f64; - let mean_nce = nce_sum / num_added as f64; - - let sub_quad_settings = QuadrupoleSettings { - index: frame, - scan_starts: vec![sws], - scan_ends: vec![swe], - isolation_mz: vec![mz_mean], - isolation_width: vec![mz_min - mz_max], - collision_energy: vec![mean_nce], - }; - expanded_quadrupole_settings.push(sub_quad_settings) - } - } - expanded_quadrupole_settings -} - -pub fn expand_quadrupole_settings( - window_groups: &[SqlWindowGroup], - quadrupole_settings: &[QuadrupoleSettings], - strategy: &QuadWindowExpansionStrategy, -) -> Vec { - // Read the 'NUM_SUB_SUB_SPLITS' from env variables ... default to 1 - // (for now) - - let mut expanded_quadrupole_settings: Vec = vec![]; - for window_group in window_groups { - let window = window_group.window_group; - let frame = window_group.frame; - let group = &quadrupole_settings[window as usize - 1]; - for sub_window in 0..group.isolation_mz.len() { - let subwindow_scan_start = group.scan_starts[sub_window]; - let subwindow_scan_end = group.scan_ends[sub_window]; - for (sws, swe) in scan_range_subsplit( - subwindow_scan_start, - subwindow_scan_end, - &strategy, - ) { - let sub_quad_settings = QuadrupoleSettings { - index: frame, - scan_starts: vec![sws], - scan_ends: vec![swe], - isolation_mz: vec![group.isolation_mz[sub_window]], - isolation_width: vec![group.isolation_width[sub_window]], - collision_energy: vec![group.collision_energy[sub_window]], - }; - expanded_quadrupole_settings.push(sub_quad_settings) - } - } - } - expanded_quadrupole_settings -} diff --git a/src/ms_data/quadrupole.rs b/src/ms_data/quadrupole.rs index b9d2185..0d96f61 100644 --- a/src/ms_data/quadrupole.rs +++ b/src/ms_data/quadrupole.rs @@ -8,3 +8,9 @@ pub struct QuadrupoleSettings { pub isolation_width: Vec, pub collision_energy: Vec, } + +impl QuadrupoleSettings { + pub fn len(&self) -> usize { + self.isolation_mz.len() + } +} From 6bcd6328c072a72d7c6c31ac2207585862dfcc32 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 31 Jul 2024 10:21:40 +0200 Subject: [PATCH 26/27] FEATL reverted spectrum reader get all to vec of result instead of result of vec --- src/io/readers/spectrum_reader.rs | 17 ++++++++---- tests/spectrum_readers.rs | 46 ++++++++++++------------------- 2 files changed, 29 insertions(+), 34 deletions(-) diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs index ef5f070..7f905cc 100644 --- a/src/io/readers/spectrum_reader.rs +++ b/src/io/readers/spectrum_reader.rs @@ -42,13 +42,20 @@ impl SpectrumReader { self.spectrum_reader.len() } - pub fn get_all(&self) -> Result, SpectrumReaderError> { - let mut spectra: Vec = (0..self.len()) + pub fn get_all(&self) -> Vec> { + let mut spectra: Vec> = (0..self + .len()) .into_par_iter() .map(|index| self.get(index)) - .collect::, _>>()?; - spectra.sort_by_key(|x| x.precursor.unwrap_or_default().index); - Ok(spectra) + .collect(); + spectra.sort_by_key(|x| match x { + Ok(spectrum) => match spectrum.precursor { + Some(precursor) => precursor.index, + None => spectrum.index, + }, + Err(_) => 0, + }); + spectra } pub fn calibrate(&mut self) { diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index 8d6966e..8f6f198 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -21,12 +21,11 @@ fn minitdf_reader() { .to_str() .unwrap() .to_string(); - let spectra: Vec = SpectrumReader::build() + let spectra: Vec> = SpectrumReader::build() .with_path(file_path) .finalize() .unwrap() - .get_all() - .unwrap(); + .get_all(); let expected: Vec = vec![ Spectrum { mz_values: vec![100.0, 200.002, 300.03, 400.4], @@ -63,8 +62,8 @@ fn minitdf_reader() { isolation_width: 3.0, }, ]; - for i in 0..spectra.len() { - assert_eq!(spectra[i], expected[i]); + for (i, spectrum) in spectra.into_iter().enumerate() { + assert_eq!(spectrum.unwrap(), expected[i]); } } @@ -76,12 +75,11 @@ fn tdf_reader_dda() { .to_str() .unwrap() .to_string(); - let spectra: Vec = SpectrumReader::build() + let spectra: Vec> = SpectrumReader::build() .with_path(file_path) .finalize() .unwrap() - .get_all() - .unwrap(); + .get_all(); let expected: Vec = vec![ Spectrum { mz_values: vec![199.7633445943076], @@ -135,8 +133,8 @@ fn tdf_reader_dda() { isolation_width: 2.0, }, ]; - for i in 0..spectra.len() { - assert_eq!(spectra[i], expected[i]); + for (i, spectrum) in spectra.into_iter().enumerate() { + assert_eq!(spectrum.unwrap(), expected[i]); } } @@ -148,9 +146,8 @@ fn test_dia_even() { .to_str() .unwrap() .to_string(); - for i in 1..3 { - let frames: Vec = SpectrumReader::build() + let spectra = SpectrumReader::build() .with_path(&file_path) .with_config(SpectrumReaderConfig { frame_splitting_params: @@ -161,13 +158,9 @@ fn test_dia_even() { }) .finalize() .unwrap() - .get_all() - .unwrap(); - - println!(">>>>> EVEN {:?}", frames.len()); - + .get_all(); // 4 frames, 2 windows in each, i splits/window - assert_eq!(frames.len(), 4 * 2 * i); + assert_eq!(spectra.len(), 4 * 2 * i); } } @@ -179,9 +172,8 @@ fn test_dia_uniform() { .to_str() .unwrap() .to_string(); - for i in [100, 200, 300] { - let frames: Vec = SpectrumReader::build() + let spectra = SpectrumReader::build() .with_path(&file_path) .with_config(SpectrumReaderConfig { frame_splitting_params: FrameWindowSplittingStrategy::Window( @@ -191,18 +183,14 @@ fn test_dia_uniform() { }) .finalize() .unwrap() - .get_all() - .unwrap(); - - println!(">>>>> UNIFORM {} > {:?}", i, frames.len()); - for f in frames.iter() { - println!("{:?}", f.precursor); + .get_all(); + for f in spectra.iter() { + println!("{:?}", f.as_ref().unwrap().precursor); } - // Not all frames have scan windows from 0 to 709 ... so ... I need to think // on how to express this in the test // assert_eq!(frames.len(), 4 * ((709 / i) + 1)); - assert!(frames.len() > (709 / i)); - assert!(frames.len() < 3 * ((709 / i) + 1)); + assert!(spectra.len() > (709 / i)); + assert!(spectra.len() < 3 * ((709 / i) + 1)); } } From 23f43cb7b6d8c229d2ba526be29d801946d49c98 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 31 Jul 2024 10:22:07 +0200 Subject: [PATCH 27/27] DOCS: readme update --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 1283485..9b01172 100644 --- a/README.md +++ b/README.md @@ -45,10 +45,9 @@ Two file formats are supported: The [timsrust_pyo3](https://github.com/jspaezp/timsrust_pyo3) package is an example of how the performance of TimsRust can be utilized in Python ## Planned changes for future versions - +TODO * Improve docs * Improve tests * Pase CompressionType1 -* Error propagation for SpectrumReader(Trait).get * Make Path of TimsTOF data into special type * ...