diff --git a/README.md b/README.md index 83300ee..9b01172 100644 --- a/README.md +++ b/README.md @@ -43,3 +43,11 @@ Two file formats are supported: ## Python bindings The [timsrust_pyo3](https://github.com/jspaezp/timsrust_pyo3) package is an example of how the performance of TimsRust can be utilized in Python + +## Planned changes for future versions +TODO +* Improve docs +* Improve tests +* Pase CompressionType1 +* Make Path of TimsTOF data into special type +* ... diff --git a/benches/speed_performance.rs b/benches/speed_performance.rs index d9bd813..3beeeac 100644 --- a/benches/speed_performance.rs +++ b/benches/speed_performance.rs @@ -1,8 +1,7 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use rayon::iter::ParallelIterator; -use timsrust::{ - io::readers::{FrameReader, SpectrumReader}, - ms_data::Frame, +use timsrust::io::readers::{ + FrameReader, SpectrumReader, SpectrumReaderConfig, }; const DDA_TEST: &str = @@ -34,7 +33,7 @@ fn criterion_benchmark_dda(c: &mut Criterion) { group.significance_level(0.001).sample_size(10); let d_folder_name: &str = DDA_TEST; let frame_reader = FrameReader::new(d_folder_name).unwrap(); - let spectrum_reader = SpectrumReader::new(d_folder_name); + let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap(); group.bench_function("DDA read_all_frames 6m", |b| { b.iter(|| read_all_frames(black_box(&frame_reader))) }); @@ -56,7 +55,7 @@ fn criterion_benchmark_dia(c: &mut Criterion) { group.significance_level(0.001).sample_size(10); let d_folder_name: &str = DIA_TEST; let frame_reader = FrameReader::new(d_folder_name).unwrap(); - let spectrum_reader = SpectrumReader::new(d_folder_name); + let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap(); group.bench_function("DIA read_all_frames 6m", |b| { b.iter(|| read_all_frames(black_box(&frame_reader))) }); @@ -75,7 +74,7 @@ fn criterion_benchmark_syp(c: &mut Criterion) { group.significance_level(0.001).sample_size(10); let d_folder_name: &str = SYP_TEST; let frame_reader = FrameReader::new(d_folder_name).unwrap(); - let spectrum_reader = SpectrumReader::new(d_folder_name); + let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap(); group.bench_function("SYP read_all_frames 6m", |b| { b.iter(|| read_all_frames(black_box(&frame_reader))) }); diff --git a/src/domain_converters.rs b/src/domain_converters.rs index 11c1387..e38bbf4 100644 --- a/src/domain_converters.rs +++ b/src/domain_converters.rs @@ -10,4 +10,6 @@ pub use tof_to_mz::Tof2MzConverter; /// Convert from one domain (e.g. Time of Flight) to another (m/z). pub trait ConvertableDomain { fn convert + Copy>(&self, value: T) -> f64; + + fn invert + Copy>(&self, value: T) -> f64; } diff --git a/src/domain_converters/frame_to_rt.rs b/src/domain_converters/frame_to_rt.rs index eb7d1d1..090b8b4 100644 --- a/src/domain_converters/frame_to_rt.rs +++ b/src/domain_converters/frame_to_rt.rs @@ -16,4 +16,20 @@ impl super::ConvertableDomain for Frame2RtConverter { let upper_value: f64 = self.rt_values[value.into().ceil() as usize]; (lower_value + upper_value) / 2. } + fn invert + Copy>(&self, value: T) -> f64 { + let rt_value = value.into(); + match self.rt_values.binary_search_by(|probe| { + probe.partial_cmp(&rt_value).expect("Cannot handle NaNs") + }) { + Ok(index) => index as f64, + Err(index) => match index { + _ if (index > 0) && (index < self.rt_values.len()) => { + let start = self.rt_values[index - 1]; + let end = self.rt_values[index]; + index as f64 + (rt_value - start) / (end - start) + }, + _ => index as f64, + }, + } + } } diff --git a/src/domain_converters/scan_to_im.rs b/src/domain_converters/scan_to_im.rs index e7390ff..2a9d8bd 100644 --- a/src/domain_converters/scan_to_im.rs +++ b/src/domain_converters/scan_to_im.rs @@ -22,7 +22,12 @@ impl Scan2ImConverter { impl super::ConvertableDomain for Scan2ImConverter { fn convert + Copy>(&self, value: T) -> f64 { - let scan_index_f64: f64 = value.into(); - self.scan_intercept + self.scan_slope * scan_index_f64 + let scan_index: f64 = value.into(); + self.scan_intercept + self.scan_slope * scan_index + } + + fn invert + Copy>(&self, value: T) -> f64 { + let im_value: f64 = value.into(); + (im_value - self.scan_intercept) / self.scan_slope } } diff --git a/src/domain_converters/tof_to_mz.rs b/src/domain_converters/tof_to_mz.rs index c9a3abc..c42078b 100644 --- a/src/domain_converters/tof_to_mz.rs +++ b/src/domain_converters/tof_to_mz.rs @@ -36,7 +36,11 @@ impl Tof2MzConverter { impl super::ConvertableDomain for Tof2MzConverter { fn convert + Copy>(&self, value: T) -> f64 { - let tof_index_f64: f64 = value.into(); - (self.tof_intercept + self.tof_slope * tof_index_f64).powi(2) + let tof_index: f64 = value.into(); + (self.tof_intercept + self.tof_slope * tof_index).powi(2) + } + fn invert + Copy>(&self, value: T) -> f64 { + let mz_value: f64 = value.into(); + (mz_value.sqrt() - self.tof_intercept) / self.tof_slope } } diff --git a/src/errors.rs b/src/errors.rs index 44782f1..f8c713b 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -1,23 +1,19 @@ -#[derive(thiserror::Error, Debug)] -pub enum FileFormatError { - #[error("DirectoryDoesNotExist")] - DirectoryDoesNotExist, - #[error("NoParentWithBrukerExtension")] - NoParentWithBrukerExtension, - #[error("BinaryFilesAreMissing")] - BinaryFilesAreMissing, - #[error("MetadataFilesAreMissing")] - MetadataFilesAreMissing, -} +use crate::io::readers::{ + FrameReaderError, MetadataReaderError, PrecursorReaderError, + QuadrupoleSettingsReaderError, SpectrumReaderError, +}; /// An error that is produced by timsrust (uses [thiserror]). #[derive(thiserror::Error, Debug)] pub enum Error { - /// An error to indicate a path is not a Bruker File Format. - #[error("FileFormatError: {0}")] - FileFormatError(#[from] FileFormatError), - // #[error("SqlError: {0}")] - // SqlError(#[from] SqlError), - // #[error("BinError: {0}")] - // BinError(#[from] TdfBlobError), + #[error("{0}")] + FrameReaderError(#[from] FrameReaderError), + #[error("{0}")] + SpectrumReaderError(#[from] SpectrumReaderError), + #[error("{0}")] + MetadataReaderError(#[from] MetadataReaderError), + #[error("{0}")] + PrecursorReaderError(#[from] PrecursorReaderError), + #[error("{0}")] + QuadrupoleSettingsReaderError(#[from] QuadrupoleSettingsReaderError), } diff --git a/src/io/readers/file_readers/tdf_blob_reader.rs b/src/io/readers/file_readers/tdf_blob_reader.rs index 1c2fe97..d8e64fc 100644 --- a/src/io/readers/file_readers/tdf_blob_reader.rs +++ b/src/io/readers/file_readers/tdf_blob_reader.rs @@ -75,7 +75,7 @@ impl IndexedTdfBlobReader { pub fn new( file_name: impl AsRef, binary_offsets: Vec, - ) -> Result { + ) -> Result { let blob_reader = TdfBlobReader::new(file_name)?; let reader = Self { binary_offsets, diff --git a/src/io/readers/precursor_reader.rs b/src/io/readers/precursor_reader.rs index 5544dce..e4750c5 100644 --- a/src/io/readers/precursor_reader.rs +++ b/src/io/readers/precursor_reader.rs @@ -2,13 +2,15 @@ mod minitdf; mod tdf; use core::fmt; -use std::path::Path; +use std::path::{Path, PathBuf}; use minitdf::{MiniTDFPrecursorReader, MiniTDFPrecursorReaderError}; use tdf::{TDFPrecursorReader, TDFPrecursorReaderError}; use crate::ms_data::Precursor; +use super::quad_settings_reader::FrameWindowSplittingStrategy; + pub struct PrecursorReader { precursor_reader: Box, } @@ -20,15 +22,12 @@ impl fmt::Debug for PrecursorReader { } impl PrecursorReader { + pub fn build() -> PrecursorReaderBuilder { + PrecursorReaderBuilder::default() + } + pub fn new(path: impl AsRef) -> Result { - let precursor_reader: Box = - match path.as_ref().extension().and_then(|e| e.to_str()) { - Some("parquet") => Box::new(MiniTDFPrecursorReader::new(path)?), - Some("tdf") => Box::new(TDFPrecursorReader::new(path)?), - _ => panic!(), - }; - let reader = Self { precursor_reader }; - Ok(reader) + Self::build().with_path(path).finalize() } pub fn get(&self, index: usize) -> Option { @@ -40,6 +39,48 @@ impl PrecursorReader { } } +#[derive(Debug, Default, Clone)] +pub struct PrecursorReaderBuilder { + path: PathBuf, + config: FrameWindowSplittingStrategy, +} + +impl PrecursorReaderBuilder { + pub fn with_path(&self, path: impl AsRef) -> Self { + Self { + path: path.as_ref().to_path_buf(), + ..self.clone() + } + } + + pub fn with_config(&self, config: FrameWindowSplittingStrategy) -> Self { + Self { + config: config, + ..self.clone() + } + } + + pub fn finalize(&self) -> Result { + let precursor_reader: Box = + match self.path.extension().and_then(|e| e.to_str()) { + Some("parquet") => { + Box::new(MiniTDFPrecursorReader::new(self.path.clone())?) + }, + Some("tdf") => Box::new(TDFPrecursorReader::new( + self.path.clone(), + self.config.clone(), + )?), + _ => { + return Err(PrecursorReaderError::PrecursorReaderFileError( + self.path.clone(), + )) + }, + }; + let reader = PrecursorReader { precursor_reader }; + Ok(reader) + } +} + trait PrecursorReaderTrait: Sync { fn get(&self, index: usize) -> Option; fn len(&self) -> usize; @@ -51,4 +92,6 @@ pub enum PrecursorReaderError { MiniTDFPrecursorReaderError(#[from] MiniTDFPrecursorReaderError), #[error("{0}")] TDFPrecursorReaderError(#[from] TDFPrecursorReaderError), + #[error("File {0} not valid")] + PrecursorReaderFileError(PathBuf), } diff --git a/src/io/readers/precursor_reader/tdf.rs b/src/io/readers/precursor_reader/tdf.rs index 30e1dd5..34efef1 100644 --- a/src/io/readers/precursor_reader/tdf.rs +++ b/src/io/readers/precursor_reader/tdf.rs @@ -7,7 +7,10 @@ use dda::{DDATDFPrecursorReader, DDATDFPrecursorReaderError}; use dia::{DIATDFPrecursorReader, DIATDFPrecursorReaderError}; use crate::{ - io::readers::file_readers::sql_reader::{SqlError, SqlReader}, + io::readers::{ + file_readers::sql_reader::{SqlError, SqlReader}, + quad_settings_reader::FrameWindowSplittingStrategy, + }, ms_data::{AcquisitionType, Precursor}, }; @@ -20,6 +23,7 @@ pub struct TDFPrecursorReader { impl TDFPrecursorReader { pub fn new( path: impl AsRef, + splitting_strategy: FrameWindowSplittingStrategy, ) -> Result { let sql_path = path.as_ref(); let tdf_sql_reader = SqlReader::open(sql_path)?; @@ -37,13 +41,15 @@ impl TDFPrecursorReader { AcquisitionType::DDAPASEF => { Box::new(DDATDFPrecursorReader::new(path)?) }, - AcquisitionType::DIAPASEF => { - Box::new(DIATDFPrecursorReader::new(path)?) - }, + AcquisitionType::DIAPASEF => Box::new( + DIATDFPrecursorReader::new(path, splitting_strategy)?, + ), acquisition_type => { - return Err(TDFPrecursorReaderError::UnknownPrecursorType( - format!("{:?}", acquisition_type), - )) + return Err( + TDFPrecursorReaderError::UnsupportedAcquisition( + format!("{:?}", acquisition_type), + ), + ) }, }; let reader = Self { precursor_reader }; @@ -70,5 +76,5 @@ pub enum TDFPrecursorReaderError { #[error("{0}")] DIATDFPrecursorReaderError(#[from] DIATDFPrecursorReaderError), #[error("Invalid acquistion type for precursor reader: {0}")] - UnknownPrecursorType(String), + UnsupportedAcquisition(String), } diff --git a/src/io/readers/precursor_reader/tdf/dia.rs b/src/io/readers/precursor_reader/tdf/dia.rs index 9531fdd..9eacaa7 100644 --- a/src/io/readers/precursor_reader/tdf/dia.rs +++ b/src/io/readers/precursor_reader/tdf/dia.rs @@ -1,13 +1,12 @@ use std::path::Path; +use crate::io::readers::quad_settings_reader::FrameWindowSplittingStrategy; use crate::{ domain_converters::{ ConvertableDomain, Frame2RtConverter, Scan2ImConverter, }, io::readers::{ - file_readers::sql_reader::{ - frame_groups::SqlWindowGroup, ReadableSqlTable, SqlError, SqlReader, - }, + file_readers::sql_reader::{SqlError, SqlReader}, MetadataReader, MetadataReaderError, QuadrupoleSettingsReader, QuadrupoleSettingsReaderError, }, @@ -26,32 +25,18 @@ pub struct DIATDFPrecursorReader { impl DIATDFPrecursorReader { pub fn new( path: impl AsRef, + splitting_strategy: FrameWindowSplittingStrategy, ) -> Result { let sql_path = path.as_ref(); let tdf_sql_reader = SqlReader::open(sql_path)?; let metadata = MetadataReader::new(&path)?; let rt_converter: Frame2RtConverter = metadata.rt_converter; let im_converter: Scan2ImConverter = metadata.im_converter; - let window_groups = SqlWindowGroup::from_sql_reader(&tdf_sql_reader)?; - let quadrupole_settings = - QuadrupoleSettingsReader::new(tdf_sql_reader.get_path())?; - let mut expanded_quadrupole_settings: Vec = vec![]; - for window_group in window_groups { - let window = window_group.window_group; - let frame = window_group.frame; - let group = &quadrupole_settings[window as usize - 1]; - for sub_window in 0..group.isolation_mz.len() { - let sub_quad_settings = QuadrupoleSettings { - index: frame, - scan_starts: vec![group.scan_starts[sub_window]], - scan_ends: vec![group.scan_ends[sub_window]], - isolation_mz: vec![group.isolation_mz[sub_window]], - isolation_width: vec![group.isolation_width[sub_window]], - collision_energy: vec![group.collision_energy[sub_window]], - }; - expanded_quadrupole_settings.push(sub_quad_settings) - } - } + let expanded_quadrupole_settings = + QuadrupoleSettingsReader::from_splitting( + tdf_sql_reader.get_path(), + splitting_strategy, + )?; let reader = Self { expanded_quadrupole_settings, rt_converter, diff --git a/src/io/readers/quad_settings_reader.rs b/src/io/readers/quad_settings_reader.rs index 8a62d3c..6f5f398 100644 --- a/src/io/readers/quad_settings_reader.rs +++ b/src/io/readers/quad_settings_reader.rs @@ -3,7 +3,8 @@ use std::path::Path; use crate::{ms_data::QuadrupoleSettings, utils::vec_utils::argsort}; use super::file_readers::sql_reader::{ - quad_settings::SqlQuadSettings, ReadableSqlTable, SqlError, SqlReader, + frame_groups::SqlWindowGroup, quad_settings::SqlQuadSettings, + ReadableSqlTable, SqlError, SqlReader, }; pub struct QuadrupoleSettingsReader { @@ -17,6 +18,12 @@ impl QuadrupoleSettingsReader { ) -> Result, QuadrupoleSettingsReaderError> { let sql_path = path.as_ref(); let tdf_sql_reader = SqlReader::open(&sql_path)?; + Self::from_sql_settings(&tdf_sql_reader) + } + + pub fn from_sql_settings( + tdf_sql_reader: &SqlReader, + ) -> Result, QuadrupoleSettingsReaderError> { let sql_quadrupole_settings = SqlQuadSettings::from_sql_reader(&tdf_sql_reader)?; let window_group_count = sql_quadrupole_settings @@ -40,6 +47,29 @@ impl QuadrupoleSettingsReader { Ok(quad_reader.quadrupole_settings) } + pub fn from_splitting( + path: impl AsRef, + splitting_strat: FrameWindowSplittingStrategy, + ) -> Result, QuadrupoleSettingsReaderError> { + let sql_path = path.as_ref(); + let tdf_sql_reader = SqlReader::open(&sql_path)?; + let quadrupole_settings = Self::from_sql_settings(&tdf_sql_reader)?; + let window_groups = SqlWindowGroup::from_sql_reader(&tdf_sql_reader)?; + let expanded_quadrupole_settings = match splitting_strat { + FrameWindowSplittingStrategy::Quadrupole(x) => { + expand_quadrupole_settings( + &window_groups, + &quadrupole_settings, + &x, + ) + }, + FrameWindowSplittingStrategy::Window(x) => { + expand_window_settings(&window_groups, &quadrupole_settings, &x) + }, + }; + Ok(expanded_quadrupole_settings) + } + fn update_from_sql_quadrupole_settings(&mut self) { for window_group in self.sql_quadrupole_settings.iter() { let group = window_group.window_group - 1; @@ -86,10 +116,177 @@ impl QuadrupoleSettingsReader { #[derive(Debug, thiserror::Error)] pub enum QuadrupoleSettingsReaderError { - // #[error("{0}")] - // MiniTDFPrecursorReaderError(#[from] MiniTDFPrecursorReaderError), - // #[error("{0}")] - // TDFPrecursorReaderError(#[from] TDFPrecursorReaderError), #[error("{0}")] SqlError(#[from] SqlError), } + +type SpanStep = (usize, usize); + +/// Strategy for expanding quadrupole settings +/// +/// This enum is used to determine how to expand quadrupole settings +/// when reading in DIA data. And exporting spectra (not frames RN). +/// +/// # Variants +/// +/// For example if we have a window with scan start 50 and end 500 +/// +/// * `None` - Do not expand quadrupole settings; use the original settings +/// * `Even(usize)` - Split the quadrupole settings into `usize` evenly spaced +/// subwindows; e.g. if `usize` is 2, the window will be split into 2 subwindows +/// of equal width. +/// * `Uniform(SpanStep)` - Split the quadrupole settings into subwindows of +/// width `SpanStep.0` and step `SpanStep.1`; e.g. if `SpanStep` is (100, 50), +/// the window will be split into subwindows of width 100 and step 50 between their +/// scan start and end. +/// +#[derive(Debug, Copy, Clone)] +pub enum QuadWindowExpansionStrategy { + None, + Even(usize), + Uniform(SpanStep), +} + +#[derive(Debug, Clone, Copy)] +pub enum FrameWindowSplittingStrategy { + Quadrupole(QuadWindowExpansionStrategy), + Window(QuadWindowExpansionStrategy), +} + +impl Default for FrameWindowSplittingStrategy { + fn default() -> Self { + Self::Quadrupole(QuadWindowExpansionStrategy::Even(1)) + } +} + +fn scan_range_subsplit( + start: usize, + end: usize, + strategy: &QuadWindowExpansionStrategy, +) -> Vec<(usize, usize)> { + let out = match strategy { + QuadWindowExpansionStrategy::None => { + vec![(start, end)] + }, + QuadWindowExpansionStrategy::Even(num_splits) => { + let sub_subwindow_width = (end - start) / (num_splits + 1); + let mut out = Vec::new(); + for sub_subwindow in 0..num_splits.clone() { + let sub_subwindow_scan_start = + start + (sub_subwindow_width * sub_subwindow); + let sub_subwindow_scan_end = + start + (sub_subwindow_width * (sub_subwindow + 2)); + + out.push((sub_subwindow_scan_start, sub_subwindow_scan_end)) + } + out + }, + QuadWindowExpansionStrategy::Uniform((span, step)) => { + let mut curr_start = start.clone(); + let mut curr_end = start + span; + let mut out = Vec::new(); + while curr_end < end { + out.push((curr_start, curr_end)); + curr_start += step; + curr_end += step; + } + if curr_start < end { + out.push((curr_start, end)); + } + out + }, + }; + + debug_assert!( + out.iter().all(|(s, e)| s < e), + "Invalid scan range: {:?}", + out + ); + debug_assert!( + out.iter().all(|(s, e)| *s >= start && *e <= end), + "Invalid scan range: {:?}", + out + ); + out +} + +fn expand_window_settings( + window_groups: &[SqlWindowGroup], + quadrupole_settings: &[QuadrupoleSettings], + strategy: &QuadWindowExpansionStrategy, +) -> Vec { + let mut expanded_quadrupole_settings: Vec = vec![]; + for window_group in window_groups { + let window = window_group.window_group; + let frame = window_group.frame; + let group = &quadrupole_settings[window as usize - 1]; + let window_group_start = + group.scan_starts.iter().min().unwrap().clone(); // SqlReader cannot return empty vecs, so always succeeds + let window_group_end = group.scan_ends.iter().max().unwrap().clone(); // SqlReader cannot return empty vecs, so always succeeds + for (sws, swe) in + scan_range_subsplit(window_group_start, window_group_end, &strategy) + { + let mut mz_min = std::f64::MAX; + let mut mz_max = std::f64::MIN; + let mut nce_sum = 0.0; + let mut total_scan_width = 0.0; + for i in 0..group.len() { + let gss = group.scan_starts[i]; + let gse = group.scan_ends[i]; + if (swe <= gse) || (gss <= sws) { + continue; + } + let half_isolation_width = group.isolation_width[i] / 2.0; + let isolation_mz = group.isolation_mz[i]; + mz_min = mz_min.min(isolation_mz - half_isolation_width); + mz_max = mz_max.max(isolation_mz + half_isolation_width); + let scan_width = (gse.min(swe) - gss.max(sws)) as f64; + nce_sum += group.collision_energy[i] * scan_width; + total_scan_width += scan_width + } + let sub_quad_settings = QuadrupoleSettings { + index: frame, + scan_starts: vec![sws], + scan_ends: vec![swe], + isolation_mz: vec![(mz_min + mz_max) / 2.0], + isolation_width: vec![mz_min - mz_max], + collision_energy: vec![nce_sum / total_scan_width], + }; + expanded_quadrupole_settings.push(sub_quad_settings) + } + } + expanded_quadrupole_settings +} + +fn expand_quadrupole_settings( + window_groups: &[SqlWindowGroup], + quadrupole_settings: &[QuadrupoleSettings], + strategy: &QuadWindowExpansionStrategy, +) -> Vec { + let mut expanded_quadrupole_settings: Vec = vec![]; + for window_group in window_groups { + let window = window_group.window_group; + let frame = window_group.frame; + let group = &quadrupole_settings[window as usize - 1]; + for sub_window in 0..group.isolation_mz.len() { + let subwindow_scan_start = group.scan_starts[sub_window]; + let subwindow_scan_end = group.scan_ends[sub_window]; + for (sws, swe) in scan_range_subsplit( + subwindow_scan_start, + subwindow_scan_end, + &strategy, + ) { + let sub_quad_settings = QuadrupoleSettings { + index: frame, + scan_starts: vec![sws], + scan_ends: vec![swe], + isolation_mz: vec![group.isolation_mz[sub_window]], + isolation_width: vec![group.isolation_width[sub_window]], + collision_energy: vec![group.collision_energy[sub_window]], + }; + expanded_quadrupole_settings.push(sub_quad_settings) + } + } + } + expanded_quadrupole_settings +} diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs index 336634a..7f905cc 100644 --- a/src/io/readers/spectrum_reader.rs +++ b/src/io/readers/spectrum_reader.rs @@ -2,13 +2,15 @@ mod minitdf; mod tdf; use core::fmt; -use minitdf::MiniTDFSpectrumReader; +use minitdf::{MiniTDFSpectrumReader, MiniTDFSpectrumReaderError}; use rayon::iter::{IntoParallelIterator, ParallelIterator}; use std::path::{Path, PathBuf}; -use tdf::TDFSpectrumReader; +use tdf::{TDFSpectrumReader, TDFSpectrumReaderError}; use crate::ms_data::Spectrum; +use super::FrameWindowSplittingStrategy; + pub struct SpectrumReader { spectrum_reader: Box, } @@ -20,17 +22,15 @@ impl fmt::Debug for SpectrumReader { } impl SpectrumReader { - pub fn new(path: impl AsRef) -> Self { - let spectrum_reader: Box = - match path.as_ref().extension().and_then(|e| e.to_str()) { - Some("ms2") => Box::new(MiniTDFSpectrumReader::new(path)), - Some("d") => Box::new(TDFSpectrumReader::new(path)), - _ => panic!(), - }; - Self { spectrum_reader } + pub fn build() -> SpectrumReaderBuilder { + SpectrumReaderBuilder::default() } - pub fn get(&self, index: usize) -> Spectrum { + pub fn new(path: impl AsRef) -> Result { + Self::build().with_path(path).finalize() + } + + pub fn get(&self, index: usize) -> Result { self.spectrum_reader.get(index) } @@ -42,12 +42,19 @@ impl SpectrumReader { self.spectrum_reader.len() } - pub fn get_all(&self) -> Vec { - let mut spectra: Vec = (0..self.len()) + pub fn get_all(&self) -> Vec> { + let mut spectra: Vec> = (0..self + .len()) .into_par_iter() .map(|index| self.get(index)) .collect(); - spectra.sort_by_key(|x| x.precursor.unwrap().index); + spectra.sort_by_key(|x| match x { + Ok(spectrum) => match spectrum.precursor { + Some(precursor) => precursor.index, + None => spectrum.index, + }, + Err(_) => 0, + }); spectra } @@ -56,9 +63,89 @@ impl SpectrumReader { } } +#[derive(Debug, Default, Clone)] +pub struct SpectrumReaderBuilder { + path: PathBuf, + config: SpectrumReaderConfig, +} + +impl SpectrumReaderBuilder { + pub fn with_path(&self, path: impl AsRef) -> Self { + Self { + path: path.as_ref().to_path_buf(), + ..self.clone() + } + } + + pub fn with_config(&self, config: SpectrumReaderConfig) -> Self { + Self { + config: config, + ..self.clone() + } + } + + pub fn finalize(&self) -> Result { + let spectrum_reader: Box = + match self.path.extension().and_then(|e| e.to_str()) { + Some("ms2") => { + Box::new(MiniTDFSpectrumReader::new(self.path.clone())?) + }, + Some("d") => Box::new(TDFSpectrumReader::new( + self.path.clone(), + self.config.clone(), + )?), + _ => { + return Err(SpectrumReaderError::SpectrumReaderFileError( + self.path.clone(), + )) + }, + }; + let mut reader = SpectrumReader { spectrum_reader }; + if self.config.spectrum_processing_params.calibrate { + reader.calibrate(); + } + Ok(reader) + } +} + trait SpectrumReaderTrait: Sync { - fn get(&self, index: usize) -> Spectrum; + fn get(&self, index: usize) -> Result; fn get_path(&self) -> PathBuf; fn len(&self) -> usize; fn calibrate(&mut self); } + +#[derive(Debug, thiserror::Error)] +pub enum SpectrumReaderError { + #[error("{0}")] + MiniTDFSpectrumReaderError(#[from] MiniTDFSpectrumReaderError), + #[error("{0}")] + TDFSpectrumReaderError(#[from] TDFSpectrumReaderError), + #[error("File {0} not valid")] + SpectrumReaderFileError(PathBuf), +} + +#[derive(Debug, Clone)] +pub struct SpectrumProcessingParams { + smoothing_window: u32, + centroiding_window: u32, + calibration_tolerance: f64, + calibrate: bool, +} + +impl Default for SpectrumProcessingParams { + fn default() -> Self { + Self { + smoothing_window: 1, + centroiding_window: 1, + calibration_tolerance: 0.1, + calibrate: false, + } + } +} + +#[derive(Debug, Default, Clone)] +pub struct SpectrumReaderConfig { + pub spectrum_processing_params: SpectrumProcessingParams, + pub frame_splitting_params: FrameWindowSplittingStrategy, +} diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs index 83c50bd..e5cc23c 100644 --- a/src/io/readers/spectrum_reader/minitdf.rs +++ b/src/io/readers/spectrum_reader/minitdf.rs @@ -4,17 +4,21 @@ use crate::{ io::readers::{ file_readers::{ parquet_reader::{ - precursors::ParquetPrecursor, ReadableParquetTable, + precursors::ParquetPrecursor, ParquetError, + ReadableParquetTable, + }, + sql_reader::SqlError, + tdf_blob_reader::{ + IndexedTdfBlobReader, IndexedTdfBlobReaderError, }, - tdf_blob_reader::IndexedTdfBlobReader, }, - PrecursorReader, + PrecursorReader, PrecursorReaderError, }, ms_data::Spectrum, utils::find_extension, }; -use super::SpectrumReaderTrait; +use super::{SpectrumReaderError, SpectrumReaderTrait}; #[derive(Debug)] pub struct MiniTDFSpectrumReader { @@ -25,43 +29,54 @@ pub struct MiniTDFSpectrumReader { } impl MiniTDFSpectrumReader { - pub fn new(path: impl AsRef) -> Self { - let parquet_file_name = - find_extension(&path, "ms2spectrum.parquet").unwrap(); - let precursor_reader = - PrecursorReader::new(&parquet_file_name).unwrap(); - let offsets = ParquetPrecursor::from_parquet_file(&parquet_file_name) - .unwrap() + pub fn new( + path: impl AsRef, + ) -> Result { + let parquet_file_name = find_extension(&path, "ms2spectrum.parquet") + .ok_or(MiniTDFSpectrumReaderError::FileNotFound( + "analysis.tdf".to_string(), + ))?; + let precursor_reader = PrecursorReader::build() + .with_path(&parquet_file_name) + .finalize()?; + let offsets = ParquetPrecursor::from_parquet_file(&parquet_file_name)? .iter() .map(|x| x.offset as usize) .collect(); let collision_energies = - ParquetPrecursor::from_parquet_file(&parquet_file_name) - .unwrap() + ParquetPrecursor::from_parquet_file(&parquet_file_name)? .iter() .map(|x| x.collision_energy) .collect(); - let bin_file_name = find_extension(&path, "bin").unwrap(); - let blob_reader = - IndexedTdfBlobReader::new(&bin_file_name, offsets).unwrap(); - Self { + let bin_file_name = find_extension(&path, "bin").ok_or( + MiniTDFSpectrumReaderError::FileNotFound( + "analysis.tdf".to_string(), + ), + )?; + let blob_reader = IndexedTdfBlobReader::new(&bin_file_name, offsets)?; + let reader = Self { path: path.as_ref().to_path_buf(), precursor_reader, blob_reader, collision_energies, - } + }; + Ok(reader) } -} -impl SpectrumReaderTrait for MiniTDFSpectrumReader { - fn get(&self, index: usize) -> Spectrum { + fn _get( + &self, + index: usize, + ) -> Result { let mut spectrum = Spectrum::default(); spectrum.index = index; - let blob = self.blob_reader.get(index).unwrap(); + let blob = self.blob_reader.get(index)?; if !blob.is_empty() { let size: usize = blob.len(); - let spectrum_data: Vec = - (0..size).map(|i| blob.get(i).unwrap()).collect(); + let spectrum_data: Vec = (0..size) + .map(|i| { + blob.get(i).ok_or(MiniTDFSpectrumReaderError::BlobError) + }) + .collect::, _>>()?; let scan_count: usize = blob.len() / 3; let tof_indices_bytes: &[u32] = &spectrum_data[..scan_count as usize * 2]; @@ -75,7 +90,10 @@ impl SpectrumReaderTrait for MiniTDFSpectrumReader { intensity_values.iter().map(|&x| x as f64).collect(); spectrum.mz_values = mz_values.to_vec(); } - let precursor = self.precursor_reader.get(index).unwrap(); + let precursor = self + .precursor_reader + .get(index) + .ok_or(MiniTDFSpectrumReaderError::NoPrecursor)?; spectrum.precursor = Some(precursor); spectrum.index = precursor.index; spectrum.collision_energy = self.collision_energies[index]; @@ -87,7 +105,13 @@ impl SpectrumReaderTrait for MiniTDFSpectrumReader { } else { 2.0 + (precursor.mz - 700.0) / 100.0 }; //FIX? - spectrum + Ok(spectrum) + } +} + +impl SpectrumReaderTrait for MiniTDFSpectrumReader { + fn get(&self, index: usize) -> Result { + Ok(self._get(index)?) } fn len(&self) -> usize { @@ -100,3 +124,21 @@ impl SpectrumReaderTrait for MiniTDFSpectrumReader { fn calibrate(&mut self) {} } + +#[derive(Debug, thiserror::Error)] +pub enum MiniTDFSpectrumReaderError { + #[error("{0}")] + SqlError(#[from] SqlError), + #[error("{0}")] + PrecursorReaderError(#[from] PrecursorReaderError), + #[error("{0}")] + ParquetError(#[from] ParquetError), + #[error("{0}")] + IndexedTdfBlobReaderError(#[from] IndexedTdfBlobReaderError), + #[error("{0}")] + FileNotFound(String), + #[error("No precursor")] + NoPrecursor, + #[error("BlobError")] + BlobError, +} diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index 511730e..7f29748 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -2,25 +2,22 @@ mod dda; mod dia; mod raw_spectra; -use raw_spectra::{RawSpectrum, RawSpectrumReader}; +use raw_spectra::{RawSpectrum, RawSpectrumReader, RawSpectrumReaderError}; use rayon::iter::{IntoParallelIterator, ParallelIterator}; use std::path::{Path, PathBuf}; use crate::{ domain_converters::{ConvertableDomain, Tof2MzConverter}, io::readers::{ - file_readers::sql_reader::SqlReader, FrameReader, MetadataReader, - PrecursorReader, + file_readers::sql_reader::{SqlError, SqlReader}, + FrameReader, FrameReaderError, MetadataReader, MetadataReaderError, + PrecursorReader, PrecursorReaderError, }, ms_data::Spectrum, utils::find_extension, }; -use super::SpectrumReaderTrait; - -const SMOOTHING_WINDOW: u32 = 1; -const CENTROIDING_WINDOW: u32 = 1; -const CALIBRATION_TOLERANCE: f64 = 0.1; +use super::{SpectrumReaderConfig, SpectrumReaderError, SpectrumReaderTrait}; #[derive(Debug)] pub struct TDFSpectrumReader { @@ -28,50 +25,79 @@ pub struct TDFSpectrumReader { precursor_reader: PrecursorReader, mz_reader: Tof2MzConverter, raw_spectrum_reader: RawSpectrumReader, + config: SpectrumReaderConfig, } impl TDFSpectrumReader { - pub fn new(path_name: impl AsRef) -> Self { - let frame_reader: FrameReader = FrameReader::new(&path_name).unwrap(); - let sql_path = find_extension(&path_name, "analysis.tdf").unwrap(); - let metadata = MetadataReader::new(&sql_path).unwrap(); + pub fn new( + path_name: impl AsRef, + config: SpectrumReaderConfig, + ) -> Result { + let frame_reader: FrameReader = FrameReader::new(&path_name)?; + let sql_path = find_extension(&path_name, "analysis.tdf").ok_or( + TDFSpectrumReaderError::FileNotFound("analysis.tdf".to_string()), + )?; + let metadata = MetadataReader::new(&sql_path)?; let mz_reader: Tof2MzConverter = metadata.mz_converter; - let tdf_sql_reader = SqlReader::open(&sql_path).unwrap(); - let precursor_reader = PrecursorReader::new(&sql_path).unwrap(); + let tdf_sql_reader = SqlReader::open(&sql_path)?; + let precursor_reader = PrecursorReader::build() + .with_path(&sql_path) + .with_config(config.frame_splitting_params) + .finalize()?; let acquisition_type = frame_reader.get_acquisition(); let raw_spectrum_reader = RawSpectrumReader::new( &tdf_sql_reader, frame_reader, acquisition_type, - ); - Self { + config.frame_splitting_params, + )?; + let reader = Self { path: path_name.as_ref().to_path_buf(), precursor_reader, mz_reader, raw_spectrum_reader, - } + config, + }; + Ok(reader) } - pub fn read_single_raw_spectrum(&self, index: usize) -> RawSpectrum { - let raw_spectrum = self.raw_spectrum_reader.get(index); - raw_spectrum - .smooth(SMOOTHING_WINDOW) - .centroid(CENTROIDING_WINDOW) + pub fn read_single_raw_spectrum( + &self, + index: usize, + ) -> Result { + let raw_spectrum = self + .raw_spectrum_reader + .get(index)? + .smooth(self.config.spectrum_processing_params.smoothing_window) + .centroid( + self.config.spectrum_processing_params.centroiding_window, + ); + Ok(raw_spectrum) } -} -impl SpectrumReaderTrait for TDFSpectrumReader { - fn get(&self, index: usize) -> Spectrum { - let raw_spectrum = self.read_single_raw_spectrum(index); + fn _get(&self, index: usize) -> Result { + let raw_spectrum = self.read_single_raw_spectrum(index)?; let spectrum = raw_spectrum.finalize( - self.precursor_reader.get(index).unwrap(), + self.precursor_reader + .get(index) + .ok_or(TDFSpectrumReaderError::NoPrecursor)?, &self.mz_reader, ); - spectrum + Ok(spectrum) + } +} + +impl SpectrumReaderTrait for TDFSpectrumReader { + fn get(&self, index: usize) -> Result { + Ok(self._get(index)?) } fn len(&self) -> usize { - self.precursor_reader.len() + debug_assert_eq!( + self.precursor_reader.len(), + self.raw_spectrum_reader.len() + ); + self.raw_spectrum_reader.len() } fn get_path(&self) -> PathBuf { @@ -82,13 +108,19 @@ impl SpectrumReaderTrait for TDFSpectrumReader { let hits: Vec<(f64, u32)> = (0..self.precursor_reader.len()) .into_par_iter() .map(|index| { - let spectrum = self.read_single_raw_spectrum(index); + // TODO + let spectrum = self.read_single_raw_spectrum(index).unwrap(); let precursor = self.precursor_reader.get(index).unwrap(); let precursor_mz: f64 = precursor.mz; let mut result: Vec<(f64, u32)> = vec![]; for &tof_index in spectrum.tof_indices.iter() { let mz = self.mz_reader.convert(tof_index); - if (mz - precursor_mz).abs() < CALIBRATION_TOLERANCE { + if (mz - precursor_mz).abs() + < self + .config + .spectrum_processing_params + .calibration_tolerance + { let hit = (precursor_mz, tof_index); result.push(hit); } @@ -104,3 +136,21 @@ impl SpectrumReaderTrait for TDFSpectrumReader { } } } + +#[derive(Debug, thiserror::Error)] +pub enum TDFSpectrumReaderError { + #[error("{0}")] + SqlError(#[from] SqlError), + #[error("{0}")] + PrecursorReaderError(#[from] PrecursorReaderError), + #[error("{0}")] + MetadaReaderError(#[from] MetadataReaderError), + #[error("{0}")] + FrameReaderError(#[from] FrameReaderError), + #[error("{0}")] + RawSpectrumReaderError(#[from] RawSpectrumReaderError), + #[error("{0}")] + FileNotFound(String), + #[error("No precursor")] + NoPrecursor, +} diff --git a/src/io/readers/spectrum_reader/tdf/dda.rs b/src/io/readers/spectrum_reader/tdf/dda.rs index 93ab962..2434192 100644 --- a/src/io/readers/spectrum_reader/tdf/dda.rs +++ b/src/io/readers/spectrum_reader/tdf/dda.rs @@ -1,14 +1,17 @@ use crate::{ io::readers::{ file_readers::sql_reader::{ - pasef_frame_msms::SqlPasefFrameMsMs, ReadableSqlTable, SqlReader, + pasef_frame_msms::SqlPasefFrameMsMs, ReadableSqlTable, SqlError, + SqlReader, }, - FrameReader, + FrameReader, FrameReaderError, }, utils::vec_utils::{argsort, group_and_sum}, }; -use super::raw_spectra::{RawSpectrum, RawSpectrumReaderTrait}; +use super::raw_spectra::{ + RawSpectrum, RawSpectrumReaderError, RawSpectrumReaderTrait, +}; #[derive(Debug)] pub struct DDARawSpectrumReader { @@ -19,13 +22,15 @@ pub struct DDARawSpectrumReader { } impl DDARawSpectrumReader { - pub fn new(tdf_sql_reader: &SqlReader, frame_reader: FrameReader) -> Self { - let pasef_frames = - SqlPasefFrameMsMs::from_sql_reader(&tdf_sql_reader).unwrap(); + pub fn new( + tdf_sql_reader: &SqlReader, + frame_reader: FrameReader, + ) -> Result { + let pasef_frames = SqlPasefFrameMsMs::from_sql_reader(&tdf_sql_reader)?; let pasef_precursors = &pasef_frames.iter().map(|x| x.precursor).collect(); let order: Vec = argsort(&pasef_precursors); - let max_precursor = pasef_precursors.iter().max().unwrap(); + let max_precursor = pasef_precursors.iter().max().unwrap(); // SqlReader cannot return empty vecs, so always succeeds let mut offsets: Vec = Vec::with_capacity(max_precursor + 1); offsets.push(0); for (offset, &index) in order.iter().enumerate().take(order.len() - 1) { @@ -35,12 +40,13 @@ impl DDARawSpectrumReader { } } offsets.push(order.len()); - Self { + let reader = Self { order, offsets, pasef_frames, frame_reader, - } + }; + Ok(reader) } pub fn iterate_over_pasef_frames( @@ -53,10 +59,11 @@ impl DDARawSpectrumReader { .iter() .map(|&x| &self.pasef_frames[x]) } -} -impl RawSpectrumReaderTrait for DDARawSpectrumReader { - fn get(&self, index: usize) -> RawSpectrum { + fn _get( + &self, + index: usize, + ) -> Result { let mut collision_energy = 0.0; let mut isolation_mz = 0.0; let mut isolation_width = 0.0; @@ -67,7 +74,7 @@ impl RawSpectrumReaderTrait for DDARawSpectrumReader { isolation_mz = pasef_frame.isolation_mz; isolation_width = pasef_frame.isolation_width; let frame_index: usize = pasef_frame.frame - 1; - let frame = self.frame_reader.get(frame_index).unwrap(); + let frame = self.frame_reader.get(frame_index)?; if frame.intensities.len() == 0 { continue; } @@ -94,6 +101,24 @@ impl RawSpectrumReaderTrait for DDARawSpectrumReader { isolation_mz, isolation_width, }; - raw_spectrum + Ok(raw_spectrum) } } + +impl RawSpectrumReaderTrait for DDARawSpectrumReader { + fn get(&self, index: usize) -> Result { + Ok(self._get(index)?) + } + + fn len(&self) -> usize { + self.offsets.len() - 1 + } +} + +#[derive(Debug, thiserror::Error)] +pub enum DDARawSpectrumReaderError { + #[error("{0}")] + SqlError(#[from] SqlError), + #[error("{0}")] + FrameReaderError(#[from] FrameReaderError), +} diff --git a/src/io/readers/spectrum_reader/tdf/dia.rs b/src/io/readers/spectrum_reader/tdf/dia.rs index e0342e4..3313acf 100644 --- a/src/io/readers/spectrum_reader/tdf/dia.rs +++ b/src/io/readers/spectrum_reader/tdf/dia.rs @@ -1,15 +1,17 @@ +use crate::io::readers::quad_settings_reader::FrameWindowSplittingStrategy; +use crate::io::readers::FrameReaderError; use crate::{ io::readers::{ - file_readers::sql_reader::{ - frame_groups::SqlWindowGroup, ReadableSqlTable, SqlReader, - }, - FrameReader, QuadrupoleSettingsReader, + file_readers::sql_reader::{SqlError, SqlReader}, + FrameReader, QuadrupoleSettingsReader, QuadrupoleSettingsReaderError, }, ms_data::QuadrupoleSettings, utils::vec_utils::group_and_sum, }; -use super::raw_spectra::{RawSpectrum, RawSpectrumReaderTrait}; +use super::raw_spectra::{ + RawSpectrum, RawSpectrumReaderError, RawSpectrumReaderTrait, +}; #[derive(Debug)] pub struct DIARawSpectrumReader { @@ -18,45 +20,36 @@ pub struct DIARawSpectrumReader { } impl DIARawSpectrumReader { - pub fn new(tdf_sql_reader: &SqlReader, frame_reader: FrameReader) -> Self { - let window_groups = - SqlWindowGroup::from_sql_reader(&tdf_sql_reader).unwrap(); - let quadrupole_settings = - QuadrupoleSettingsReader::new(&tdf_sql_reader.get_path()).unwrap(); - let mut expanded_quadrupole_settings: Vec = vec![]; - for window_group in window_groups { - let window = window_group.window_group; - let frame = window_group.frame; - let group = &quadrupole_settings[window as usize - 1]; - for sub_window in 0..group.isolation_mz.len() { - let sub_quad_settings = QuadrupoleSettings { - index: frame, - scan_starts: vec![group.scan_starts[sub_window]], - scan_ends: vec![group.scan_ends[sub_window]], - isolation_mz: vec![group.isolation_mz[sub_window]], - isolation_width: vec![group.isolation_width[sub_window]], - collision_energy: vec![group.collision_energy[sub_window]], - }; - expanded_quadrupole_settings.push(sub_quad_settings) - } - } - Self { + pub fn new( + tdf_sql_reader: &SqlReader, + frame_reader: FrameReader, + splitting_strategy: FrameWindowSplittingStrategy, + ) -> Result { + let expanded_quadrupole_settings = + QuadrupoleSettingsReader::from_splitting( + tdf_sql_reader.get_path(), + splitting_strategy, + )?; + let reader = Self { expanded_quadrupole_settings, frame_reader, - } + }; + Ok(reader) } -} -impl RawSpectrumReaderTrait for DIARawSpectrumReader { - fn get(&self, index: usize) -> RawSpectrum { + fn _get( + &self, + index: usize, + ) -> Result { let quad_settings = &self.expanded_quadrupole_settings[index]; + let collision_energy = quad_settings.collision_energy[0]; let isolation_mz = quad_settings.isolation_mz[0]; let isolation_width = quad_settings.isolation_width[0]; let scan_start = quad_settings.scan_starts[0]; let scan_end = quad_settings.scan_ends[0]; let frame_index = quad_settings.index - 1; - let frame = self.frame_reader.get(frame_index).unwrap(); + let frame = self.frame_reader.get(frame_index)?; let offset_start = frame.scan_offsets[scan_start] as usize; let offset_end = frame.scan_offsets[scan_end] as usize; let tof_indices = &frame.tof_indices[offset_start..offset_end]; @@ -73,6 +66,26 @@ impl RawSpectrumReaderTrait for DIARawSpectrumReader { isolation_mz, isolation_width, }; - raw_spectrum + Ok(raw_spectrum) } } + +impl RawSpectrumReaderTrait for DIARawSpectrumReader { + fn get(&self, index: usize) -> Result { + Ok(self._get(index)?) + } + + fn len(&self) -> usize { + self.expanded_quadrupole_settings.len() + } +} + +#[derive(Debug, thiserror::Error)] +pub enum DIARawSpectrumReaderError { + #[error("{0}")] + SqlError(#[from] SqlError), + #[error("{0}")] + QuadrupoleSettingsReaderError(#[from] QuadrupoleSettingsReaderError), + #[error("{0}")] + FrameReaderError(#[from] FrameReaderError), +} diff --git a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs index 7172940..95b4a46 100644 --- a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs +++ b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs @@ -2,12 +2,18 @@ use core::fmt; use crate::{ domain_converters::{ConvertableDomain, Tof2MzConverter}, - io::readers::{file_readers::sql_reader::SqlReader, FrameReader}, + io::readers::{ + file_readers::sql_reader::SqlReader, + quad_settings_reader::FrameWindowSplittingStrategy, FrameReader, + }, ms_data::{AcquisitionType, Precursor, Spectrum}, utils::vec_utils::{filter_with_mask, find_sparse_local_maxima_mask}, }; -use super::{dda::DDARawSpectrumReader, dia::DIARawSpectrumReader}; +use super::{ + dda::{DDARawSpectrumReader, DDARawSpectrumReaderError}, + dia::{DIARawSpectrumReader, DIARawSpectrumReaderError}, +}; #[derive(Debug, PartialEq, Default, Clone)] pub(crate) struct RawSpectrum { @@ -91,27 +97,55 @@ impl RawSpectrumReader { tdf_sql_reader: &SqlReader, frame_reader: FrameReader, acquisition_type: AcquisitionType, - ) -> Self { + splitting_strategy: FrameWindowSplittingStrategy, + ) -> Result { let raw_spectrum_reader: Box = match acquisition_type { AcquisitionType::DDAPASEF => Box::new( - DDARawSpectrumReader::new(tdf_sql_reader, frame_reader), - ), - AcquisitionType::DIAPASEF => Box::new( - DIARawSpectrumReader::new(tdf_sql_reader, frame_reader), + DDARawSpectrumReader::new(tdf_sql_reader, frame_reader)?, ), - _ => panic!(), + AcquisitionType::DIAPASEF => { + Box::new(DIARawSpectrumReader::new( + tdf_sql_reader, + frame_reader, + splitting_strategy, + )?) + }, + acquisition_type => { + return Err(RawSpectrumReaderError::UnsupportedAcquisition( + format!("{:?}", acquisition_type), + )) + }, }; - Self { + let reader = Self { raw_spectrum_reader, - } + }; + Ok(reader) } - pub fn get(&self, index: usize) -> RawSpectrum { + pub fn get( + &self, + index: usize, + ) -> Result { self.raw_spectrum_reader.get(index) } + + pub fn len(&self) -> usize { + self.raw_spectrum_reader.len() + } } pub trait RawSpectrumReaderTrait: Sync { - fn get(&self, index: usize) -> RawSpectrum; + fn get(&self, index: usize) -> Result; + fn len(&self) -> usize; +} + +#[derive(Debug, thiserror::Error)] +pub enum RawSpectrumReaderError { + #[error("{0}")] + DDARawSpectrumReaderError(#[from] DDARawSpectrumReaderError), + #[error("{0}")] + DIARawSpectrumReaderError(#[from] DIARawSpectrumReaderError), + #[error("Invalid acquistion type for Raw spectrum reader: {0}")] + UnsupportedAcquisition(String), } diff --git a/src/io/writers/mgf.rs b/src/io/writers/mgf.rs index 0ad5ef0..715a5ed 100644 --- a/src/io/writers/mgf.rs +++ b/src/io/writers/mgf.rs @@ -30,6 +30,7 @@ pub struct MGFEntry; impl MGFEntry { pub fn write_header(spectrum: &Spectrum) -> String { + // TODO let precursor = spectrum.precursor.unwrap(); let title = precursor.index; let intensity = precursor.intensity.unwrap_or(0.0); diff --git a/src/ms_data/quadrupole.rs b/src/ms_data/quadrupole.rs index b9d2185..0d96f61 100644 --- a/src/ms_data/quadrupole.rs +++ b/src/ms_data/quadrupole.rs @@ -8,3 +8,9 @@ pub struct QuadrupoleSettings { pub isolation_width: Vec, pub collision_energy: Vec, } + +impl QuadrupoleSettings { + pub fn len(&self) -> usize { + self.isolation_mz.len() + } +} diff --git a/src/ms_data/spectra.rs b/src/ms_data/spectra.rs index 7ffb9f7..36e2c33 100644 --- a/src/ms_data/spectra.rs +++ b/src/ms_data/spectra.rs @@ -17,6 +17,7 @@ impl Spectrum { let top_n = if n == 0 { self.len() } else { n }; let mut indexed: Vec<(f64, usize)> = self.intensities.iter().cloned().zip(0..).collect(); + // TODO indexed.sort_by(|a, b| { b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal) }); diff --git a/src/utils/vec_utils.rs b/src/utils/vec_utils.rs index 724fc3c..3ee53c0 100644 --- a/src/utils/vec_utils.rs +++ b/src/utils/vec_utils.rs @@ -12,8 +12,8 @@ pub fn group_and_sum + Copy>( return (vec![], vec![]); } let order: Vec = argsort(&groups); - let mut new_groups: Vec = vec![]; - let mut new_values: Vec = vec![]; + let mut new_groups: Vec = Vec::with_capacity(order.len()); + let mut new_values: Vec = Vec::with_capacity(order.len()); let mut current_group: T = groups[order[0]]; let mut current_value: U = values[order[0]]; for &index in &order[1..] { diff --git a/tests/frame_readers.rs b/tests/frame_readers.rs index c67d7ab..b6fa001 100644 --- a/tests/frame_readers.rs +++ b/tests/frame_readers.rs @@ -104,4 +104,48 @@ fn tdf_reader_frames2() { } } -// TODO test for DIA +#[test] +fn tdf_reader_frames_dia() { + let file_name = "dia_test.d"; + let file_path = get_local_directory() + .join(file_name) + .to_str() + .unwrap() + .to_string(); + let frames: Vec = FrameReader::new(&file_path) + .unwrap() + .get_all_ms2() + .into_iter() + .map(|x| x.unwrap()) + .collect(); + + assert_eq!(frames.len(), 4); + for i in 0..frames.len() { + assert_eq!(frames[i].scan_offsets.len(), 710); + assert_eq!(frames[i].scan_offsets[0], 0); + assert_eq!( + frames[i].scan_offsets.last().unwrap(), + &frames[i].intensities.len() + ); + assert_eq!(frames[i].tof_indices.len(), frames[i].intensities.len()); + } + assert_eq!(&frames[0].tof_indices[0], &251695u32); + assert_eq!(&frames[0].intensities[0], &503392u32); + assert_eq!(&frames[0].tof_indices.len(), &754376); + assert_eq!(&frames[0].intensities.len(), &754376); + + assert_eq!(&frames[1].tof_indices[0], &1006071u32); + assert_eq!(&frames[1].intensities[0], &2012144u32); + assert_eq!(&frames[1].tof_indices.len(), &1257057); + assert_eq!(&frames[1].intensities.len(), &1257057); + + assert_eq!(&frames[2].tof_indices[0], &4022866u32); + assert_eq!(&frames[2].intensities[0], &8045734u32); + assert_eq!(&frames[2].tof_indices.len(), &2262419); + assert_eq!(&frames[2].intensities.len(), &2262419); + + assert_eq!(&frames[3].tof_indices[0], &6285285u32); + assert_eq!(&frames[3].intensities[0], &12570572u32); + assert_eq!(&frames[3].tof_indices.len(), &2765100); + assert_eq!(&frames[3].intensities.len(), &2765100); +} diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index 3473d18..8f6f198 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -1,6 +1,9 @@ use std::path::Path; use timsrust::{ - io::readers::SpectrumReader, + io::readers::{ + FrameWindowSplittingStrategy, QuadWindowExpansionStrategy, + SpectrumProcessingParams, SpectrumReader, SpectrumReaderConfig, + }, ms_data::{Precursor, Spectrum}, }; @@ -18,7 +21,11 @@ fn minitdf_reader() { .to_str() .unwrap() .to_string(); - let spectra: Vec = SpectrumReader::new(file_path).get_all(); + let spectra: Vec> = SpectrumReader::build() + .with_path(file_path) + .finalize() + .unwrap() + .get_all(); let expected: Vec = vec![ Spectrum { mz_values: vec![100.0, 200.002, 300.03, 400.4], @@ -55,8 +62,8 @@ fn minitdf_reader() { isolation_width: 3.0, }, ]; - for i in 0..spectra.len() { - assert_eq!(spectra[i], expected[i]); + for (i, spectrum) in spectra.into_iter().enumerate() { + assert_eq!(spectrum.unwrap(), expected[i]); } } @@ -68,7 +75,11 @@ fn tdf_reader_dda() { .to_str() .unwrap() .to_string(); - let spectra: Vec = SpectrumReader::new(file_path).get_all(); + let spectra: Vec> = SpectrumReader::build() + .with_path(file_path) + .finalize() + .unwrap() + .get_all(); let expected: Vec = vec![ Spectrum { mz_values: vec![199.7633445943076], @@ -122,7 +133,64 @@ fn tdf_reader_dda() { isolation_width: 2.0, }, ]; - for i in 0..spectra.len() { - assert_eq!(spectra[i], expected[i]); + for (i, spectrum) in spectra.into_iter().enumerate() { + assert_eq!(spectrum.unwrap(), expected[i]); + } +} + +#[test] +fn test_dia_even() { + let file_name = "dia_test.d"; + let file_path = get_local_directory() + .join(file_name) + .to_str() + .unwrap() + .to_string(); + for i in 1..3 { + let spectra = SpectrumReader::build() + .with_path(&file_path) + .with_config(SpectrumReaderConfig { + frame_splitting_params: + FrameWindowSplittingStrategy::Quadrupole( + QuadWindowExpansionStrategy::Even(i), + ), + spectrum_processing_params: SpectrumProcessingParams::default(), + }) + .finalize() + .unwrap() + .get_all(); + // 4 frames, 2 windows in each, i splits/window + assert_eq!(spectra.len(), 4 * 2 * i); + } +} + +#[test] +fn test_dia_uniform() { + let file_name = "dia_test.d"; + let file_path = get_local_directory() + .join(file_name) + .to_str() + .unwrap() + .to_string(); + for i in [100, 200, 300] { + let spectra = SpectrumReader::build() + .with_path(&file_path) + .with_config(SpectrumReaderConfig { + frame_splitting_params: FrameWindowSplittingStrategy::Window( + QuadWindowExpansionStrategy::Uniform((i, i)), + ), + spectrum_processing_params: SpectrumProcessingParams::default(), + }) + .finalize() + .unwrap() + .get_all(); + for f in spectra.iter() { + println!("{:?}", f.as_ref().unwrap().precursor); + } + // Not all frames have scan windows from 0 to 709 ... so ... I need to think + // on how to express this in the test + // assert_eq!(frames.len(), 4 * ((709 / i) + 1)); + assert!(spectra.len() > (709 / i)); + assert!(spectra.len() < 3 * ((709 / i) + 1)); } }