From 266fa86b206a2998e61e6c0acce23953864dbc9e Mon Sep 17 00:00:00 2001
From: Sander Willems
Date: Thu, 1 Aug 2024 11:22:25 +0200
Subject: [PATCH 1/5] FEAT: provided actual frames to FrameReader, rather than
 SqlFrames

---
 src/io/readers/frame_reader.rs | 88 ++++++++++++++++++++++------------
 1 file changed, 58 insertions(+), 30 deletions(-)

diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs
index af9b713..83b6efd 100644
--- a/src/io/readers/frame_reader.rs
+++ b/src/io/readers/frame_reader.rs
@@ -26,10 +26,9 @@ use super::{
 pub struct FrameReader {
     path: PathBuf,
     tdf_bin_reader: TdfBlobReader,
-    sql_frames: Vec<SqlFrame>,
+    frames: Vec<Frame>,
     acquisition: AcquisitionType,
-    window_groups: Vec<u8>,
-    quadrupole_settings: Vec<Arc<QuadrupoleSettings>>,
+    offsets: Vec<usize>,
 }
 
 impl FrameReader {
@@ -50,6 +49,7 @@ impl FrameReader {
         } else {
             AcquisitionType::Unknown
         };
+        // TODO should be refactored out to quadrupole reader
         let mut window_groups = vec![0; sql_frames.len()];
         let quadrupole_settings;
         if acquisition == AcquisitionType::DIAPASEF {
@@ -64,36 +64,48 @@
         } else {
             quadrupole_settings = vec![];
         }
+        let quadrupole_settings = quadrupole_settings
+            .into_iter()
+            .map(|x| Arc::new(x))
+            .collect();
+        let frames = (0..sql_frames.len())
+            .into_par_iter()
+            .map(|index| {
+                get_frame_without_data(
+                    index,
+                    &sql_frames,
+                    acquisition,
+                    &window_groups,
+                    &quadrupole_settings,
+                )
+            })
+            .collect();
+        let offsets = sql_frames.iter().map(|x| x.binary_offset).collect();
         let reader = Self {
             path: path.as_ref().to_path_buf(),
             tdf_bin_reader,
-            sql_frames,
+            frames,
             acquisition,
-            window_groups,
-            quadrupole_settings: quadrupole_settings
-                .into_iter()
-                .map(|x| Arc::new(x))
-                .collect(),
+            offsets,
         };
         Ok(reader)
     }
 
-    pub fn parallel_filter<'a, F: Fn(&SqlFrame) -> bool + Sync + Send + 'a>(
+    pub fn parallel_filter<'a, F: Fn(&Frame) -> bool + Sync + Send + 'a>(
         &'a self,
         predicate: F,
     ) -> impl ParallelIterator<Item = Result<Frame, FrameReaderError>> + 'a {
         (0..self.len())
             .into_par_iter()
-            .filter(move |x| predicate(&self.sql_frames[*x]))
+            .filter(move |x| predicate(&self.frames[*x]))
             .map(move |x| self.get(x))
     }
 
     pub fn get(&self, index: usize) -> Result<Frame, FrameReaderError> {
-        let mut frame: Frame = Frame::default();
-        let sql_frame = &self.sql_frames[index];
-        frame.index = sql_frame.id;
-        let blob = self.tdf_bin_reader.get(sql_frame.binary_offset)?;
+        let mut frame = self.frames[index].clone();
+        let offset = self.offsets[index];
+        let blob = self.tdf_bin_reader.get(offset)?;
         let scan_count: usize =
             blob.get(0).ok_or(FrameReaderError::CorruptFrame)? as usize;
         let peak_count: usize = (blob.len() - scan_count) / 2;
@@ -105,18 +117,6 @@ impl FrameReader {
             &blob,
             &frame.scan_offsets,
         )?;
-        frame.ms_level = MSLevel::read_from_msms_type(sql_frame.msms_type);
-        frame.rt = sql_frame.rt;
-        frame.acquisition_type = self.acquisition;
-        frame.intensity_correction_factor = 1.0 / sql_frame.accumulation_time;
-        if (self.acquisition == AcquisitionType::DIAPASEF)
-            & (frame.ms_level == MSLevel::MS2)
-        {
-            let window_group = self.window_groups[index];
-            frame.window_group = window_group;
-            frame.quadrupole_settings =
-                self.quadrupole_settings[window_group as usize - 1].clone();
-        }
         Ok(frame)
     }
 
@@ -125,11 +125,13 @@ impl FrameReader {
     }
 
     pub fn get_all_ms1(&self) -> Vec<Result<Frame, FrameReaderError>> {
-        self.parallel_filter(|x| x.msms_type == 0).collect()
+        self.parallel_filter(|x| x.ms_level == MSLevel::MS1)
+            .collect()
     }
 
     pub fn get_all_ms2(&self) -> Vec<Result<Frame, FrameReaderError>> {
-        self.parallel_filter(|x| x.msms_type != 0).collect()
+        self.parallel_filter(|x| x.ms_level == MSLevel::MS2)
+            .collect()
     }
 
     pub fn get_acquisition(&self) -> AcquisitionType {
@@ -137,7 +139,7 @@ impl FrameReader {
     }
 
     pub fn len(&self) -> usize {
-        self.sql_frames.len()
+        self.frames.len()
     }
 
     pub fn get_path(&self) -> PathBuf {
@@ -199,6 +201,32 @@ fn read_tof_indices(
     Ok(tof_indices)
 }
 
+fn get_frame_without_data(
+    index: usize,
+    sql_frames: &Vec<SqlFrame>,
+    acquisition: AcquisitionType,
+    window_groups: &Vec<u8>,
+    quadrupole_settings: &Vec<Arc<QuadrupoleSettings>>,
+) -> Frame {
+    let mut frame: Frame = Frame::default();
+    let sql_frame = &sql_frames[index];
+    frame.index = sql_frame.id;
+    frame.ms_level = MSLevel::read_from_msms_type(sql_frame.msms_type);
+    frame.rt = sql_frame.rt;
+    frame.acquisition_type = acquisition;
+    frame.intensity_correction_factor = 1.0 / sql_frame.accumulation_time;
+    if (acquisition == AcquisitionType::DIAPASEF)
+        & (frame.ms_level == MSLevel::MS2)
+    {
+        // TODO should be refactored out to quadrupole reader
+        let window_group = window_groups[index];
+        frame.window_group = window_group;
+        frame.quadrupole_settings =
+            quadrupole_settings[window_group as usize - 1].clone();
+    }
+    frame
+}
+
 #[derive(Debug, thiserror::Error)]
 pub enum FrameReaderError {
     #[error("{0}")]

From 4b8d04b501006361d9fda803ae4a3ba666a3cc48 Mon Sep 17 00:00:00 2001
From: Sander Willems
Date: Thu, 1 Aug 2024 11:27:49 +0200
Subject: [PATCH 2/5] CHORE: removed unused deps

---
 Cargo.lock | 1 -
 Cargo.toml | 1 -
 2 files changed, 2 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 27b720b..5119052 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1124,7 +1124,6 @@ name = "timsrust"
 version = "0.4.0"
 dependencies = [
  "bytemuck",
- "byteorder",
  "criterion",
  "linreg",
  "memmap2",
diff --git a/Cargo.toml b/Cargo.toml
index a758bd6..eec9e16 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,7 +14,6 @@ keywords = ["MS", "LC-TIMS-TOF", "PASEF"]
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
-byteorder = "1.4.3"
 zstd = "0.13.2"
 rusqlite = { version = "0.31.0", features = ["bundled"] }
 rayon = "1.10.0"
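
Editor's note: after PATCH 1, `FrameReader::parallel_filter` predicates receive fully populated `Frame` metadata instead of `SqlFrame` rows. A minimal usage sketch, not part of the patch series; the `.d` path is a placeholder and errors are unwrapped for brevity:

```rust
use rayon::iter::ParallelIterator;
use timsrust::io::readers::FrameReader;
use timsrust::ms_data::MSLevel;

fn main() {
    // Placeholder path to a Bruker .d folder.
    let frame_reader = FrameReader::new("/path/to/data.d").unwrap();
    // The predicate now sees per-frame metadata, so MS level and retention
    // time can be checked without touching the binary frame data.
    let early_ms1: Vec<_> = frame_reader
        .parallel_filter(|frame| frame.ms_level == MSLevel::MS1 && frame.rt < 60.0)
        .collect();
    println!("read {} early MS1 frames", early_ms1.len());
}
```

Because the per-frame metadata is precomputed in `FrameReader::new` (via `get_frame_without_data`), only the frames that pass the filter pay the cost of decompressing their binary blobs in `get`.
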
From 345623a3ff27a74198a486539722318825ce8d85 Mon Sep 17 00:00:00 2001
From: Sander Willems
Date: Mon, 26 Aug 2024 11:03:01 +0200
Subject: [PATCH 3/5] FEAT: Provided option for slim versions that only use
 tdf or minitdf

---
 Cargo.toml                                |   9 +-
 benches/speed_performance.rs              |  26 +-
 src/errors.rs                             |  10 +-
 src/io/readers.rs                         |   6 +
 src/io/readers/file_readers.rs            |   2 +
 src/io/readers/precursor_reader.rs        |  11 +
 src/io/readers/spectrum_reader.rs         |  19 +-
 src/io/readers/spectrum_reader/minitdf.rs |   3 -
 src/lib.rs                                |  23 +-
 tests/frame_readers.rs                    | 286 +++++++++++-----------
 tests/spectrum_readers.rs                 |  15 +-
 11 files changed, 242 insertions(+), 168 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index eec9e16..f69025d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -15,13 +15,18 @@ keywords = ["MS", "LC-TIMS-TOF", "PASEF"]
 
 [dependencies]
 zstd = "0.13.2"
-rusqlite = { version = "0.31.0", features = ["bundled"] }
 rayon = "1.10.0"
 linreg = "0.2.0"
 bytemuck = "1.13.1"
-parquet = "42.0.0"
 thiserror = "1.0.0"
 memmap2 = "0.9.3"
+rusqlite = { version = "0.31.0", features = ["bundled"], optional = true}
+parquet = { version = "42.0.0", optional = true }
+
+[features]
+tdf = ["rusqlite"]
+minitdf = ["parquet"]
+default = ["tdf", "minitdf"]
 
 [dev-dependencies]
 criterion = { version = "0.5.1", features = ["html_reports"] }
diff --git a/benches/speed_performance.rs b/benches/speed_performance.rs
index 3beeeac..b0778fb 100644
--- a/benches/speed_performance.rs
+++ b/benches/speed_performance.rs
@@ -1,8 +1,8 @@
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 use rayon::iter::ParallelIterator;
-use timsrust::io::readers::{
-    FrameReader, SpectrumReader, SpectrumReaderConfig,
-};
+#[cfg(feature = "tdf")]
+use timsrust::readers::FrameReader;
+use timsrust::readers::{SpectrumReader, SpectrumReaderConfig};
 
 const DDA_TEST: &str =
     "/mnt/c/Users/Sander.Willems/Documents/data/tims05_300SPD/20230505_TIMS05_PaSk_MA_HeLa_6min_ddaP_S1-C10_1_2323.d/";
@@ -11,14 +11,17 @@ const DIA_TEST: &str =
 const SYP_TEST: &str =
     "/mnt/c/Users/Sander.Willems/Documents/data/20230505_TIMS05_PaSk_SA_HeLa_6min_syP_5scans_30Da_S1-D4_1_2330.d/";
 
+#[cfg(feature = "tdf")]
 fn read_all_frames(frame_reader: &FrameReader) {
     frame_reader.get_all();
 }
 
+#[cfg(feature = "tdf")]
 fn read_all_ms1_frames(frame_reader: &FrameReader) {
     frame_reader.get_all_ms1();
 }
 
+#[cfg(feature = "tdf")]
 fn read_all_ms2_frames(frame_reader: &FrameReader) {
     frame_reader.get_all_ms2();
 }
@@ -27,13 +30,13 @@ fn read_all_spectra(spectrum_reader: &SpectrumReader) {
     spectrum_reader.get_all();
 }
 
-fn criterion_benchmark_dda(c: &mut Criterion) {
+#[cfg(feature = "tdf")]
+fn criterion_benchmark_dda_frames(c: &mut Criterion) {
     // c.bench_function("fib 20", |b| b.iter(|| fibonacci(black_box(20))));
     let mut group = c.benchmark_group("sample-size-example");
     group.significance_level(0.001).sample_size(10);
     let d_folder_name: &str = DDA_TEST;
     let frame_reader = FrameReader::new(d_folder_name).unwrap();
-    let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap();
     group.bench_function("DDA read_all_frames 6m", |b| {
         b.iter(|| read_all_frames(black_box(&frame_reader)))
     });
@@ -43,12 +46,22 @@ fn criterion_benchmark_dda(c: &mut Criterion) {
     group.bench_function("DDA read_all_ms2_frames 6m", |b| {
         b.iter(|| read_all_ms2_frames(black_box(&frame_reader)))
     });
+    group.finish();
+}
+
+fn criterion_benchmark_dda_spectra(c: &mut Criterion) {
+    // c.bench_function("fib 20", |b| b.iter(|| fibonacci(black_box(20))));
+    let mut group = c.benchmark_group("sample-size-example");
+    group.significance_level(0.001).sample_size(10);
+    let d_folder_name: &str = DDA_TEST;
+    let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap();
     group.bench_function("DDA read_all_spectra 6m", |b| {
         b.iter(|| read_all_spectra(black_box(&spectrum_reader)))
     });
     group.finish();
 }
 
+#[cfg(feature = "tdf")]
 fn criterion_benchmark_dia(c: &mut Criterion) {
     // c.bench_function("fib 20", |b| b.iter(|| fibonacci(black_box(20))));
     let mut group = c.benchmark_group("sample-size-example");
@@ -68,6 +81,7 @@ fn criterion_benchmark_dia(c: &mut Criterion) {
     group.finish();
 }
 
+#[cfg(feature = "tdf")]
 fn criterion_benchmark_syp(c: &mut Criterion) {
     // c.bench_function("fib 20", |b| b.iter(|| fibonacci(black_box(20))));
     let mut group = c.benchmark_group("sample-size-example");
@@ -89,7 +103,7 @@ fn criterion_benchmark_syp(c: &mut Criterion) {
     group.finish();
 }
 
 criterion_group!(
     benches,
-    criterion_benchmark_dda,
+    criterion_benchmark_dda_spectra,
     // criterion_benchmark_dia,
     // criterion_benchmark_syp
 );
diff --git a/src/errors.rs b/src/errors.rs
index f8c713b..7758b0b 100644
--- a/src/errors.rs
+++ b/src/errors.rs
@@ -1,19 +1,23 @@
+#[cfg(feature = "tdf")]
 use crate::io::readers::{
-    FrameReaderError, MetadataReaderError, PrecursorReaderError,
-    QuadrupoleSettingsReaderError, SpectrumReaderError,
+    FrameReaderError, MetadataReaderError, QuadrupoleSettingsReaderError,
 };
+use crate::io::readers::{PrecursorReaderError, SpectrumReaderError};
 
 /// An error that is produced by timsrust (uses [thiserror]).
 #[derive(thiserror::Error, Debug)]
-pub enum Error {
+pub enum TimsRustError {
+    #[cfg(feature = "tdf")]
     #[error("{0}")]
     FrameReaderError(#[from] FrameReaderError),
     #[error("{0}")]
     SpectrumReaderError(#[from] SpectrumReaderError),
+    #[cfg(feature = "tdf")]
     #[error("{0}")]
     MetadataReaderError(#[from] MetadataReaderError),
     #[error("{0}")]
     PrecursorReaderError(#[from] PrecursorReaderError),
+    #[cfg(feature = "tdf")]
     #[error("{0}")]
     QuadrupoleSettingsReaderError(#[from] QuadrupoleSettingsReaderError),
 }
diff --git a/src/io/readers.rs b/src/io/readers.rs
index 03d5248..c13f808 100644
--- a/src/io/readers.rs
+++ b/src/io/readers.rs
@@ -1,12 +1,18 @@
 pub(crate) mod file_readers;
+#[cfg(feature = "tdf")]
 mod frame_reader;
+#[cfg(feature = "tdf")]
 mod metadata_reader;
 mod precursor_reader;
+#[cfg(feature = "tdf")]
 mod quad_settings_reader;
 mod spectrum_reader;
 
+#[cfg(feature = "tdf")]
 pub use frame_reader::*;
+#[cfg(feature = "tdf")]
 pub use metadata_reader::*;
 pub use precursor_reader::*;
+#[cfg(feature = "tdf")]
 pub use quad_settings_reader::*;
 pub use spectrum_reader::*;
diff --git a/src/io/readers/file_readers.rs b/src/io/readers/file_readers.rs
index 38aa955..9d6b37e 100644
--- a/src/io/readers/file_readers.rs
+++ b/src/io/readers/file_readers.rs
@@ -1,3 +1,5 @@
+#[cfg(feature = "minitdf")]
 pub mod parquet_reader;
+#[cfg(feature = "tdf")]
 pub mod sql_reader;
 pub mod tdf_blob_reader;
diff --git a/src/io/readers/precursor_reader.rs b/src/io/readers/precursor_reader.rs
index e4750c5..05bb1e7 100644
--- a/src/io/readers/precursor_reader.rs
+++ b/src/io/readers/precursor_reader.rs
@@ -1,14 +1,19 @@
+#[cfg(feature = "minitdf")]
 mod minitdf;
+#[cfg(feature = "tdf")]
 mod tdf;
 
 use core::fmt;
 use std::path::{Path, PathBuf};
 
+#[cfg(feature = "minitdf")]
 use minitdf::{MiniTDFPrecursorReader, MiniTDFPrecursorReaderError};
+#[cfg(feature = "tdf")]
 use tdf::{TDFPrecursorReader, TDFPrecursorReaderError};
 
 use crate::ms_data::Precursor;
 
+#[cfg(feature = "tdf")]
 use super::quad_settings_reader::FrameWindowSplittingStrategy;
 
 pub struct PrecursorReader {
@@ -42,6 +47,7 @@ impl PrecursorReader {
 #[derive(Debug, Default, Clone)]
 pub struct PrecursorReaderBuilder {
     path: PathBuf,
+    #[cfg(feature = "tdf")]
     config: FrameWindowSplittingStrategy,
 }
 
@@ -53,6 +59,7 @@ impl PrecursorReaderBuilder {
         }
     }
 
+    #[cfg(feature = "tdf")]
     pub fn with_config(&self, config: FrameWindowSplittingStrategy) -> Self {
         Self {
             config: config,
@@ -63,9 +70,11 @@ impl PrecursorReaderBuilder {
     pub fn finalize(&self) -> Result<PrecursorReader, PrecursorReaderError> {
         let precursor_reader: Box<dyn PrecursorReaderTrait> =
             match self.path.extension().and_then(|e| e.to_str()) {
+                #[cfg(feature = "minitdf")]
                 Some("parquet") => {
                     Box::new(MiniTDFPrecursorReader::new(self.path.clone())?)
                 },
+                #[cfg(feature = "tdf")]
                 Some("tdf") => Box::new(TDFPrecursorReader::new(
                     self.path.clone(),
                     self.config.clone(),
@@ -88,8 +97,10 @@ trait PrecursorReaderTrait: Sync {
 
 #[derive(Debug, thiserror::Error)]
 pub enum PrecursorReaderError {
+    #[cfg(feature = "minitdf")]
     #[error("{0}")]
     MiniTDFPrecursorReaderError(#[from] MiniTDFPrecursorReaderError),
+    #[cfg(feature = "tdf")]
     #[error("{0}")]
     TDFPrecursorReaderError(#[from] TDFPrecursorReaderError),
     #[error("File {0} not valid")]
diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs
index 7f905cc..56fa327 100644
--- a/src/io/readers/spectrum_reader.rs
+++ b/src/io/readers/spectrum_reader.rs
@@ -1,14 +1,20 @@
+#[cfg(feature = "minitdf")]
 mod minitdf;
+#[cfg(feature = "tdf")]
 mod tdf;
 
 use core::fmt;
+
+#[cfg(feature = "minitdf")]
 use minitdf::{MiniTDFSpectrumReader, MiniTDFSpectrumReaderError};
 use rayon::iter::{IntoParallelIterator, ParallelIterator};
 use std::path::{Path, PathBuf};
+#[cfg(feature = "tdf")]
 use tdf::{TDFSpectrumReader, TDFSpectrumReaderError};
 
 use crate::ms_data::Spectrum;
 
+#[cfg(feature = "tdf")]
 use super::FrameWindowSplittingStrategy;
 
 pub struct SpectrumReader {
@@ -87,9 +93,11 @@ impl SpectrumReaderBuilder {
     pub fn finalize(&self) -> Result<SpectrumReader, SpectrumReaderError> {
         let spectrum_reader: Box<dyn SpectrumReaderTrait> =
             match self.path.extension().and_then(|e| e.to_str()) {
+                #[cfg(feature = "minitdf")]
                 Some("ms2") => {
                     Box::new(MiniTDFSpectrumReader::new(self.path.clone())?)
                 },
+                #[cfg(feature = "tdf")]
                 Some("d") => Box::new(TDFSpectrumReader::new(
                     self.path.clone(),
                     self.config.clone(),
@@ -117,8 +125,10 @@ trait SpectrumReaderTrait: Sync {
 
 #[derive(Debug, thiserror::Error)]
 pub enum SpectrumReaderError {
+    #[cfg(feature = "minitdf")]
     #[error("{0}")]
     MiniTDFSpectrumReaderError(#[from] MiniTDFSpectrumReaderError),
+    #[cfg(feature = "tdf")]
     #[error("{0}")]
     TDFSpectrumReaderError(#[from] TDFSpectrumReaderError),
     #[error("File {0} not valid")]
@@ -127,10 +137,10 @@ pub enum SpectrumReaderError {
 
 #[derive(Debug, Clone)]
 pub struct SpectrumProcessingParams {
-    smoothing_window: u32,
-    centroiding_window: u32,
-    calibration_tolerance: f64,
-    calibrate: bool,
+    pub smoothing_window: u32,
+    pub centroiding_window: u32,
+    pub calibration_tolerance: f64,
+    pub calibrate: bool,
 }
 
 impl Default for SpectrumProcessingParams {
@@ -147,5 +157,6 @@ impl Default for SpectrumProcessingParams {
 #[derive(Debug, Default, Clone)]
 pub struct SpectrumReaderConfig {
     pub spectrum_processing_params: SpectrumProcessingParams,
+    #[cfg(feature = "tdf")]
     pub frame_splitting_params: FrameWindowSplittingStrategy,
 }
diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs
index e5cc23c..e1b5da9 100644
--- a/src/io/readers/spectrum_reader/minitdf.rs
+++ b/src/io/readers/spectrum_reader/minitdf.rs
@@ -7,7 +7,6 @@ use crate::{
             precursors::ParquetPrecursor, ParquetError, ReadableParquetTable,
         },
-        sql_reader::SqlError,
         tdf_blob_reader::{
             IndexedTdfBlobReader, IndexedTdfBlobReaderError,
         },
     },
@@ -127,8 +126,6 @@ impl SpectrumReaderTrait for MiniTDFSpectrumReader {
 
 #[derive(Debug, thiserror::Error)]
 pub enum MiniTDFSpectrumReaderError {
-    #[error("{0}")]
-    SqlError(#[from] SqlError),
     #[error("{0}")]
     PrecursorReaderError(#[from] PrecursorReaderError),
     #[error("{0}")]
diff --git a/src/lib.rs b/src/lib.rs
index b519699..61bdd28 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -21,10 +21,23 @@
 //!     * *.ms2spectrum.bin
 //!     * *.ms2spectrum.parquet
 
-pub mod domain_converters;
-mod errors;
-pub mod io;
-pub mod ms_data;
-mod utils;
+pub(crate) mod domain_converters;
+pub(crate) mod errors;
+pub(crate) mod io;
+pub(crate) mod ms_data;
+pub(crate) mod utils;
+pub mod converters {
+    //! Allows conversions between domains (e.g. Time of Flight and m/z)
+    pub use crate::domain_converters::*;
+}
+pub mod readers {
+    //! Readers for all data from Bruker compatible files.
+    pub use crate::io::readers::*;
+}
+pub mod writers {
+    //! Writers to generic file formats.
+    pub use crate::io::writers::*;
+}
 
 pub use crate::errors::*;
+pub use crate::ms_data::*;
diff --git a/tests/frame_readers.rs b/tests/frame_readers.rs
index b6fa001..5324360 100644
--- a/tests/frame_readers.rs
+++ b/tests/frame_readers.rs
@@ -1,151 +1,157 @@
-use std::{path::Path, sync::Arc};
-use timsrust::{
-    io::readers::FrameReader,
-    ms_data::{AcquisitionType, Frame, MSLevel, QuadrupoleSettings},
-};
+#[cfg(feature = "tdf")]
+mod tests {
+    use std::{path::Path, sync::Arc};
+    use timsrust::{
+        readers::FrameReader, AcquisitionType, Frame, MSLevel,
+        QuadrupoleSettings,
+    };
 
-fn get_local_directory() -> &'static Path {
-    Path::new(std::file!())
-        .parent()
-        .expect("Failed to get parent directory")
-}
+    fn get_local_directory() -> &'static Path {
+        Path::new(std::file!())
+            .parent()
+            .expect("Failed to get parent directory")
+    }
 
-#[test]
-fn tdf_reader_frames1() {
-    let file_name = "test.d";
-    let file_path = get_local_directory()
-        .join(file_name)
-        .to_str()
-        .unwrap()
-        .to_string();
-    let frames: Vec<Frame> = FrameReader::new(&file_path)
-        .unwrap()
-        .get_all_ms1()
-        .into_iter()
-        .map(|x| x.unwrap())
-        .collect();
-    let expected: Vec<Frame> = vec![
-        Frame {
-            scan_offsets: vec![0, 1, 3, 6, 10],
-            tof_indices: (0..10).collect(),
-            intensities: (0..10).map(|x| (x + 1) * 2).collect(),
-            index: 1,
-            rt: 0.1,
-            ms_level: MSLevel::MS1,
-            quadrupole_settings: Arc::new(QuadrupoleSettings::default()),
-            acquisition_type: AcquisitionType::DDAPASEF,
-            intensity_correction_factor: 1.0 / 100.0,
-            window_group: 0,
-        },
-        // Frame::default(),
-        Frame {
-            scan_offsets: vec![0, 9, 19, 30, 42],
-            tof_indices: (36..78).collect(),
-            intensities: (36..78).map(|x| (x + 1) * 2).collect(),
-            index: 3,
-            rt: 0.3,
-            ms_level: MSLevel::MS1,
-            quadrupole_settings: Arc::new(QuadrupoleSettings::default()),
-            acquisition_type: AcquisitionType::DDAPASEF,
-            intensity_correction_factor: 1.0 / 100.0,
-            window_group: 0,
-        },
-        // Frame::default(),
-    ];
-    for i in 0..expected.len() {
-        assert_eq!(&frames[i], &expected[i])
-    }
-}
+    #[test]
+    fn tdf_reader_frames1() {
+        let file_name = "test.d";
+        let file_path = get_local_directory()
+            .join(file_name)
+            .to_str()
+            .unwrap()
+            .to_string();
+        let frames: Vec<Frame> = FrameReader::new(&file_path)
+            .unwrap()
+            .get_all_ms1()
+            .into_iter()
+            .map(|x| x.unwrap())
+            .collect();
+        let expected: Vec<Frame> = vec![
+            Frame {
+                scan_offsets: vec![0, 1, 3, 6, 10],
+                tof_indices: (0..10).collect(),
+                intensities: (0..10).map(|x| (x + 1) * 2).collect(),
+                index: 1,
+                rt: 0.1,
+                ms_level: MSLevel::MS1,
+                quadrupole_settings: Arc::new(QuadrupoleSettings::default()),
+                acquisition_type: AcquisitionType::DDAPASEF,
+                intensity_correction_factor: 1.0 / 100.0,
+                window_group: 0,
+            },
+            // Frame::default(),
+            Frame {
+                scan_offsets: vec![0, 9, 19, 30, 42],
+                tof_indices: (36..78).collect(),
+                intensities: (36..78).map(|x| (x + 1) * 2).collect(),
+                index: 3,
+                rt: 0.3,
+                ms_level: MSLevel::MS1,
+                quadrupole_settings: Arc::new(QuadrupoleSettings::default()),
+                acquisition_type: AcquisitionType::DDAPASEF,
+                intensity_correction_factor: 1.0 / 100.0,
+                window_group: 0,
+            },
+            // Frame::default(),
+        ];
+        for i in 0..expected.len() {
+            assert_eq!(&frames[i], &expected[i])
+        }
+    }
 
-#[test]
-fn tdf_reader_frames2() {
-    let file_name = "test.d";
-    let file_path = get_local_directory()
-        .join(file_name)
-        .to_str()
-        .unwrap()
-        .to_string();
-    let frames: Vec<Frame> = FrameReader::new(&file_path)
-        .unwrap()
-        .get_all_ms2()
-        .into_iter()
-        .map(|x| x.unwrap())
-        .collect();
-    let expected: Vec<Frame> = vec![
-        // Frame::default(),
-        Frame {
-            scan_offsets: vec![0, 5, 11, 18, 26],
-            tof_indices: (10..36).collect(),
-            intensities: (10..36).map(|x| (x + 1) * 2).collect(),
-            index: 2,
-            rt: 0.2,
-            ms_level: MSLevel::MS2,
-            quadrupole_settings: Arc::new(QuadrupoleSettings::default()),
-            acquisition_type: AcquisitionType::DDAPASEF,
-            intensity_correction_factor: 1.0 / 100.0,
-            window_group: 0,
-        },
-        // Frame::default(),
-        Frame {
-            scan_offsets: vec![0, 13, 27, 42, 58],
-            tof_indices: (78..136).collect(),
-            intensities: (78..136).map(|x| (x + 1) * 2).collect(),
-            index: 4,
-            rt: 0.4,
-            ms_level: MSLevel::MS2,
-            quadrupole_settings: Arc::new(QuadrupoleSettings::default()),
-            acquisition_type: AcquisitionType::DDAPASEF,
-            intensity_correction_factor: 1.0 / 100.0,
-            window_group: 0,
-        },
-    ];
-    for i in 0..expected.len() {
-        assert_eq!(&frames[i], &expected[i])
-    }
-}
+    #[test]
+    fn tdf_reader_frames2() {
+        let file_name = "test.d";
+        let file_path = get_local_directory()
+            .join(file_name)
+            .to_str()
+            .unwrap()
+            .to_string();
+        let frames: Vec<Frame> = FrameReader::new(&file_path)
+            .unwrap()
+            .get_all_ms2()
+            .into_iter()
+            .map(|x| x.unwrap())
+            .collect();
+        let expected: Vec<Frame> = vec![
+            // Frame::default(),
+            Frame {
+                scan_offsets: vec![0, 5, 11, 18, 26],
+                tof_indices: (10..36).collect(),
+                intensities: (10..36).map(|x| (x + 1) * 2).collect(),
+                index: 2,
+                rt: 0.2,
+                ms_level: MSLevel::MS2,
+                quadrupole_settings: Arc::new(QuadrupoleSettings::default()),
+                acquisition_type: AcquisitionType::DDAPASEF,
+                intensity_correction_factor: 1.0 / 100.0,
+                window_group: 0,
+            },
+            // Frame::default(),
+            Frame {
+                scan_offsets: vec![0, 13, 27, 42, 58],
+                tof_indices: (78..136).collect(),
+                intensities: (78..136).map(|x| (x + 1) * 2).collect(),
+                index: 4,
+                rt: 0.4,
+                ms_level: MSLevel::MS2,
+                quadrupole_settings: Arc::new(QuadrupoleSettings::default()),
+                acquisition_type: AcquisitionType::DDAPASEF,
+                intensity_correction_factor: 1.0 / 100.0,
+                window_group: 0,
+            },
+        ];
+        for i in 0..expected.len() {
+            assert_eq!(&frames[i], &expected[i])
+        }
+    }
 
-#[test]
-fn tdf_reader_frames_dia() {
-    let file_name = "dia_test.d";
-    let file_path = get_local_directory()
-        .join(file_name)
-        .to_str()
-        .unwrap()
-        .to_string();
-    let frames: Vec<Frame> = FrameReader::new(&file_path)
-        .unwrap()
-        .get_all_ms2()
-        .into_iter()
-        .map(|x| x.unwrap())
-        .collect();
-
-    assert_eq!(frames.len(), 4);
-    for i in 0..frames.len() {
-        assert_eq!(frames[i].scan_offsets.len(), 710);
-        assert_eq!(frames[i].scan_offsets[0], 0);
-        assert_eq!(
-            frames[i].scan_offsets.last().unwrap(),
-            &frames[i].intensities.len()
-        );
-        assert_eq!(frames[i].tof_indices.len(), frames[i].intensities.len());
-    }
-    assert_eq!(&frames[0].tof_indices[0], &251695u32);
-    assert_eq!(&frames[0].intensities[0], &503392u32);
-    assert_eq!(&frames[0].tof_indices.len(), &754376);
-    assert_eq!(&frames[0].intensities.len(), &754376);
-
-    assert_eq!(&frames[1].tof_indices[0], &1006071u32);
-    assert_eq!(&frames[1].intensities[0], &2012144u32);
-    assert_eq!(&frames[1].tof_indices.len(), &1257057);
-    assert_eq!(&frames[1].intensities.len(), &1257057);
-
-    assert_eq!(&frames[2].tof_indices[0], &4022866u32);
-    assert_eq!(&frames[2].intensities[0], &8045734u32);
-    assert_eq!(&frames[2].tof_indices.len(), &2262419);
-    assert_eq!(&frames[2].intensities.len(), &2262419);
-
-    assert_eq!(&frames[3].tof_indices[0], &6285285u32);
-    assert_eq!(&frames[3].intensities[0], &12570572u32);
-    assert_eq!(&frames[3].tof_indices.len(), &2765100);
-    assert_eq!(&frames[3].intensities.len(), &2765100);
-}
+    #[test]
+    fn tdf_reader_frames_dia() {
+        let file_name = "dia_test.d";
+        let file_path = get_local_directory()
+            .join(file_name)
+            .to_str()
+            .unwrap()
+            .to_string();
+        let frames: Vec<Frame> = FrameReader::new(&file_path)
+            .unwrap()
+            .get_all_ms2()
+            .into_iter()
+            .map(|x| x.unwrap())
+            .collect();
+
+        assert_eq!(frames.len(), 4);
+        for i in 0..frames.len() {
+            assert_eq!(frames[i].scan_offsets.len(), 710);
+            assert_eq!(frames[i].scan_offsets[0], 0);
+            assert_eq!(
+                frames[i].scan_offsets.last().unwrap(),
+                &frames[i].intensities.len()
+            );
+            assert_eq!(
+                frames[i].tof_indices.len(),
+                frames[i].intensities.len()
+            );
+        }
+        assert_eq!(&frames[0].tof_indices[0], &251695u32);
+        assert_eq!(&frames[0].intensities[0], &503392u32);
+        assert_eq!(&frames[0].tof_indices.len(), &754376);
+        assert_eq!(&frames[0].intensities.len(), &754376);
+
+        assert_eq!(&frames[1].tof_indices[0], &1006071u32);
+        assert_eq!(&frames[1].intensities[0], &2012144u32);
+        assert_eq!(&frames[1].tof_indices.len(), &1257057);
+        assert_eq!(&frames[1].intensities.len(), &1257057);
+
+        assert_eq!(&frames[2].tof_indices[0], &4022866u32);
+        assert_eq!(&frames[2].intensities[0], &8045734u32);
+        assert_eq!(&frames[2].tof_indices.len(), &2262419);
+        assert_eq!(&frames[2].intensities.len(), &2262419);
+
+        assert_eq!(&frames[3].tof_indices[0], &6285285u32);
+        assert_eq!(&frames[3].intensities[0], &12570572u32);
+        assert_eq!(&frames[3].tof_indices.len(), &2765100);
+        assert_eq!(&frames[3].intensities.len(), &2765100);
+    }
+}
diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs
index 8f6f198..9546105 100644
--- a/tests/spectrum_readers.rs
+++ b/tests/spectrum_readers.rs
@@ -1,10 +1,11 @@
 use std::path::Path;
+#[cfg(feature = "tdf")]
+use timsrust::readers::{
+    FrameWindowSplittingStrategy, QuadWindowExpansionStrategy,
+};
 use timsrust::{
-    io::readers::{
-        FrameWindowSplittingStrategy, QuadWindowExpansionStrategy,
-        SpectrumProcessingParams, SpectrumReader, SpectrumReaderConfig,
-    },
-    ms_data::{Precursor, Spectrum},
+    readers::{SpectrumProcessingParams, SpectrumReader, SpectrumReaderConfig},
+    Precursor, Spectrum,
 };
 
 fn get_local_directory() -> &'static Path {
@@ -13,6 +14,7 @@ fn get_local_directory() -> &'static Path {
         .expect("Failed to get parent directory")
 }
 
+#[cfg(feature = "minitdf")]
 #[test]
 fn minitdf_reader() {
     let file_name = "test2.ms2";
@@ -67,6 +69,7 @@ fn minitdf_reader() {
     }
 }
 
+#[cfg(feature = "tdf")]
 #[test]
 fn tdf_reader_dda() {
     let file_name = "test.d";
@@ -138,6 +141,7 @@ fn tdf_reader_dda() {
     }
 }
 
+#[cfg(feature = "tdf")]
 #[test]
 fn test_dia_even() {
     let file_name = "dia_test.d";
@@ -164,6 +168,7 @@ fn test_dia_even() {
     }
 }
 
+#[cfg(feature = "tdf")]
 #[test]
 fn test_dia_uniform() {
     let file_name = "dia_test.d";
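
Editor's note: PATCH 3 splits the SQL-backed (`tdf`) and parquet-backed (`minitdf`) readers behind cargo features, re-exports the readers under `timsrust::readers`, and renames the error type to `TimsRustError`. A sketch of a downstream caller under the new paths; the helper name and file path are hypothetical, and it assumes `SpectrumReader::get_all` returns the collected `Vec<Spectrum>`:

```rust
// In the consumer's Cargo.toml, the slim parquet-only build would be selected
// along the lines of the new [features] table, e.g.:
//   timsrust = { version = "0.4.0", default-features = false, features = ["minitdf"] }
use timsrust::readers::SpectrumReader;
use timsrust::TimsRustError;

fn count_spectra(path: &str) -> Result<usize, TimsRustError> {
    // The builder's finalize dispatches on the extension: ".ms2" needs the
    // minitdf feature, ".d" needs the tdf feature.
    let spectrum_reader = SpectrumReader::new(path)?;
    Ok(spectrum_reader.get_all().len())
}
```

The `?` works because `SpectrumReaderError` still converts into the top-level error via `#[from]`, which is kept unconditional in `errors.rs` above.
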
From 7f3d716c94823c02fc9613493c8dc101c18b7e89 Mon Sep 17 00:00:00 2001
From: Sander Willems
Date: Mon, 26 Aug 2024 13:01:59 +0200
Subject: [PATCH 4/5] FEAT: expose dia_windows through frame reader

---
 src/io/readers/frame_reader.rs | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs
index 83b6efd..6460a83 100644
--- a/src/io/readers/frame_reader.rs
+++ b/src/io/readers/frame_reader.rs
@@ -29,6 +29,7 @@ pub struct FrameReader {
     frames: Vec<Frame>,
     acquisition: AcquisitionType,
     offsets: Vec<usize>,
+    dia_windows: Option<Vec<Arc<QuadrupoleSettings>>>,
 }
 
 impl FrameReader {
@@ -87,6 +88,10 @@ impl FrameReader {
             frames,
             acquisition,
             offsets,
+            dia_windows: match acquisition {
+                AcquisitionType::DIAPASEF => Some(quadrupole_settings),
+                _ => None,
+            },
         };
         Ok(reader)
     }
@@ -102,6 +107,10 @@ impl FrameReader {
             .map(move |x| self.get(x))
     }
 
+    pub fn get_dia_windows(&self) -> Option<Vec<Arc<QuadrupoleSettings>>> {
+        self.dia_windows.clone()
+    }
+
     pub fn get(&self, index: usize) -> Result<Frame, FrameReaderError> {
         let mut frame = self.frames[index].clone();
         let offset = self.offsets[index];
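
Editor's note: a short sketch against the new `get_dia_windows` accessor from PATCH 4. The path is a placeholder; `None` is returned for non-diaPASEF acquisitions, matching the `match acquisition` arm above:

```rust
use timsrust::readers::FrameReader;

fn print_dia_windows(path: &str) {
    // Placeholder .d folder path; unwrap kept for brevity.
    let frame_reader = FrameReader::new(path).unwrap();
    if let Some(windows) = frame_reader.get_dia_windows() {
        for (i, settings) in windows.iter().enumerate() {
            // Window groups are 1-based in the TDF schema (see the
            // `window_group as usize - 1` indexing introduced in PATCH 1).
            println!("window group {}: {:?}", i + 1, settings);
        }
    } else {
        println!("no DIA windows: acquisition is not diaPASEF");
    }
}
```
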
From a0f86f1efc1b4d5ebbf94f5f29f243fb39a5e63c Mon Sep 17 00:00:00 2001
From: Sander Willems
Date: Mon, 26 Aug 2024 13:02:19 +0200
Subject: [PATCH 5/5] FEAT: added rt, im and mz min/max values to metadata

---
 src/io/readers/metadata_reader.rs | 56 +++++++++++++++++++++++--------
 src/ms_data/metadata.rs           |  6 ++++
 2 files changed, 48 insertions(+), 14 deletions(-)

diff --git a/src/io/readers/metadata_reader.rs b/src/io/readers/metadata_reader.rs
index 1e47668..8944001 100644
--- a/src/io/readers/metadata_reader.rs
+++ b/src/io/readers/metadata_reader.rs
@@ -23,38 +23,59 @@ impl MetadataReader {
             SqlMetadata::from_sql_reader(&tdf_sql_reader)?;
         let compression_type =
             parse_value(&sql_metadata, "TimsCompressionType")?;
+        let (mz_min, mz_max) = get_mz_bounds(&sql_metadata)?;
+        let (im_min, im_max) = get_im_bounds(&sql_metadata)?;
+        let rt_values: Vec<f64> =
+            tdf_sql_reader.read_column_from_table("Time", "Frames")?;
+        let rt_min = rt_values
+            .iter()
+            .filter(|&&v| !v.is_nan()) // Filter out NaN values
+            .cloned()
+            .min_by(|a, b| a.partial_cmp(b).unwrap())
+            .unwrap();
+        let rt_max = rt_values
+            .iter()
+            .filter(|&&v| !v.is_nan()) // Filter out NaN values
+            .cloned()
+            .max_by(|a, b| a.partial_cmp(b).unwrap())
+            .unwrap();
         let metadata = Metadata {
             path: path.as_ref().to_path_buf(),
-            rt_converter: get_rt_converter(&tdf_sql_reader)?,
+            rt_converter: Frame2RtConverter::from_values(rt_values),
            im_converter: get_im_converter(&sql_metadata, &tdf_sql_reader)?,
             mz_converter: get_mz_converter(&sql_metadata)?,
+            lower_rt: rt_min,
+            upper_rt: rt_max,
+            lower_im: im_min,
+            upper_im: im_max,
+            lower_mz: mz_min,
+            upper_mz: mz_max,
             compression_type,
         };
         Ok(metadata)
     }
 }
 
-fn get_rt_converter(
-    tdf_sql_reader: &SqlReader,
-) -> Result<Frame2RtConverter, MetadataReaderError> {
-    let rt_values: Vec<f64> =
-        tdf_sql_reader.read_column_from_table("Time", "Frames")?;
-    Ok(Frame2RtConverter::from_values(rt_values))
-}
-
-fn get_mz_converter(
+fn get_mz_bounds(
     sql_metadata: &HashMap<String, String>,
-) -> Result<Tof2MzConverter, MetadataReaderError> {
+) -> Result<(f64, f64), MetadataReaderError> {
     let software = sql_metadata.get("AcquisitionSoftware").ok_or(
         MetadataReaderError::KeyNotFound("AcquisitionSoftware".to_string()),
     )?;
-    let tof_max_index: u32 = parse_value(sql_metadata, "DigitizerNumSamples")?;
     let mut mz_min: f64 = parse_value(sql_metadata, "MzAcqRangeLower")?;
     let mut mz_max: f64 = parse_value(sql_metadata, "MzAcqRangeUpper")?;
     if software == OTOF_CONTROL {
         mz_min -= 5.0;
         mz_max += 5.0;
     }
+    Ok((mz_min, mz_max))
+}
+
+fn get_mz_converter(
+    sql_metadata: &HashMap<String, String>,
+) -> Result<Tof2MzConverter, MetadataReaderError> {
+    let (mz_min, mz_max) = get_mz_bounds(sql_metadata)?;
+    let tof_max_index: u32 = parse_value(sql_metadata, "DigitizerNumSamples")?;
     Ok(Tof2MzConverter::from_boundaries(
         mz_min,
         mz_max,
@@ -62,6 +83,14 @@ fn get_mz_converter(
 }
 
+fn get_im_bounds(
+    sql_metadata: &HashMap<String, String>,
+) -> Result<(f64, f64), MetadataReaderError> {
+    let im_min: f64 = parse_value(sql_metadata, "OneOverK0AcqRangeLower")?;
+    let im_max: f64 = parse_value(sql_metadata, "OneOverK0AcqRangeUpper")?;
+    Ok((im_min, im_max))
+}
+
 fn get_im_converter(
     sql_metadata: &HashMap<String, String>,
     tdf_sql_reader: &SqlReader,
 ) -> Result<Scan2ImConverter, MetadataReaderError> {
     let scan_counts: Vec<u32> =
         tdf_sql_reader.read_column_from_table("NumScans", "Frames")?;
     let scan_max_index = *scan_counts.iter().max().unwrap(); // SqlReader cannot return empty vecs, so always succeeds
-    let im_min: f64 = parse_value(sql_metadata, "OneOverK0AcqRangeLower")?;
-    let im_max: f64 = parse_value(sql_metadata, "OneOverK0AcqRangeUpper")?;
+    let (im_min, im_max) = get_im_bounds(sql_metadata)?;
     Ok(Scan2ImConverter::from_boundaries(
         im_min,
         im_max,
diff --git a/src/ms_data/metadata.rs b/src/ms_data/metadata.rs
index 14d1a9a..8e78364 100644
--- a/src/ms_data/metadata.rs
+++ b/src/ms_data/metadata.rs
@@ -13,4 +13,10 @@ pub struct Metadata {
     pub im_converter: Scan2ImConverter,
     pub mz_converter: Tof2MzConverter,
     pub compression_type: u8,
+    pub lower_rt: f64,
+    pub upper_rt: f64,
+    pub lower_im: f64,
+    pub upper_im: f64,
+    pub lower_mz: f64,
+    pub upper_mz: f64,
 }
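
Editor's note: PATCH 5 exposes the acquisition ranges directly on `Metadata`. A closing sketch that prints them, assuming the constructor is `MetadataReader::new(path)` in line with the other readers and that it accepts the `.d`/`analysis.tdf` path; the path itself is a placeholder:

```rust
use timsrust::readers::MetadataReader;

fn print_acquisition_ranges(path: &str) {
    // Placeholder path; the reader yields a Metadata value carrying the new bounds.
    let metadata = MetadataReader::new(path).unwrap();
    println!("rt  {:.2}..{:.2} s", metadata.lower_rt, metadata.upper_rt);
    println!("im  {:.3}..{:.3} 1/K0", metadata.lower_im, metadata.upper_im);
    println!("m/z {:.1}..{:.1}", metadata.lower_mz, metadata.upper_mz);
}
```
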