Skip to content

Feature/window splitting try2 #21

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/io/readers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@ mod metadata_reader;
mod precursor_reader;
mod quad_settings_reader;
mod spectrum_reader;
mod tdf_utils;

pub use frame_reader::*;
pub use metadata_reader::*;
pub use precursor_reader::*;
pub use quad_settings_reader::*;
pub use spectrum_reader::*;
pub use tdf_utils::QuadWindowExpansionStrategy;
10 changes: 8 additions & 2 deletions src/io/readers/frame_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ use super::{
},
tdf_blob_reader::{TdfBlob, TdfBlobReader, TdfBlobReaderError},
},
QuadrupoleSettingsReader, QuadrupoleSettingsReaderError,
FrameWindowSplittingStrategy, QuadrupoleSettingsReader,
QuadrupoleSettingsReaderError,
};

#[derive(Debug)]
Expand All @@ -30,10 +31,14 @@ pub struct FrameReader {
acquisition: AcquisitionType,
window_groups: Vec<u8>,
quadrupole_settings: Vec<Arc<QuadrupoleSettings>>,
pub splitting_strategy: FrameWindowSplittingStrategy,
}

impl FrameReader {
pub fn new(path: impl AsRef<Path>) -> Result<Self, FrameReaderError> {
pub fn new(
path: impl AsRef<Path>,
config: FrameWindowSplittingStrategy,
) -> Result<Self, FrameReaderError> {
let sql_path = find_extension(&path, "analysis.tdf").ok_or(
FrameReaderError::FileNotFound("analysis.tdf".to_string()),
)?;
Expand Down Expand Up @@ -74,6 +79,7 @@ impl FrameReader {
.into_iter()
.map(|x| Arc::new(x))
.collect(),
splitting_strategy: config,
};
Ok(reader)
}
Expand Down
18 changes: 14 additions & 4 deletions src/io/readers/precursor_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ use tdf::{TDFPrecursorReader, TDFPrecursorReaderError};

use crate::ms_data::Precursor;

use super::FrameWindowSplittingStrategy;

pub struct PrecursorReader {
precursor_reader: Box<dyn PrecursorReaderTrait>,
}
Expand All @@ -20,11 +22,19 @@ impl fmt::Debug for PrecursorReader {
}

impl PrecursorReader {
pub fn new(path: impl AsRef<Path>) -> Result<Self, PrecursorReaderError> {
pub fn new(
path: impl AsRef<Path>,
config: Option<FrameWindowSplittingStrategy>,
) -> Result<Self, PrecursorReaderError> {
let tmp = path.as_ref().extension().and_then(|e| e.to_str());
let precursor_reader: Box<dyn PrecursorReaderTrait> =
match path.as_ref().extension().and_then(|e| e.to_str()) {
Some("parquet") => Box::new(MiniTDFPrecursorReader::new(path)?),
Some("tdf") => Box::new(TDFPrecursorReader::new(path)?),
match (tmp, config) {
(Some("parquet"), None) => {
Box::new(MiniTDFPrecursorReader::new(path)?)
},
(Some("tdf"), strat) => {
Box::new(TDFPrecursorReader::new(path, strat)?)
},
_ => panic!(),
};
let reader = Self { precursor_reader };
Expand Down
34 changes: 27 additions & 7 deletions src/io/readers/precursor_reader/tdf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@ use dda::{DDATDFPrecursorReader, DDATDFPrecursorReaderError};
use dia::{DIATDFPrecursorReader, DIATDFPrecursorReaderError};

use crate::{
io::readers::file_readers::sql_reader::{SqlError, SqlReader},
io::readers::{
file_readers::sql_reader::{SqlError, SqlReader},
FrameWindowSplittingStrategy,
},
ms_data::{AcquisitionType, Precursor},
};

Expand All @@ -20,6 +23,7 @@ pub struct TDFPrecursorReader {
impl TDFPrecursorReader {
pub fn new(
path: impl AsRef<Path>,
splitting_strategy: Option<FrameWindowSplittingStrategy>,
) -> Result<Self, TDFPrecursorReaderError> {
let sql_path = path.as_ref();
let tdf_sql_reader = SqlReader::open(sql_path)?;
Expand All @@ -33,16 +37,32 @@ impl TDFPrecursorReader {
AcquisitionType::Unknown
};
let precursor_reader: Box<dyn PrecursorReaderTrait> =
match acquisition_type {
AcquisitionType::DDAPASEF => {
match (acquisition_type, splitting_strategy) {
(AcquisitionType::DDAPASEF, None) => {
Box::new(DDATDFPrecursorReader::new(path)?)
},
AcquisitionType::DIAPASEF => {
Box::new(DIATDFPrecursorReader::new(path)?)
(
AcquisitionType::DDAPASEF,
Some(FrameWindowSplittingStrategy::None),
) => {
// Not 100% sure when this happens ...
// By this I mean generating a Some(None)
// ./tests/frame_readers.rs:60:25 generates it.
// JSPP - 2024-Jul-16
Box::new(DDATDFPrecursorReader::new(path)?)
},
(AcquisitionType::DIAPASEF, Some(splitting_strat)) => {
Box::new(DIATDFPrecursorReader::new(path, splitting_strat)?)
},
(AcquisitionType::DIAPASEF, None) => {
Box::new(DIATDFPrecursorReader::new(
path,
FrameWindowSplittingStrategy::None,
)?)
},
acquisition_type => {
(acq_type, acq_config) => {
return Err(TDFPrecursorReaderError::UnknownPrecursorType(
format!("{:?}", acquisition_type),
format!("{:?} + {:?}", acquisition_type, acq_config),
))
},
};
Expand Down
36 changes: 19 additions & 17 deletions src/io/readers/precursor_reader/tdf/dia.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
use std::path::Path;

use crate::io::readers::tdf_utils::{
expand_quadrupole_settings, expand_window_settings,
};
use crate::io::readers::FrameWindowSplittingStrategy;
use crate::{
domain_converters::{
ConvertableDomain, Frame2RtConverter, Scan2ImConverter,
Expand All @@ -26,6 +30,7 @@ pub struct DIATDFPrecursorReader {
impl DIATDFPrecursorReader {
pub fn new(
path: impl AsRef<Path>,
splitting_strat: FrameWindowSplittingStrategy,
) -> Result<Self, DIATDFPrecursorReaderError> {
let sql_path = path.as_ref();
let tdf_sql_reader = SqlReader::open(sql_path)?;
Expand All @@ -35,23 +40,20 @@ impl DIATDFPrecursorReader {
let window_groups = SqlWindowGroup::from_sql_reader(&tdf_sql_reader)?;
let quadrupole_settings =
QuadrupoleSettingsReader::new(tdf_sql_reader.get_path())?;
let mut expanded_quadrupole_settings: Vec<QuadrupoleSettings> = vec![];
for window_group in window_groups {
let window = window_group.window_group;
let frame = window_group.frame;
let group = &quadrupole_settings[window as usize - 1];
for sub_window in 0..group.isolation_mz.len() {
let sub_quad_settings = QuadrupoleSettings {
index: frame,
scan_starts: vec![group.scan_starts[sub_window]],
scan_ends: vec![group.scan_ends[sub_window]],
isolation_mz: vec![group.isolation_mz[sub_window]],
isolation_width: vec![group.isolation_width[sub_window]],
collision_energy: vec![group.collision_energy[sub_window]],
};
expanded_quadrupole_settings.push(sub_quad_settings)
}
}
let expanded_quadrupole_settings = match splitting_strat {
FrameWindowSplittingStrategy::None => quadrupole_settings,
FrameWindowSplittingStrategy::Quadrupole(x) => {
expand_quadrupole_settings(
&window_groups,
&quadrupole_settings,
&x,
)
},
FrameWindowSplittingStrategy::Window(x) => {
expand_window_settings(&window_groups, &quadrupole_settings, &x)
},
};

let reader = Self {
expanded_quadrupole_settings,
rt_converter,
Expand Down
36 changes: 34 additions & 2 deletions src/io/readers/spectrum_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,56 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator};
use std::path::{Path, PathBuf};
use tdf::TDFSpectrumReader;

use crate::io::readers::tdf_utils::QuadWindowExpansionStrategy;
use crate::ms_data::Spectrum;

/// Reads spectra through a boxed backend implementing `SpectrumReaderTrait`.
///
/// The concrete backend is selected in `SpectrumReader::new` from the
/// extension of the supplied path (`"ms2"` or `"d"`).
pub struct SpectrumReader {
// Backend that does the actual reading; stored as a trait object so the
// format-specific reader type stays hidden behind this wrapper.
spectrum_reader: Box<dyn SpectrumReaderTrait>,
}

/// Tunable parameters used by the TDF spectrum reader when processing raw
/// spectra.
///
/// All fields are public so that callers building a `SpectrumReaderConfig`
/// can override individual values, typically with struct-update syntax:
/// `SpectrumProcessingParams { smoothing_window: 3, ..Default::default() }`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct SpectrumProcessingParams {
    /// Window passed to `smooth` on each raw spectrum.
    pub smoothing_window: u32,
    /// Window passed to `centroid` on each raw spectrum.
    pub centroiding_window: u32,
    /// Upper bound (exclusive) on `|mz - precursor_mz|` for a TOF index to be
    /// kept as a calibration hit.
    pub calibration_tolerance: f64,
}

impl Default for SpectrumProcessingParams {
    /// Returns the default processing parameters: smoothing and centroiding
    /// windows of `1` and a calibration tolerance of `0.1`.
    fn default() -> Self {
        Self {
            smoothing_window: 1,
            centroiding_window: 1,
            calibration_tolerance: 0.1,
        }
    }
}

/// Selects whether, and how, quadrupole settings are expanded when readers
/// are constructed.
///
/// The `Default` value is `FrameWindowSplittingStrategy::None`.
#[derive(Debug, Clone, Copy, Default)]
pub enum FrameWindowSplittingStrategy {
/// No expansion: `DIATDFPrecursorReader::new` uses the quadrupole settings
/// exactly as returned by `QuadrupoleSettingsReader`.
#[default]
None,
/// Expand with `expand_quadrupole_settings`, parameterised by the given
/// `QuadWindowExpansionStrategy`.
Quadrupole(QuadWindowExpansionStrategy),
/// Expand with `expand_window_settings`, parameterised by the given
/// `QuadWindowExpansionStrategy`.
Window(QuadWindowExpansionStrategy),
}

/// Configuration bundle accepted by `SpectrumReader::new`.
///
/// `Default` combines the default of each field.
#[derive(Debug, Default)]
pub struct SpectrumReaderConfig {
/// Smoothing, centroiding and calibration parameters used by the TDF
/// spectrum reader.
pub spectrum_processing_params: SpectrumProcessingParams,
/// Splitting strategy that the TDF spectrum reader forwards to both
/// `FrameReader::new` and `PrecursorReader::new`.
pub frame_splitting_params: FrameWindowSplittingStrategy,
}

/// Opaque `Debug` representation: only a fixed placeholder is printed and the
/// boxed backend is never inspected.
impl fmt::Debug for SpectrumReader {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The doubled braces are format-string escapes for literal `{` / `}`.
        f.write_fmt(format_args!("SpectrumReader {{ /* fields omitted */ }}"))
    }
}

impl SpectrumReader {
pub fn new(path: impl AsRef<Path>) -> Self {
pub fn new(path: impl AsRef<Path>, config: SpectrumReaderConfig) -> Self {
let spectrum_reader: Box<dyn SpectrumReaderTrait> =
match path.as_ref().extension().and_then(|e| e.to_str()) {
Some("ms2") => Box::new(MiniTDFSpectrumReader::new(path)),
Some("d") => Box::new(TDFSpectrumReader::new(path)),
Some("d") => Box::new(TDFSpectrumReader::new(path, config)),
_ => panic!(),
};
Self { spectrum_reader }
Expand Down
2 changes: 1 addition & 1 deletion src/io/readers/spectrum_reader/minitdf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ impl MiniTDFSpectrumReader {
let parquet_file_name =
find_extension(&path, "ms2spectrum.parquet").unwrap();
let precursor_reader =
PrecursorReader::new(&parquet_file_name).unwrap();
PrecursorReader::new(&parquet_file_name, None).unwrap();
let offsets = ParquetPrecursor::from_parquet_file(&parquet_file_name)
.unwrap()
.iter()
Expand Down
40 changes: 28 additions & 12 deletions src/io/readers/spectrum_reader/tdf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,28 +16,34 @@ use crate::{
utils::find_extension,
};

use super::SpectrumReaderTrait;

const SMOOTHING_WINDOW: u32 = 1;
const CENTROIDING_WINDOW: u32 = 1;
const CALIBRATION_TOLERANCE: f64 = 0.1;
use super::{SpectrumReaderConfig, SpectrumReaderTrait};

#[derive(Debug)]
pub struct TDFSpectrumReader {
path: PathBuf,
precursor_reader: PrecursorReader,
mz_reader: Tof2MzConverter,
raw_spectrum_reader: RawSpectrumReader,
config: SpectrumReaderConfig,
}

impl TDFSpectrumReader {
pub fn new(path_name: impl AsRef<Path>) -> Self {
let frame_reader: FrameReader = FrameReader::new(&path_name).unwrap();
pub fn new(
path_name: impl AsRef<Path>,
config: SpectrumReaderConfig,
) -> Self {
let frame_reader: FrameReader =
FrameReader::new(&path_name, config.frame_splitting_params)
.unwrap();
let sql_path = find_extension(&path_name, "analysis.tdf").unwrap();
let metadata = MetadataReader::new(&sql_path).unwrap();
let mz_reader: Tof2MzConverter = metadata.mz_converter;
let tdf_sql_reader = SqlReader::open(&sql_path).unwrap();
let precursor_reader = PrecursorReader::new(&sql_path).unwrap();
let precursor_reader = PrecursorReader::new(
&sql_path,
Some(config.frame_splitting_params),
)
.unwrap();
let acquisition_type = frame_reader.get_acquisition();
let raw_spectrum_reader = RawSpectrumReader::new(
&tdf_sql_reader,
Expand All @@ -49,14 +55,15 @@ impl TDFSpectrumReader {
precursor_reader,
mz_reader,
raw_spectrum_reader,
config,
}
}

pub fn read_single_raw_spectrum(&self, index: usize) -> RawSpectrum {
let raw_spectrum = self.raw_spectrum_reader.get(index);
raw_spectrum
.smooth(SMOOTHING_WINDOW)
.centroid(CENTROIDING_WINDOW)
.smooth(self.config.spectrum_processing_params.smoothing_window)
.centroid(self.config.spectrum_processing_params.centroiding_window)
}
}

Expand All @@ -71,7 +78,11 @@ impl SpectrumReaderTrait for TDFSpectrumReader {
}

fn len(&self) -> usize {
self.precursor_reader.len()
debug_assert_eq!(
self.precursor_reader.len(),
self.raw_spectrum_reader.len()
);
self.raw_spectrum_reader.len()
}

fn get_path(&self) -> PathBuf {
Expand All @@ -88,7 +99,12 @@ impl SpectrumReaderTrait for TDFSpectrumReader {
let mut result: Vec<(f64, u32)> = vec![];
for &tof_index in spectrum.tof_indices.iter() {
let mz = self.mz_reader.convert(tof_index);
if (mz - precursor_mz).abs() < CALIBRATION_TOLERANCE {
if (mz - precursor_mz).abs()
< self
.config
.spectrum_processing_params
.calibration_tolerance
{
let hit = (precursor_mz, tof_index);
result.push(hit);
}
Expand Down
4 changes: 4 additions & 0 deletions src/io/readers/spectrum_reader/tdf/dda.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,4 +96,8 @@ impl RawSpectrumReaderTrait for DDARawSpectrumReader {
};
raw_spectrum
}

/// Returns one less than the number of entries in `self.offsets`, or `0`
/// when `offsets` is empty.
///
/// NOTE(review): presumably `offsets` stores N + 1 boundaries for N spectra;
/// confirm against the constructor.
fn len(&self) -> usize {
    // `saturating_sub` avoids the underflow that `len() - 1` hits on an empty
    // offset table (panic in debug builds, wrap to `usize::MAX` in release).
    self.offsets.len().saturating_sub(1)
}
}
Loading
Loading