Skip to content

Develop sw #22

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions benches/speed_performance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ fn criterion_benchmark_dda(c: &mut Criterion) {
group.significance_level(0.001).sample_size(10);
let d_folder_name: &str = DDA_TEST;
let frame_reader = FrameReader::new(d_folder_name).unwrap();
let spectrum_reader = SpectrumReader::new(d_folder_name);
let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap();
group.bench_function("DDA read_all_frames 6m", |b| {
b.iter(|| read_all_frames(black_box(&frame_reader)))
});
Expand All @@ -56,7 +56,7 @@ fn criterion_benchmark_dia(c: &mut Criterion) {
group.significance_level(0.001).sample_size(10);
let d_folder_name: &str = DIA_TEST;
let frame_reader = FrameReader::new(d_folder_name).unwrap();
let spectrum_reader = SpectrumReader::new(d_folder_name);
let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap();
group.bench_function("DIA read_all_frames 6m", |b| {
b.iter(|| read_all_frames(black_box(&frame_reader)))
});
Expand All @@ -75,7 +75,7 @@ fn criterion_benchmark_syp(c: &mut Criterion) {
group.significance_level(0.001).sample_size(10);
let d_folder_name: &str = SYP_TEST;
let frame_reader = FrameReader::new(d_folder_name).unwrap();
let spectrum_reader = SpectrumReader::new(d_folder_name);
let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap();
group.bench_function("SYP read_all_frames 6m", |b| {
b.iter(|| read_all_frames(black_box(&frame_reader)))
});
Expand Down
2 changes: 1 addition & 1 deletion src/io/readers/file_readers/tdf_blob_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ impl IndexedTdfBlobReader {
pub fn new(
file_name: impl AsRef<Path>,
binary_offsets: Vec<usize>,
) -> Result<Self, TdfBlobReaderError> {
) -> Result<Self, IndexedTdfBlobReaderError> {
let blob_reader = TdfBlobReader::new(file_name)?;
let reader = Self {
binary_offsets,
Expand Down
10 changes: 6 additions & 4 deletions src/io/readers/precursor_reader/tdf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,11 @@ impl TDFPrecursorReader {
Box::new(DIATDFPrecursorReader::new(path)?)
},
acquisition_type => {
return Err(TDFPrecursorReaderError::UnknownPrecursorType(
format!("{:?}", acquisition_type),
))
return Err(
TDFPrecursorReaderError::UnsupportedAcquisition(
format!("{:?}", acquisition_type),
),
)
},
};
let reader = Self { precursor_reader };
Expand All @@ -70,5 +72,5 @@ pub enum TDFPrecursorReaderError {
#[error("{0}")]
DIATDFPrecursorReaderError(#[from] DIATDFPrecursorReaderError),
#[error("Invalid acquistion type for precursor reader: {0}")]
UnknownPrecursorType(String),
UnsupportedAcquisition(String),
}
21 changes: 15 additions & 6 deletions src/io/readers/spectrum_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ mod minitdf;
mod tdf;

use core::fmt;
use minitdf::MiniTDFSpectrumReader;
use minitdf::{MiniTDFSpectrumReader, MiniTDFSpectrumReaderError};
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use std::path::{Path, PathBuf};
use tdf::TDFSpectrumReader;
use tdf::{TDFSpectrumReader, TDFSpectrumReaderError};

use crate::ms_data::Spectrum;

Expand All @@ -20,14 +20,15 @@ impl fmt::Debug for SpectrumReader {
}

impl SpectrumReader {
/// Builds a `SpectrumReader` for `path`, choosing the backend from the
/// path's extension: `"ms2"` selects `MiniTDFSpectrumReader` and `"d"`
/// selects `TDFSpectrumReader`. The chosen backend is stored boxed behind
/// `SpectrumReaderTrait`.
///
/// # Errors
/// In the `Result`-returning form, an error from the selected backend's
/// constructor is propagated with `?` as a `SpectrumReaderError`.
///
/// # Panics
/// Panics (bare `panic!()`, no message) when the extension is absent, is not
/// valid UTF-8, or is anything other than `"ms2"` or `"d"`.
// NOTE(review): this region appears to be an inline diff rendering; the two
// signatures and the duplicated match arms / tail expressions look like the
// before and after versions of the same lines.
pub fn new(path: impl AsRef<Path>) -> Self {
pub fn new(path: impl AsRef<Path>) -> Result<Self, SpectrumReaderError> {
let spectrum_reader: Box<dyn SpectrumReaderTrait> =
match path.as_ref().extension().and_then(|e| e.to_str()) {
Some("ms2") => Box::new(MiniTDFSpectrumReader::new(path)),
Some("d") => Box::new(TDFSpectrumReader::new(path)),
Some("ms2") => Box::new(MiniTDFSpectrumReader::new(path)?),
Some("d") => Box::new(TDFSpectrumReader::new(path)?),
// NOTE(review): unsupported extensions still panic even though the
// function can now return an error; consider a dedicated error variant.
_ => panic!(),
};
Self { spectrum_reader }
let reader = Self { spectrum_reader };
Ok(reader)
}

pub fn get(&self, index: usize) -> Spectrum {
Expand Down Expand Up @@ -62,3 +63,11 @@ trait SpectrumReaderTrait: Sync {
fn len(&self) -> usize;
fn calibrate(&mut self);
}

/// Error type returned by `SpectrumReader::new`.
///
/// Each variant wraps the error of one backend reader; the `#[from]`
/// attribute lets `?` convert the backend error automatically, and the
/// `"{0}"` format means the displayed message is the wrapped error's own.
#[derive(Debug, thiserror::Error)]
pub enum SpectrumReaderError {
/// The MiniTDF backend could not be constructed.
#[error("{0}")]
MiniTDFSpectrumReaderError(#[from] MiniTDFSpectrumReaderError),
/// The TDF backend could not be constructed.
#[error("{0}")]
TDFSpectrumReaderError(#[from] TDFSpectrumReaderError),
}
57 changes: 40 additions & 17 deletions src/io/readers/spectrum_reader/minitdf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,15 @@ use crate::{
io::readers::{
file_readers::{
parquet_reader::{
precursors::ParquetPrecursor, ReadableParquetTable,
precursors::ParquetPrecursor, ParquetError,
ReadableParquetTable,
},
sql_reader::SqlError,
tdf_blob_reader::{
IndexedTdfBlobReader, IndexedTdfBlobReaderError,
},
tdf_blob_reader::IndexedTdfBlobReader,
},
PrecursorReader,
PrecursorReader, PrecursorReaderError,
},
ms_data::Spectrum,
utils::find_extension,
Expand All @@ -25,31 +29,36 @@ pub struct MiniTDFSpectrumReader {
}

impl MiniTDFSpectrumReader {
pub fn new(path: impl AsRef<Path>) -> Self {
let parquet_file_name =
find_extension(&path, "ms2spectrum.parquet").unwrap();
let precursor_reader =
PrecursorReader::new(&parquet_file_name).unwrap();
let offsets = ParquetPrecursor::from_parquet_file(&parquet_file_name)
.unwrap()
pub fn new(
path: impl AsRef<Path>,
) -> Result<Self, MiniTDFSpectrumReaderError> {
let parquet_file_name = find_extension(&path, "ms2spectrum.parquet")
.ok_or(MiniTDFSpectrumReaderError::FileNotFound(
"analysis.tdf".to_string(),
))?;
let precursor_reader = PrecursorReader::new(&parquet_file_name)?;
let offsets = ParquetPrecursor::from_parquet_file(&parquet_file_name)?
.iter()
.map(|x| x.offset as usize)
.collect();
let collision_energies =
ParquetPrecursor::from_parquet_file(&parquet_file_name)
.unwrap()
ParquetPrecursor::from_parquet_file(&parquet_file_name)?
.iter()
.map(|x| x.collision_energy)
.collect();
let bin_file_name = find_extension(&path, "bin").unwrap();
let blob_reader =
IndexedTdfBlobReader::new(&bin_file_name, offsets).unwrap();
Self {
let bin_file_name = find_extension(&path, "bin").ok_or(
MiniTDFSpectrumReaderError::FileNotFound(
"analysis.tdf".to_string(),
),
)?;
let blob_reader = IndexedTdfBlobReader::new(&bin_file_name, offsets)?;
let reader = Self {
path: path.as_ref().to_path_buf(),
precursor_reader,
blob_reader,
collision_energies,
}
};
Ok(reader)
}
}

Expand Down Expand Up @@ -100,3 +109,17 @@ impl SpectrumReaderTrait for MiniTDFSpectrumReader {

fn calibrate(&mut self) {}
}

/// Error type returned by `MiniTDFSpectrumReader::new`.
///
/// The wrapping variants use `#[from]`, so `?` converts the underlying
/// error automatically; every variant displays only its payload (`"{0}"`).
#[derive(Debug, thiserror::Error)]
pub enum MiniTDFSpectrumReaderError {
/// Wraps a `SqlError`.
#[error("{0}")]
SqlError(#[from] SqlError),
/// Wraps a `PrecursorReaderError`.
#[error("{0}")]
PrecursorReaderError(#[from] PrecursorReaderError),
/// Wraps a `ParquetError`.
#[error("{0}")]
ParquetError(#[from] ParquetError),
/// Wraps an `IndexedTdfBlobReaderError`.
#[error("{0}")]
IndexedTdfBlobReaderError(#[from] IndexedTdfBlobReaderError),
/// A required file could not be located; the payload is the name passed by
/// the caller, and the displayed message is that string alone.
#[error("{0}")]
FileNotFound(String),
}
46 changes: 34 additions & 12 deletions src/io/readers/spectrum_reader/tdf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@ mod dda;
mod dia;
mod raw_spectra;

use raw_spectra::{RawSpectrum, RawSpectrumReader};
use raw_spectra::{RawSpectrum, RawSpectrumReader, RawSpectrumReaderError};
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use std::path::{Path, PathBuf};

use crate::{
domain_converters::{ConvertableDomain, Tof2MzConverter},
io::readers::{
file_readers::sql_reader::SqlReader, FrameReader, MetadataReader,
PrecursorReader,
file_readers::sql_reader::{SqlError, SqlReader},
FrameReader, FrameReaderError, MetadataReader, MetadataReaderError,
PrecursorReader, PrecursorReaderError,
},
ms_data::Spectrum,
utils::find_extension,
Expand All @@ -31,25 +32,30 @@ pub struct TDFSpectrumReader {
}

impl TDFSpectrumReader {
/// Builds a `TDFSpectrumReader` for the data set at `path_name`.
///
/// Construction order: a `FrameReader` on `path_name`, lookup of the
/// `analysis.tdf` file via `find_extension`, a `MetadataReader` (whose
/// `mz_converter` becomes `mz_reader`), a `SqlReader`, a `PrecursorReader`,
/// and finally a `RawSpectrumReader` built from the SQL reader, the frame
/// reader (moved in) and the frame reader's acquisition type.
///
/// # Errors
/// In the `Result`-returning form, returns
/// `TDFSpectrumReaderError::FileNotFound("analysis.tdf")` when
/// `find_extension` yields `None`, and propagates every sub-reader's
/// construction error with `?`.
// NOTE(review): this region appears to be an inline diff rendering; the
// lines ending in `.unwrap()` and the `-> Self` signature look like the
// pre-change versions of the `?`-based lines that follow them.
pub fn new(path_name: impl AsRef<Path>) -> Self {
let frame_reader: FrameReader = FrameReader::new(&path_name).unwrap();
let sql_path = find_extension(&path_name, "analysis.tdf").unwrap();
let metadata = MetadataReader::new(&sql_path).unwrap();
pub fn new(
path_name: impl AsRef<Path>,
) -> Result<Self, TDFSpectrumReaderError> {
let frame_reader: FrameReader = FrameReader::new(&path_name)?;
let sql_path = find_extension(&path_name, "analysis.tdf").ok_or(
TDFSpectrumReaderError::FileNotFound("analysis.tdf".to_string()),
)?;
let metadata = MetadataReader::new(&sql_path)?;
let mz_reader: Tof2MzConverter = metadata.mz_converter;
let tdf_sql_reader = SqlReader::open(&sql_path).unwrap();
let precursor_reader = PrecursorReader::new(&sql_path).unwrap();
let tdf_sql_reader = SqlReader::open(&sql_path)?;
let precursor_reader = PrecursorReader::new(&sql_path)?;
// Read the acquisition type before `frame_reader` is moved into the raw
// spectrum reader below.
let acquisition_type = frame_reader.get_acquisition();
let raw_spectrum_reader = RawSpectrumReader::new(
&tdf_sql_reader,
frame_reader,
acquisition_type,
);
Self {
)?;
let reader = Self {
path: path_name.as_ref().to_path_buf(),
precursor_reader,
mz_reader,
raw_spectrum_reader,
}
};
Ok(reader)
}

pub fn read_single_raw_spectrum(&self, index: usize) -> RawSpectrum {
Expand Down Expand Up @@ -104,3 +110,19 @@ impl SpectrumReaderTrait for TDFSpectrumReader {
}
}
}

/// Error type returned by `TDFSpectrumReader::new`.
///
/// The wrapping variants use `#[from]`, so `?` converts the underlying
/// error automatically; every variant displays only its payload (`"{0}"`).
#[derive(Debug, thiserror::Error)]
pub enum TDFSpectrumReaderError {
/// Wraps a `SqlError`.
#[error("{0}")]
SqlError(#[from] SqlError),
/// Wraps a `PrecursorReaderError`.
#[error("{0}")]
PrecursorReaderError(#[from] PrecursorReaderError),
/// Wraps a `MetadataReaderError`.
// NOTE(review): the variant name is spelled "Metada" (missing "ta");
// renaming it to `MetadataReaderError` would change the public API.
#[error("{0}")]
MetadaReaderError(#[from] MetadataReaderError),
/// Wraps a `FrameReaderError`.
#[error("{0}")]
FrameReaderError(#[from] FrameReaderError),
/// Wraps a `RawSpectrumReaderError`.
#[error("{0}")]
RawSpectrumReaderError(#[from] RawSpectrumReaderError),
/// A required file could not be located; the payload is the name passed by
/// the caller, and the displayed message is that string alone.
#[error("{0}")]
FileNotFound(String),
}
24 changes: 17 additions & 7 deletions src/io/readers/spectrum_reader/tdf/dda.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
use crate::{
io::readers::{
file_readers::sql_reader::{
pasef_frame_msms::SqlPasefFrameMsMs, ReadableSqlTable, SqlReader,
pasef_frame_msms::SqlPasefFrameMsMs, ReadableSqlTable, SqlError,
SqlReader,
},
FrameReader,
},
Expand All @@ -19,13 +20,15 @@ pub struct DDARawSpectrumReader {
}

impl DDARawSpectrumReader {
pub fn new(tdf_sql_reader: &SqlReader, frame_reader: FrameReader) -> Self {
let pasef_frames =
SqlPasefFrameMsMs::from_sql_reader(&tdf_sql_reader).unwrap();
pub fn new(
tdf_sql_reader: &SqlReader,
frame_reader: FrameReader,
) -> Result<Self, DDARawSpectrumReaderError> {
let pasef_frames = SqlPasefFrameMsMs::from_sql_reader(&tdf_sql_reader)?;
let pasef_precursors =
&pasef_frames.iter().map(|x| x.precursor).collect();
let order: Vec<usize> = argsort(&pasef_precursors);
let max_precursor = pasef_precursors.iter().max().unwrap();
let max_precursor = pasef_precursors.iter().max().unwrap(); // SqlReader cannot return empty vecs, so always succeeds
let mut offsets: Vec<usize> = Vec::with_capacity(max_precursor + 1);
offsets.push(0);
for (offset, &index) in order.iter().enumerate().take(order.len() - 1) {
Expand All @@ -35,12 +38,13 @@ impl DDARawSpectrumReader {
}
}
offsets.push(order.len());
Self {
let reader = Self {
order,
offsets,
pasef_frames,
frame_reader,
}
};
Ok(reader)
}

pub fn iterate_over_pasef_frames(
Expand Down Expand Up @@ -97,3 +101,9 @@ impl RawSpectrumReaderTrait for DDARawSpectrumReader {
raw_spectrum
}
}

/// Error type returned by `DDARawSpectrumReader::new`.
#[derive(Debug, thiserror::Error)]
pub enum DDARawSpectrumReaderError {
/// Wraps a `SqlError`; `#[from]` lets `?` convert it automatically, and the
/// displayed message is the wrapped error's own.
#[error("{0}")]
SqlError(#[from] SqlError),
}
27 changes: 19 additions & 8 deletions src/io/readers/spectrum_reader/tdf/dia.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use crate::{
io::readers::{
file_readers::sql_reader::{
frame_groups::SqlWindowGroup, ReadableSqlTable, SqlReader,
frame_groups::SqlWindowGroup, ReadableSqlTable, SqlError, SqlReader,
},
FrameReader, QuadrupoleSettingsReader,
FrameReader, QuadrupoleSettingsReader, QuadrupoleSettingsReaderError,
},
ms_data::QuadrupoleSettings,
utils::vec_utils::group_and_sum,
Expand All @@ -18,11 +18,13 @@ pub struct DIARawSpectrumReader {
}

impl DIARawSpectrumReader {
pub fn new(tdf_sql_reader: &SqlReader, frame_reader: FrameReader) -> Self {
let window_groups =
SqlWindowGroup::from_sql_reader(&tdf_sql_reader).unwrap();
pub fn new(
tdf_sql_reader: &SqlReader,
frame_reader: FrameReader,
) -> Result<Self, DIARawSpectrumReaderError> {
let window_groups = SqlWindowGroup::from_sql_reader(&tdf_sql_reader)?;
let quadrupole_settings =
QuadrupoleSettingsReader::new(&tdf_sql_reader.get_path()).unwrap();
QuadrupoleSettingsReader::new(&tdf_sql_reader.get_path())?;
let mut expanded_quadrupole_settings: Vec<QuadrupoleSettings> = vec![];
for window_group in window_groups {
let window = window_group.window_group;
Expand All @@ -40,10 +42,11 @@ impl DIARawSpectrumReader {
expanded_quadrupole_settings.push(sub_quad_settings)
}
}
Self {
let reader = Self {
expanded_quadrupole_settings,
frame_reader,
}
};
Ok(reader)
}
}

Expand Down Expand Up @@ -76,3 +79,11 @@ impl RawSpectrumReaderTrait for DIARawSpectrumReader {
raw_spectrum
}
}

/// Error type returned by `DIARawSpectrumReader::new`.
///
/// Both variants use `#[from]`, so `?` converts the underlying error
/// automatically; each displays the wrapped error's own message (`"{0}"`).
#[derive(Debug, thiserror::Error)]
pub enum DIARawSpectrumReaderError {
/// Wraps a `SqlError`.
#[error("{0}")]
SqlError(#[from] SqlError),
/// Wraps a `QuadrupoleSettingsReaderError`.
#[error("{0}")]
QuadrupoleSettingsReaderError(#[from] QuadrupoleSettingsReaderError),
}
Loading
Loading