diff --git a/Cargo.lock b/Cargo.lock index 612b5a4..8d6ee9f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "adler2" @@ -907,9 +907,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.88" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c3a7fc5db1e57d5a779a352c8cdb57b29aa4c40cc69c3a68a7fedc815fbf2f9" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ "unicode-ident", ] @@ -1039,7 +1039,7 @@ checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.100", ] [[package]] @@ -1091,9 +1091,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.79" +version = "2.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89132cd0bf050864e1d38dc3bbc07a0eb8e7530af26344d3d2bbbef83499f590" +checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" dependencies = [ "proc-macro2", "quote", @@ -1102,22 +1102,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.64" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84" +checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.64" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" +checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.100", ] [[package]] @@ -1240,7 +1240,7 @@ dependencies = [ "once_cell", 
"proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.100", "wasm-bindgen-shared", ] @@ -1262,7 +1262,7 @@ checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.100", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -1400,7 +1400,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.100", ] [[package]] diff --git a/src/errors.rs b/src/errors.rs index dfa4b0b..f6d5ba2 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -2,7 +2,10 @@ use crate::io::readers::{ FrameReaderError, MetadataReaderError, QuadrupoleSettingsReaderError, }; -use crate::{io::readers::PrecursorReaderError, readers::SpectrumReaderError}; +use crate::{ + io::readers::PrecursorReaderError, + readers::{SpectrumReaderError, TimsTofDataError}, +}; /// An error that is produced by timsrust (uses [thiserror]). #[derive(thiserror::Error, Debug)] @@ -20,4 +23,6 @@ pub enum TimsRustError { #[cfg(feature = "tdf")] #[error("{0}")] QuadrupoleSettingsReaderError(#[from] QuadrupoleSettingsReaderError), + #[error("{0}")] + TimsTofDataError(#[from] TimsTofDataError), } diff --git a/src/io/readers.rs b/src/io/readers.rs index b33f6a3..b63717f 100644 --- a/src/io/readers.rs +++ b/src/io/readers.rs @@ -7,7 +7,9 @@ mod precursor_reader; #[cfg(feature = "tdf")] mod quad_settings_reader; mod spectrum_reader; +#[cfg(feature = "tdf")] mod timstof; +mod timstof_path; #[cfg(feature = "tdf")] pub use frame_reader::*; @@ -18,3 +20,4 @@ pub use precursor_reader::*; pub use quad_settings_reader::*; pub use spectrum_reader::*; pub use timstof::*; +pub use timstof_path::*; diff --git a/src/io/readers/file_readers/sql_reader.rs b/src/io/readers/file_readers/sql_reader.rs index af24d0a..5c09931 100644 --- a/src/io/readers/file_readers/sql_reader.rs +++ b/src/io/readers/file_readers/sql_reader.rs @@ -9,7 +9,7 @@ use std::collections::HashMap; use 
rusqlite::{types::FromSql, Connection}; -use crate::readers::{TimsTofPathError, TimsTofPathLike}; +use crate::readers::{TimsTofPath, TimsTofPathError, TimsTofPathLike}; #[derive(Debug)] pub struct SqlReader { @@ -19,6 +19,12 @@ pub struct SqlReader { impl SqlReader { pub fn open(path: impl TimsTofPathLike) -> Result { let path = path.to_timstof_path()?; + Self::new_from_path(&path) + } + + pub(crate) fn new_from_path( + path: &TimsTofPath, + ) -> Result { let connection = Connection::open(&path.tdf()?)?; Ok(Self { connection }) } diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index 8417f3c..30d481f 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -10,12 +10,12 @@ use super::{ file_readers::{ sql_reader::{ frame_groups::SqlWindowGroup, frames::SqlFrame, ReadableSqlTable, - SqlReader, SqlReaderError, + SqlReaderError, }, tdf_blob_reader::{TdfBlob, TdfBlobReader, TdfBlobReaderError}, }, - MetadataReader, MetadataReaderError, QuadrupoleSettingsReader, - QuadrupoleSettingsReaderError, TimsTofPathLike, + QuadrupoleSettingsReaderError, TimsTofData, TimsTofDataError, + TimsTofPathLike, }; #[derive(Debug)] @@ -34,21 +34,28 @@ pub struct FrameReader { impl FrameReader { pub fn new(path: impl TimsTofPathLike) -> Result { - let compression_type = - match MetadataReader::new(&path)?.compression_type { - 2 => 2, - #[cfg(feature = "timscompress")] - 3 => 3, - compression_type => { - return Err(FrameReaderError::CompressionTypeError( - compression_type, - )) - }, - }; + let mut timstofdata = TimsTofData::new(&path)?; + Self::new_from_timstofdata(&mut timstofdata) + } - let tdf_sql_reader = SqlReader::open(&path)?; - let sql_frames = SqlFrame::from_sql_reader(&tdf_sql_reader)?; - let tdf_bin_reader = TdfBlobReader::new(&path)?; + pub fn new_from_timstofdata( + timstofdata: &mut TimsTofData, + ) -> Result { + let compression_type = match timstofdata.get_metadata().compression_type + { + 2 => 2, + #[cfg(feature = 
"timscompress")] + 3 => 3, + compression_type => { + return Err(FrameReaderError::CompressionTypeError( + compression_type, + )) + }, + }; + let sql_frames = + SqlFrame::from_sql_reader(&timstofdata.get_sql_reader())?; + let tdf_bin_reader = + TdfBlobReader::new(&timstofdata.get_timstof_path())?; #[cfg(feature = "timscompress")] let compressed_reader = CompressedTdfBlobReader::new( &path.as_ref().to_path_buf().join("analysis.tdf_bin"), @@ -63,22 +70,20 @@ impl FrameReader { }; // TODO should be refactored out to quadrupole reader let mut window_groups = vec![0; sql_frames.len()]; - let quadrupole_settings; + let mut quadrupole_settings = &vec![]; if acquisition == AcquisitionType::DIAPASEF { for window_group in - SqlWindowGroup::from_sql_reader(&tdf_sql_reader)? + SqlWindowGroup::from_sql_reader(&timstofdata.get_sql_reader())? { window_groups[window_group.frame - 1] = window_group.window_group; } - quadrupole_settings = QuadrupoleSettingsReader::new(&path)?; - } else { - quadrupole_settings = vec![]; - } + quadrupole_settings = timstofdata.get_quad_settings()?; + }; // TODO move Arc to quad settings reader? 
let quadrupole_settings = quadrupole_settings - .into_iter() - .map(|x| Arc::new(x)) + .iter() + .map(|x| Arc::new(x.clone())) .collect(); let frames = (0..sql_frames.len()) .into_par_iter() @@ -321,12 +326,6 @@ pub enum FrameReaderError { TimscompressError, #[error("{0}")] TdfBlobReaderError(#[from] TdfBlobReaderError), - #[error("{0}")] - MetadataReaderError(#[from] MetadataReaderError), - #[error("{0}")] - FileNotFound(String), - #[error("{0}")] - SqlReaderError(#[from] SqlReaderError), #[error("Corrupt Frame")] CorruptFrame, #[error("{0}")] @@ -335,4 +334,8 @@ pub enum FrameReaderError { IndexOutOfBounds, #[error("Compression type {0} not understood")] CompressionTypeError(u8), + #[error("{0}")] + TimsTofDataError(#[from] TimsTofDataError), + #[error("{0}")] + SqlReaderError(#[from] SqlReaderError), } diff --git a/src/io/readers/metadata_reader.rs b/src/io/readers/metadata_reader.rs index a5ba1db..5d31916 100644 --- a/src/io/readers/metadata_reader.rs +++ b/src/io/readers/metadata_reader.rs @@ -21,6 +21,12 @@ impl MetadataReader { path: impl TimsTofPathLike, ) -> Result { let tdf_sql_reader = SqlReader::open(path)?; + Self::new_from_sql_reader(&tdf_sql_reader) + } + + pub(crate) fn new_from_sql_reader( + tdf_sql_reader: &SqlReader, + ) -> Result { let sql_metadata: HashMap = SqlMetadata::from_sql_reader(&tdf_sql_reader)?; let compression_type = diff --git a/src/io/readers/timstof.rs b/src/io/readers/timstof.rs index 4b8e65a..e03ff6c 100644 --- a/src/io/readers/timstof.rs +++ b/src/io/readers/timstof.rs @@ -1,140 +1,110 @@ -use std::{ - fs, io, - path::{Path, PathBuf}, +use crate::{Metadata, QuadrupoleSettings}; + +use super::{ + file_readers::sql_reader::{SqlReader, SqlReaderError}, + FrameReader, FrameReaderError, MetadataReader, MetadataReaderError, + PrecursorReader, PrecursorReaderError, QuadrupoleSettingsReader, + QuadrupoleSettingsReaderError, SpectrumReader, SpectrumReaderError, + TimsTofPath, TimsTofPathError, TimsTofPathLike, }; -#[derive(Debug, Clone, 
PartialEq, Eq, Hash, Copy)] -pub enum TimsTofFileType { - #[cfg(feature = "minitdf")] - MiniTDF, - #[cfg(feature = "tdf")] - TDF, +pub struct TimsTofData { + timstof_path: TimsTofPath, + metadata: Metadata, + sql_reader: SqlReader, + frame_reader: Option, + spectrum_reader: Option, + precursor_reader: Option, + quad_settings: Option>, } -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct TimsTofPath { - path: PathBuf, - file_type: TimsTofFileType, -} - -impl TimsTofPath { - pub fn new(path: impl AsRef) -> Result { - let path = path.as_ref().canonicalize()?; - #[cfg(feature = "tdf")] - if tdf(&path).is_ok() & tdf_bin(&path).is_ok() { - return Ok(Self { - path, - file_type: TimsTofFileType::TDF, - }); - } +impl TimsTofData { + pub fn new(path: impl TimsTofPathLike) -> Result { + let timstof_path = TimsTofPath::new(&path)?; #[cfg(feature = "minitdf")] - if ms2_bin(&path).is_ok() & ms2_parquet(&path).is_ok() { - return Ok(Self { - path, - file_type: TimsTofFileType::MiniTDF, - }); - } - match path.parent() { - Some(parent) => match Self::new(parent) { - Ok(result) => Ok(result), - Err(_) => Err(TimsTofPathError::UnknownType(path)), - }, - None => return Err(TimsTofPathError::UnknownType(path)), + { + use super::TimsTofFileType; + if timstof_path.file_type() == TimsTofFileType::MiniTDF { + return Err(TimsTofPathError::UnknownType( + path.as_ref().to_path_buf(), + ))?; + } } + let sql_reader = SqlReader::new_from_path(&timstof_path)?; + let metadata = MetadataReader::new_from_sql_reader(&sql_reader)?; + Ok(Self { + timstof_path, + metadata, + sql_reader, + frame_reader: None, + spectrum_reader: None, + precursor_reader: None, + quad_settings: None, + }) } - pub fn tdf(&self) -> Result { - tdf(self) - } - - pub fn tdf_bin(&self) -> Result { - tdf_bin(self) + pub fn get_timstof_path(&self) -> &TimsTofPath { + &self.timstof_path } - pub fn ms2_bin(&self) -> Result { - ms2_bin(self) + pub(crate) fn get_sql_reader(&self) -> &SqlReader { + &self.sql_reader } - pub fn 
ms2_parquet(&self) -> Result { - ms2_parquet(self) + pub fn get_metadata(&self) -> &Metadata { + &self.metadata } - pub fn file_type(&self) -> TimsTofFileType { - self.file_type + pub fn get_quad_settings( + &mut self, + ) -> Result<&Vec, QuadrupoleSettingsReaderError> { + if self.quad_settings.is_none() { + let quad_settings = QuadrupoleSettingsReader::from_sql_settings( + &self.get_sql_reader(), + )?; + self.quad_settings = Some(quad_settings); + } + Ok(self.quad_settings.as_ref().expect("Always initialized")) } -} - -fn tdf(path: impl AsRef) -> Result { - find_extension(path, "analysis.tdf") -} - -fn tdf_bin(path: impl AsRef) -> Result { - find_extension(path, "analysis.tdf_bin") -} -fn ms2_bin(path: impl AsRef) -> Result { - // match find_extension(path, "ms2.bin") { - // Ok(result) => Ok(result), - // Err(_) => find_extension(path, "ms2spectrum.bin"), - // } - // find_extension(path, "ms2.bin") - find_extension(path, "ms2spectrum.bin") -} - -fn ms2_parquet(path: impl AsRef) -> Result { - // match find_extension(path, "ms2.parquet") { - // Ok(result) => Ok(result), - // Err(_) => find_extension(path, "ms2spectrum.parquet"), - // } - // find_extension(path, "ms2.parquet") - find_extension(path, "ms2spectrum.parquet") -} - -fn find_extension( - path: impl AsRef, - extension: &str, -) -> Result { - let extension_lower = extension.to_lowercase(); - for entry in fs::read_dir(&path)? 
{ - if let Ok(entry) = entry { - let file_path = entry.path(); - if let Some(file_name) = - file_path.file_name().and_then(|name| name.to_str()) - { - if file_name.to_lowercase().ends_with(&extension_lower) { - return Ok(file_path); - } - } + pub fn get_frame_reader( + &mut self, + ) -> Result<&FrameReader, FrameReaderError> { + if self.frame_reader.is_none() { + self.frame_reader = Some(FrameReader::new_from_timstofdata(self)?); } + Ok(self.frame_reader.as_ref().expect("Always initialized")) } - Err(TimsTofPathError::Extension( - extension.to_string(), - path.as_ref().to_path_buf(), - )) -} -impl AsRef for TimsTofPath { - fn as_ref(&self) -> &Path { - &self.path + // TODO, reuse TimsTofData and allow builder pattern + pub fn get_precursor_reader( + &mut self, + ) -> Result<&PrecursorReader, PrecursorReaderError> { + if self.precursor_reader.is_none() { + self.precursor_reader = + Some(PrecursorReader::new(&self.timstof_path)?); + } + Ok(self.precursor_reader.as_ref().expect("Always initialized")) } -} - -pub trait TimsTofPathLike: AsRef { - fn to_timstof_path(&self) -> Result; -} -impl> TimsTofPathLike for T { - fn to_timstof_path(&self) -> Result { - TimsTofPath::new(&self) + // TODO, reuse TimsTofData and allow builder pattern + pub fn get_spectrum_reader( + &mut self, + ) -> Result<&SpectrumReader, SpectrumReaderError> { + if self.spectrum_reader.is_none() { + self.spectrum_reader = + Some(SpectrumReader::new(&self.timstof_path)?); + } + Ok(self.spectrum_reader.as_ref().expect("Always initialized")) } } -#[derive(Debug, thiserror::Error)] -pub enum TimsTofPathError { - #[error("Extension {0} not found for {1}")] - Extension(String, PathBuf), +#[derive(thiserror::Error, Debug)] +pub enum TimsTofDataError { + #[error("{0}")] + MetadataReaderError(#[from] MetadataReaderError), + #[error("{0}")] + TimsTofPathError(#[from] TimsTofPathError), #[error("{0}")] - IO(#[from] io::Error), - #[error("No valid type found for {0}")] - UnknownType(PathBuf), + 
SqlReaderError(#[from] SqlReaderError), } diff --git a/src/io/readers/timstof_path.rs b/src/io/readers/timstof_path.rs new file mode 100644 index 0000000..4b8e65a --- /dev/null +++ b/src/io/readers/timstof_path.rs @@ -0,0 +1,140 @@ +use std::{ + fs, io, + path::{Path, PathBuf}, +}; + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Copy)] +pub enum TimsTofFileType { + #[cfg(feature = "minitdf")] + MiniTDF, + #[cfg(feature = "tdf")] + TDF, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct TimsTofPath { + path: PathBuf, + file_type: TimsTofFileType, +} + +impl TimsTofPath { + pub fn new(path: impl AsRef) -> Result { + let path = path.as_ref().canonicalize()?; + #[cfg(feature = "tdf")] + if tdf(&path).is_ok() & tdf_bin(&path).is_ok() { + return Ok(Self { + path, + file_type: TimsTofFileType::TDF, + }); + } + #[cfg(feature = "minitdf")] + if ms2_bin(&path).is_ok() & ms2_parquet(&path).is_ok() { + return Ok(Self { + path, + file_type: TimsTofFileType::MiniTDF, + }); + } + match path.parent() { + Some(parent) => match Self::new(parent) { + Ok(result) => Ok(result), + Err(_) => Err(TimsTofPathError::UnknownType(path)), + }, + None => return Err(TimsTofPathError::UnknownType(path)), + } + } + + pub fn tdf(&self) -> Result { + tdf(self) + } + + pub fn tdf_bin(&self) -> Result { + tdf_bin(self) + } + + pub fn ms2_bin(&self) -> Result { + ms2_bin(self) + } + + pub fn ms2_parquet(&self) -> Result { + ms2_parquet(self) + } + + pub fn file_type(&self) -> TimsTofFileType { + self.file_type + } +} + +fn tdf(path: impl AsRef) -> Result { + find_extension(path, "analysis.tdf") +} + +fn tdf_bin(path: impl AsRef) -> Result { + find_extension(path, "analysis.tdf_bin") +} + +fn ms2_bin(path: impl AsRef) -> Result { + // match find_extension(path, "ms2.bin") { + // Ok(result) => Ok(result), + // Err(_) => find_extension(path, "ms2spectrum.bin"), + // } + // find_extension(path, "ms2.bin") + find_extension(path, "ms2spectrum.bin") +} + +fn ms2_parquet(path: impl AsRef) -> Result 
{ + // match find_extension(path, "ms2.parquet") { + // Ok(result) => Ok(result), + // Err(_) => find_extension(path, "ms2spectrum.parquet"), + // } + // find_extension(path, "ms2.parquet") + find_extension(path, "ms2spectrum.parquet") +} + +fn find_extension( + path: impl AsRef, + extension: &str, +) -> Result { + let extension_lower = extension.to_lowercase(); + for entry in fs::read_dir(&path)? { + if let Ok(entry) = entry { + let file_path = entry.path(); + if let Some(file_name) = + file_path.file_name().and_then(|name| name.to_str()) + { + if file_name.to_lowercase().ends_with(&extension_lower) { + return Ok(file_path); + } + } + } + } + Err(TimsTofPathError::Extension( + extension.to_string(), + path.as_ref().to_path_buf(), + )) +} + +impl AsRef for TimsTofPath { + fn as_ref(&self) -> &Path { + &self.path + } +} + +pub trait TimsTofPathLike: AsRef { + fn to_timstof_path(&self) -> Result; +} + +impl> TimsTofPathLike for T { + fn to_timstof_path(&self) -> Result { + TimsTofPath::new(&self) + } +} + +#[derive(Debug, thiserror::Error)] +pub enum TimsTofPathError { + #[error("Extension {0} not found for {1}")] + Extension(String, PathBuf), + #[error("{0}")] + IO(#[from] io::Error), + #[error("No valid type found for {0}")] + UnknownType(PathBuf), +}