From 4fcf16dbbeaa92c48eaa924443db2e899a2bc55f Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 21 Aug 2025 17:40:51 -0500 Subject: [PATCH 1/6] Changed parallelization in IsoTracker --- mzLib/FlashLFQ/FlashLfqEngine.cs | 147 +++++++++--------- mzLib/FlashLFQ/IsoTracker/XIC.cs | 6 +- mzLib/FlashLFQ/IsoTracker/XICGroups.cs | 36 +++-- .../PeakIndexing/IndexingEngine.cs | 1 - mzLib/Test/FlashLFQ/TestIsoTracker.cs | 24 ++- 5 files changed, 107 insertions(+), 107 deletions(-) diff --git a/mzLib/FlashLFQ/FlashLfqEngine.cs b/mzLib/FlashLFQ/FlashLfqEngine.cs index 9a3794615..b50fb91a0 100644 --- a/mzLib/FlashLFQ/FlashLfqEngine.cs +++ b/mzLib/FlashLFQ/FlashLfqEngine.cs @@ -1826,92 +1826,85 @@ private void QuantifyIsobaricPeaks() double lastReportedProgress = 0; double currentProgress = 0; + foreach(var idGroup in PeptideGroupsForIsoTracker) + { + List xicGroup = new List(); + var mostCommonChargeIdGroup = idGroup.Identifications.GroupBy(p => p.PrecursorChargeState).MaxBy(p => p.Count()); + var id = mostCommonChargeIdGroup.First(); - Parallel.ForEach(Partitioner.Create(0, PeptideGroupsForIsoTracker.Count), - new ParallelOptions { MaxDegreeOfParallelism = FlashParams.MaxThreads }, - (range, loopState) => + // Try to get the primitive window for the XIC , from the (firstOne - 2) -> (lastOne + 2) + double rightWindow = mostCommonChargeIdGroup.Select(p => p.Ms2RetentionTimeInMinutes).Max() + 2; + double leftWindow = mostCommonChargeIdGroup.Select(p => p.Ms2RetentionTimeInMinutes).Min() - 2; + + //generate XIC from each file + foreach (var spectraFile in SpectraFileInfoList) { - for (int i = range.Item1; i < range.Item2; i++) + var xicIds = mostCommonChargeIdGroup.Where(p => p.FileInfo.Equals(spectraFile)).ToList(); + + //If in this run, there is no any ID, we will borrow the ID from other run to build the XIC. + if (xicIds.Count == 0) { - var idGroup = PeptideGroupsForIsoTracker[i]; - List xicGroup = new List(); - var mostCommonChargeIdGroup = idGroup.Identifications.GroupBy(p => p.PrecursorChargeState).OrderBy(p => p.Count()).Last(); - var id = mostCommonChargeIdGroup.First(); + xicIds = mostCommonChargeIdGroup.Where(p => !p.IsDecoy).ToList(); + } - // Try to get the primitive window for the XIC , from the (firstOne - 2) -> (lastOne + 2) - double rightWindow = mostCommonChargeIdGroup.Select(p => p.Ms2RetentionTimeInMinutes).Max() + 2; - double leftWindow = mostCommonChargeIdGroup.Select(p => p.Ms2RetentionTimeInMinutes).Min() - 2; + XIC xic = BuildXIC(xicIds, spectraFile, false, leftWindow, rightWindow); + if (xic != null && xic.Ms1Peaks.Count() > 5) // each XIC should have at least 5 points + { + xicGroup.Add(xic); + } + } - //generate XIC from each file - foreach (var spectraFile in SpectraFileInfoList) - { - var xicIds = mostCommonChargeIdGroup.Where(p => p.FileInfo.Equals(spectraFile)).ToList(); + // In order to eliminate the bad XIC case that only one id for each file. + // We need to check if the XICGroup has more than one ID in one file. + if ( (!FlashParams.RequireMultipleIdsInOneFiles || MoreThanOneID(xicGroup)) && xicGroup.Count > 1) + { + // Step 1: Find the XIC with most IDs then, set as reference XIC + xicGroup.OrderByDescending(p => p.Ids.Count()).First().Reference = true; - //If in this run, there is no any ID, we will borrow the ID from other run to build the XIC. - if (xicIds.Count == 0) - { - xicIds = mostCommonChargeIdGroup.Where(p => !p.IsDecoy).ToList(); - } + //Step 2: Build the XICGroups + XICGroups xicGroups = new XICGroups(xicGroup, maxThreads: FlashParams.MaxThreads); - XIC xic = BuildXIC(xicIds, spectraFile, false, leftWindow, rightWindow); - if (xic != null && xic.Ms1Peaks.Count() > 5) // each XIC should have at least 5 points - { - xicGroup.Add(xic); - } - } + //Step 3: Build the peakPairChrom dictionary + //The shared peak dictionary, each sharedPeak included serval ChromatographicPeak for each run [SharedPeak <-> ChromatographicPeaks] + Dictionary> sharedPeaksDict = new Dictionary>(); - // In order to eliminate the bad XIC case that only one id for each file. - // We need to check if the XICGroup has more than one ID in one file. - if ( (!FlashParams.RequireMultipleIdsInOneFiles || MoreThanOneID(xicGroup)) && xicGroup.Count > 1) + //Step 4: Iterate the shared peaks in the XICGroups + foreach (var peak in xicGroups.SharedPeaks) + { + double peakWindow = peak.Width; + if (peakWindow > 0.001) //make sure we have enough length of the window (0.001 min) for peak searching { - // Step 1: Find the XIC with most IDs then, set as reference XIC - xicGroup.OrderByDescending(p => p.Ids.Count()).First().Reference = true; - - //Step 2: Build the XICGroups - XICGroups xICGroups = new XICGroups(xicGroup); + List chromPeaksInSharedPeak = new List(); + CollectChromPeakInRuns(peak, chromPeaksInSharedPeak, xicGroups); - //Step 3: Build the peakPairChrom dictionary - //The shared peak dictionary, each sharedPeak included serval ChromatographicPeak for each run [SharedPeak <-> ChromatographicPeaks] - Dictionary> sharedPeaksDict = new Dictionary>(); + var allChromPeakInDict = sharedPeaksDict.SelectMany(pair => pair.Value).ToList(); - //Step 4: Iterate the shared peaks in the XICGroups - foreach (var peak in xICGroups.SharedPeaks) + // ensuring no duplicates are added and only non-null peaks are considered. + if (chromPeaksInSharedPeak.Where(p => p != null).Any() && + !chromPeaksInSharedPeak.Any(chromPeak => chromPeak != null && allChromPeakInDict.Any(peakInDict => peakInDict != null && peakInDict.Equals(chromPeak)))) { - double peakWindow = peak.Width; - if (peakWindow > 0.001) //make sure we have enough length of the window (0.001 min) for peak searching - { - List chromPeaksInSharedPeak = new List(); - CollectChromPeakInRuns(peak, chromPeaksInSharedPeak, xICGroups); - - var allChromPeakInDict = sharedPeaksDict.SelectMany(pair => pair.Value).ToList(); - - // ensuring no duplicates are added and only non-null peaks are considered. - if (chromPeaksInSharedPeak.Where(p => p != null).Any() && - !chromPeaksInSharedPeak.Any(chromPeak => chromPeak != null && allChromPeakInDict.Any(peakInDict => peakInDict != null && peakInDict.Equals(chromPeak)))) - { - sharedPeaksDict[peak] = chromPeaksInSharedPeak; - } - } + sharedPeaksDict[peak] = chromPeaksInSharedPeak; } - IsobaricPeptideDict.TryAdd(id.ModifiedSequence, sharedPeaksDict); } + } + IsobaricPeptideDict.TryAdd(id.ModifiedSequence, sharedPeaksDict); + } - // report search progress (proteins searched so far out of total proteins in database) - if (!FlashParams.Silent) - { - Interlocked.Increment(ref isoGroupsSearched); + // report search progress (proteins searched so far out of total proteins in database) + if (!FlashParams.Silent) + { + Interlocked.Increment(ref isoGroupsSearched); - double percentProgress = ((double)isoGroupsSearched / PeptideGroupsForIsoTracker.Count * 100); - currentProgress = Math.Max(percentProgress, currentProgress); + double percentProgress = ((double)isoGroupsSearched / PeptideGroupsForIsoTracker.Count * 100); + currentProgress = Math.Max(percentProgress, currentProgress); - if (currentProgress > lastReportedProgress + 10) - { - Console.WriteLine("{0:0.}% of isobaric species quantified", currentProgress); - lastReportedProgress = currentProgress; - } - } + if (currentProgress > lastReportedProgress + 10) + { + Console.WriteLine("{0:0.}% of isobaric species quantified", currentProgress); + lastReportedProgress = currentProgress; } - }); + } + } if (!FlashParams.Silent) Console.WriteLine("Finished quantifying isobaric species!"); } @@ -2109,7 +2102,7 @@ internal void AddIsoPeaks() //remove the repeated peaks from FlashLFQ with the same identification list, the priority is IsoTrack > MSMS foreach (var peak in allIsoTrackerPeaksInFile) { - _results.Peaks[fileInfo].RemoveAll(p => IDsEqual(p.Identifications,peak.Identifications)); + _results.Peaks[fileInfo].RemoveAll(p => IDsEqual(p.Identifications, peak.Identifications)); } // Add the peaks into the result dictionary, and remove the duplicated peaks. @@ -2139,19 +2132,25 @@ internal void AddIsoPeaks() /// /// /// - private bool IDsEqual(List idList1, List idList2) + public bool IDsEqual(List idList1, List idList2) { if (idList1.Count != idList2.Count) { return false; } - var sortedIdList1 = idList1.OrderBy(id => id.ModifiedSequence).ToList(); - var sortedIdList2 = idList2.OrderBy(id => id.ModifiedSequence).ToList(); + if (idList1.Count == 1) + { + return idList1[0].ModifiedSequence.Equals(idList2[0].ModifiedSequence); + } + + // Sort both lists in place by modified sequence + idList1.Sort((a, b) => a.ModifiedSequence.CompareTo(b.ModifiedSequence)); + idList2.Sort((a, b) => a.ModifiedSequence.CompareTo(b.ModifiedSequence)); - for (int i = 0; i < sortedIdList1.Count; i++) + for (int i = 0; i < idList1.Count; i++) { - if (!sortedIdList1[i].ModifiedSequence.Equals(sortedIdList2[i].ModifiedSequence)) + if (!idList1[i].ModifiedSequence.Equals(idList2[i].ModifiedSequence)) { return false; } diff --git a/mzLib/FlashLFQ/IsoTracker/XIC.cs b/mzLib/FlashLFQ/IsoTracker/XIC.cs index 2646a0052..dab771a45 100644 --- a/mzLib/FlashLFQ/IsoTracker/XIC.cs +++ b/mzLib/FlashLFQ/IsoTracker/XIC.cs @@ -115,12 +115,12 @@ internal void BuildLinearSpline() } /// - /// calculate the retention time shift among the reference. Then store the value in the RtShift property. + /// Calculate the retention time shift among the reference. Then store the value in the RtShift property. /// /// The reference XIC /// The number of the timePoint for X-correlation /// The retention to shift to align to the reference - public double AlignXICs(XIC referenceXIC, int resolution = 1000) + public double AlignXICs(XIC referenceXIC, int resolution = 600) { var referSpline = referenceXIC.LinearSpline; var toAlignSpline = this.LinearSpline; @@ -132,7 +132,7 @@ public double AlignXICs(XIC referenceXIC, int resolution = 1000) // Create two intensity arrays of the reference and target XICs by interpolating the time point. // The number of timePoints depend on the resolution. - // If the resolution is 1000, the timePoint is 1/1000 = 0.001s. + // If the resolution is 600, the timePoint is 60/600 = 0.1s. Complex[] reference = new Complex[(int)((FinalTime - initialTime) * resolution + 2)]; Complex[] toAlign = new Complex[(int)((FinalTime - initialTime) * resolution + 2)]; int index = 0; diff --git a/mzLib/FlashLFQ/IsoTracker/XICGroups.cs b/mzLib/FlashLFQ/IsoTracker/XICGroups.cs index f74a14520..6deaa65cb 100644 --- a/mzLib/FlashLFQ/IsoTracker/XICGroups.cs +++ b/mzLib/FlashLFQ/IsoTracker/XICGroups.cs @@ -1,6 +1,8 @@ using System.Collections; +using System.Collections.Concurrent; using System.Collections.Generic; using System.Linq; +using System.Threading.Tasks; namespace FlashLFQ.IsoTracker { @@ -9,6 +11,17 @@ namespace FlashLFQ.IsoTracker /// public class XICGroups : IEnumerable { + /// + /// The resolution used for XIC alignment. Default is 600, equating to 600 points per minute. + /// This yields a time resolution of 0.1 seconds. + /// + public const int AlignmentResolution = 1000; + + /// + /// Defines the tolerance for locating shared extrema. + /// + public double SharedExtremaTolerance = 60.0 / AlignmentResolution + 0.005; // 60 seconds, divided by the Alignment resolution, plus a small tolerance + /// /// The reference XIC for alignment /// @@ -18,10 +31,6 @@ public class XICGroups : IEnumerable /// public List XICs; /// - /// The retention time shift of each XIC - /// - public Dictionary RTDict; - /// /// The shared extrema in the XICs, time project to the reference XIC /// public List SharedExtrema; @@ -41,23 +50,20 @@ public class XICGroups : IEnumerable /// The xICs list /// The require number ratio to define the shared peaks, if threshold is 0.5, at lease 50% need to be tracked /// The tolerance window to pick up shared exterma - public XICGroups(List xICs, double sharedPeakThreshold = 0.55 , double tolerance = 0.10, double cutOff = 30000) + public XICGroups(List xICs, double sharedPeakThreshold = 0.55 , double tolerance = 0.10, double cutOff = 30000, int maxThreads = 1) { XICs = xICs; - //this.ids = ids; ReferenceXIC = XICs.First(p => p.Reference); // set up the XIC reference - RTDict = new Dictionary(); // build a dictionary to store the retention time shift of each XIC - int xicID = 0; - foreach (var xic in XICs) + Parallel.ForEach(XICs, new ParallelOptions { MaxDegreeOfParallelism = maxThreads }, xic => { - RTDict.Add(xicID, xic.AlignXICs(ReferenceXIC)); //xic.AlignXICs(reference) - xic.FindExtrema(); - xicID++; - } + xic.AlignXICs(ReferenceXIC, AlignmentResolution); // align the XICs to the reference + xic.FindExtrema(); // find the extrema in each XIC + }); + SharedExtrema = new List(); - FindSharedExtrema(sharedPeakThreshold, tolerance, cutOff); // find the sharedExtrema + FindSharedExtrema(sharedPeakThreshold, SharedExtremaTolerance, cutOff); // find the sharedExtrema ProjectExtremaInRef(ReferenceXIC, SharedExtrema); // project the sharedExtrema in the reference XIC SharedPeaks = BuildSharedPeaks(); // build the indexed peaks @@ -92,8 +98,6 @@ public void FindSharedExtrema(double count_threshold, double tolerance, double c SharedExtrema = group_min.Concat(group_max).ToList(); SharedExtrema.Sort((p1, p2) => p1.RetentionTime.CompareTo(p2.RetentionTime)); // sort the shared extrema by the retention time SharePeakTrimming(); - - } /// diff --git a/mzLib/MassSpectrometry/PeakIndexing/IndexingEngine.cs b/mzLib/MassSpectrometry/PeakIndexing/IndexingEngine.cs index 2c74c2d88..fff186072 100644 --- a/mzLib/MassSpectrometry/PeakIndexing/IndexingEngine.cs +++ b/mzLib/MassSpectrometry/PeakIndexing/IndexingEngine.cs @@ -2,7 +2,6 @@ using System; using System.Collections.Generic; using System.Linq; -using static MassSpectrometry.IsoDecAlgorithm; #nullable enable namespace MassSpectrometry diff --git a/mzLib/Test/FlashLFQ/TestIsoTracker.cs b/mzLib/Test/FlashLFQ/TestIsoTracker.cs index 59720e4ff..bef6894ea 100644 --- a/mzLib/Test/FlashLFQ/TestIsoTracker.cs +++ b/mzLib/Test/FlashLFQ/TestIsoTracker.cs @@ -401,7 +401,7 @@ public static void TestPeakAlignment() var xic2 = new XIC(peaks2, 100, spectraFile, false); var xic3 = new XIC(peaks3, 100, spectraFile, false); - double rtTolerance = 0.0011; + double rtTolerance = 0.011; // Assert Assert.AreEqual(-0.1, xic2.AlignXICs(xic1), rtTolerance); // Peak 2 should be shifted to the left by 0.1 Assert.AreEqual(0.1, xic3.AlignXICs(xic1), rtTolerance); // Peak 3 should be shifted to the right by 0.1 @@ -514,11 +514,8 @@ public static void TestXICGroupConstructor() Assert.AreEqual(xic, xicGroup.XICs[0]); Assert.AreEqual(xic2, xicGroup.XICs[1]); - - Assert.IsNotNull(xicGroup.RTDict); - Assert.AreEqual(xicGroup.RTDict.Count, 2); - Assert.AreEqual(xicGroup.RTDict[0], 0.0); //reference XIC Rt is 0 - Assert.AreEqual(xicGroup.RTDict[1], 0.0); //non-reference XIC Rt is 0 + Assert.AreEqual(xicGroup.XICs[1].RtShift, 0.0); //reference XIC Rt is 0 + Assert.AreEqual(xicGroup.XICs[1].RtShift, 0.0); //non-reference XIC Rt is 0 Assert.IsNotNull(xicGroup.IdList); Assert.AreEqual(xicGroup.IdList.Count, 2); @@ -574,9 +571,9 @@ public static void TestXICGroup_RtDict() var xicGroup = new XICGroups(new List { xic1, xic2, xic3 }); // Assert - Assert.AreEqual(xicGroup.RTDict[0], xic1.AlignXICs(xic1)); - Assert.AreEqual(xicGroup.RTDict[1], xic2.AlignXICs(xic1)); - Assert.AreEqual(xicGroup.RTDict[2], xic3.AlignXICs(xic1)); + Assert.That(0, Is.EqualTo(xic1.AlignXICs(xic1)).Within(0.1) ); + Assert.That(-0.1, Is.EqualTo(xic2.AlignXICs(xic1)).Within(0.1) ); + Assert.That(0.1, Is.EqualTo(xic3.AlignXICs(xic1)).Within(0.1) ); } [Test] @@ -660,9 +657,9 @@ public static void TestXICGroup_Tracking() // Assert RT shift - Assert.AreEqual(xicGroup.RTDict[0], 0, 0.001); - Assert.AreEqual(xicGroup.RTDict[1], -3, 0.001); - Assert.AreEqual(xicGroup.RTDict[2], 3, 0.001); + Assert.AreEqual(xicGroup.XICs[0].RtShift, 0, 0.01); + Assert.AreEqual(xicGroup.XICs[1].RtShift, -3, 0.01); + Assert.AreEqual(xicGroup.XICs[2].RtShift, 3, 0.01); // Assert the sharedPeak projection, the time point should be the same as the reference XIC Assert.AreEqual(xicGroup.SharedExtrema.Count, xicGroup.ExtremaInRef.Count); @@ -1033,12 +1030,13 @@ public static void TestIsoSequence_Ambiguous() Assert.AreEqual(peptidesList.Count, isobaricPeptideNum); Assert.AreEqual(peaksList.Count, isobaricPeakNum); List expectedSequence = new List { "DIVENYFM[Common Variable:Oxidation on M]R", "DIVENYF[Common Variable:Oxidation on M]MR", "DIVENY[Common Variable:Oxidation on M]FMR" }; + expectedSequence.Sort(); //Check the detectionType of each peak, in this case all peaks is IsoTrack_Ambiguous //The output sequence should be the same as the expected sequence foreach (var peak in peaksList) { - var peakSeq = peak.Split('\t')[2].Split('|').ToList(); + var peakSeq = peak.Split('\t')[2].Split('|').OrderBy(s => s).ToList(); var detectionType = peak.Split('\t')[16]; Assert.AreEqual(peakSeq, expectedSequence); CollectionAssert.AreEqual(detectionType, "IsoTrack_Ambiguous"); From cc6fa44a27797bb5afa4ffd94887a0c0814b7851 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 21 Aug 2025 19:29:11 -0500 Subject: [PATCH 2/6] implemented binary search in BuildXIC --- mzLib/FlashLFQ/FlashLfqEngine.cs | 30 +++++++++++++++++++----------- mzLib/mzLib.nuspec | 2 +- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/mzLib/FlashLFQ/FlashLfqEngine.cs b/mzLib/FlashLFQ/FlashLfqEngine.cs index b50fb91a0..048ed403b 100644 --- a/mzLib/FlashLFQ/FlashLfqEngine.cs +++ b/mzLib/FlashLFQ/FlashLfqEngine.cs @@ -1935,17 +1935,25 @@ internal XIC BuildXIC(List ids, SpectraFileInfo spectraFile, boo PpmTolerance isotopeTolerance = new PpmTolerance(FlashParams.PpmTolerance); ScanInfo[] ms1ScanInfos = peakIndexingEngine.ScanInfoArray; - ScanInfo startScan = ms1ScanInfos - .Where(p => p.RetentionTime < start) - .OrderBy(p => p.RetentionTime) - .LastOrDefault() - ?? ms1ScanInfos.OrderBy(p => p.RetentionTime).First(); // If the start time is before the first scan, use the first scan - - ScanInfo endScan = ms1ScanInfos - .Where(p => p.RetentionTime > end) - .OrderBy(p => p.RetentionTime) - .FirstOrDefault() - ?? ms1ScanInfos.OrderBy(p => p.RetentionTime).Last(); // If the end time is after the last scan, use the last scan + // Binary search for start and end scans using a custom comparer for RetentionTime + var scanInfoComparer = Comparer.Create((a, b) => a.RetentionTime.CompareTo(b.RetentionTime)); + + // Create search keys (dummy ScanInfo objects with target retention times) + var startKey = new ScanInfo(0, 0, start, 0); + var endKey = new ScanInfo(0, 0, end, 0); + + // Binary search for start scan (scan with RT <= start) + int startIndex = Array.BinarySearch(ms1ScanInfos, startKey, scanInfoComparer); + if (startIndex < 0) startIndex = ~startIndex - 1; // If exact match not found, get largest index with RT <= start + if (startIndex < 0) startIndex = 0; // Handle case where all scans have RT > start + + // Binary search for end scan (scan with RT >= end) + int endIndex = Array.BinarySearch(ms1ScanInfos, endKey, scanInfoComparer); + if (endIndex < 0) endIndex = ~endIndex; // If exact match not found, get smallest index with RT >= end + if (endIndex >= ms1ScanInfos.Length) endIndex = ms1ScanInfos.Length - 1; // Handle case where all scans have RT < end + + ScanInfo startScan = ms1ScanInfos[startIndex]; + ScanInfo endScan = ms1ScanInfos[endIndex]; // Collect all peaks from the Ms1 scans in the given time window, then build the XIC List peaks = new List(); diff --git a/mzLib/mzLib.nuspec b/mzLib/mzLib.nuspec index 76019a72d..b2cce537d 100644 --- a/mzLib/mzLib.nuspec +++ b/mzLib/mzLib.nuspec @@ -2,7 +2,7 @@ mzLib - 1.0.559 + 9.0.1 mzLib Stef S. Stef S. From 766d6ffb89e2154ca655dae39e780062cc0d9813 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 21 Aug 2025 19:29:39 -0500 Subject: [PATCH 3/6] Decreased resolution in XicGroups --- mzLib/FlashLFQ/IsoTracker/XICGroups.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mzLib/FlashLFQ/IsoTracker/XICGroups.cs b/mzLib/FlashLFQ/IsoTracker/XICGroups.cs index 6deaa65cb..9b63f3806 100644 --- a/mzLib/FlashLFQ/IsoTracker/XICGroups.cs +++ b/mzLib/FlashLFQ/IsoTracker/XICGroups.cs @@ -15,7 +15,7 @@ public class XICGroups : IEnumerable /// The resolution used for XIC alignment. Default is 600, equating to 600 points per minute. /// This yields a time resolution of 0.1 seconds. /// - public const int AlignmentResolution = 1000; + public const int AlignmentResolution = 600; /// /// Defines the tolerance for locating shared extrema. From 15da18f0fbb60775f4996be5cd87fd4fa87951b1 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 21 Aug 2025 19:34:04 -0500 Subject: [PATCH 4/6] reverted change to nuspec --- mzLib/mzLib.nuspec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mzLib/mzLib.nuspec b/mzLib/mzLib.nuspec index b2cce537d..76019a72d 100644 --- a/mzLib/mzLib.nuspec +++ b/mzLib/mzLib.nuspec @@ -2,7 +2,7 @@ mzLib - 9.0.1 + 1.0.559 mzLib Stef S. Stef S. From 233a000cbeb94c0a511f4d8fa7cc57898a58e39f Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 22 Aug 2025 00:46:15 -0500 Subject: [PATCH 5/6] Added new method for efficient XIC retrieval, test --- mzLib/FlashLFQ/FlashLfqEngine.cs | 20 ++-- mzLib/FlashLFQ/IsoTracker/XIC.cs | 5 +- .../PeakIndexing/IndexingEngine.cs | 41 ++++++- mzLib/Test/FlashLFQ/IndexingEngineTests.cs | 100 ++++++++++++++++++ 4 files changed, 149 insertions(+), 17 deletions(-) diff --git a/mzLib/FlashLFQ/FlashLfqEngine.cs b/mzLib/FlashLFQ/FlashLfqEngine.cs index 048ed403b..32422b918 100644 --- a/mzLib/FlashLFQ/FlashLfqEngine.cs +++ b/mzLib/FlashLFQ/FlashLfqEngine.cs @@ -1931,7 +1931,7 @@ internal bool MoreThanOneID(List xics) internal XIC BuildXIC(List ids, SpectraFileInfo spectraFile, bool isReference, double start, double end) { Identification id = ids.FirstOrDefault(); - var peakIndexingEngine = IndexingEngineDictionary[spectraFile]; + var peakIndexingEngine = (PeakIndexingEngine)IndexingEngineDictionary[spectraFile]; PpmTolerance isotopeTolerance = new PpmTolerance(FlashParams.PpmTolerance); ScanInfo[] ms1ScanInfos = peakIndexingEngine.ScanInfoArray; @@ -1956,23 +1956,15 @@ internal XIC BuildXIC(List ids, SpectraFileInfo spectraFile, boo ScanInfo endScan = ms1ScanInfos[endIndex]; // Collect all peaks from the Ms1 scans in the given time window, then build the XIC - List peaks = new List(); - for (int j = startScan.ZeroBasedScanIndex; j <= endScan.ZeroBasedScanIndex; j++) - { - double mz = id.PeakfindingMass.ToMz(id.PrecursorChargeState); - IIndexedPeak peak = peakIndexingEngine.GetIndexedPeak(mz , j, isotopeTolerance); - if (peak != null) - { - peaks.Add(peak); - } - } + double mz = id.PeakfindingMass.ToMz(id.PrecursorChargeState); + List peaks = peakIndexingEngine.GetXicFromScanRange(startScan.ZeroBasedScanIndex, endScan.ZeroBasedScanIndex, mz, isotopeTolerance); - XIC xIC = null; if (peaks.Count > 0) { - xIC = new XIC(peaks, id.PeakfindingMass, spectraFile, isReference, ids); + return new XIC(peaks, id.PeakfindingMass, spectraFile, isReference, ids); } - return xIC; + else + return null; } /// diff --git a/mzLib/FlashLFQ/IsoTracker/XIC.cs b/mzLib/FlashLFQ/IsoTracker/XIC.cs index dab771a45..10b67a229 100644 --- a/mzLib/FlashLFQ/IsoTracker/XIC.cs +++ b/mzLib/FlashLFQ/IsoTracker/XIC.cs @@ -125,9 +125,10 @@ public double AlignXICs(XIC referenceXIC, int resolution = 600) var referSpline = referenceXIC.LinearSpline; var toAlignSpline = this.LinearSpline; - double timegap = (Ms1Peaks.Last().RetentionTime - Ms1Peaks[0].RetentionTime) / (Ms1Peaks.Count - 1); + // Timegap is the average time that elapses between each MS1 scan + double timegap = (Ms1Peaks[^1].RetentionTime - Ms1Peaks[0].RetentionTime) / (Ms1Peaks[^1].ZeroBasedScanIndex - Ms1Peaks[0].ZeroBasedScanIndex); double initialTime = Ms1Peaks[0].RetentionTime - 5.0 * timegap; //after the padding, the first peak move ahead 5 timegap - double FinalTime = Ms1Peaks.Last().RetentionTime + 5.0 * timegap; //after the padding, the last peak move back 5 timegap + double FinalTime = Ms1Peaks[^1].RetentionTime + 5.0 * timegap; //after the padding, the last peak move back 5 timegap double time = initialTime; // Create two intensity arrays of the reference and target XICs by interpolating the time point. diff --git a/mzLib/MassSpectrometry/PeakIndexing/IndexingEngine.cs b/mzLib/MassSpectrometry/PeakIndexing/IndexingEngine.cs index fff186072..28639c923 100644 --- a/mzLib/MassSpectrometry/PeakIndexing/IndexingEngine.cs +++ b/mzLib/MassSpectrometry/PeakIndexing/IndexingEngine.cs @@ -1,4 +1,5 @@ -using MzLibUtil; +using MathNet.Numerics.RootFinding; +using MzLibUtil; using System; using System.Collections.Generic; using System.Linq; @@ -105,6 +106,44 @@ public List GetXic(double m, double retentionTime, Tolerance ppmTo return GetXicByScanIndex(m, scanIndex, ppmTolerance, missedScansAllowed, maxPeakHalfWidth, charge, matchedPeaks); } + public List GetXicFromScanRange(int zeroBasedStartIndex, int zeroBasedEndIndex, double m, Tolerance ppmTolerance, + int? charge = null, Dictionary matchedPeaks = null) + { + if (ScanInfoArray == null) throw new MzLibException("Error: Attempt to retrieve XIC before peak indexing was performed"); + if (zeroBasedStartIndex < 0 || zeroBasedEndIndex >= ScanInfoArray.Length) + throw new ArgumentOutOfRangeException("Scan indices are out of range of the indexed scans"); + List xic = new List(); + var allBins = GetBinsInRange(m, ppmTolerance); + if (allBins.Count == 0) + return xic; + + // For each bin, find + store a pointer to the current index + int[] peakPointerArray = allBins.Select(b => BinarySearchForIndexedPeak(b, zeroBasedStartIndex)).ToArray(); + + for (int i = zeroBasedStartIndex; i <= zeroBasedEndIndex; i++) + { + + if (i < 0 || i > ScanInfoArray.Length - 1) + break; + + for (int j = 0; j < peakPointerArray.Length; j++) + { + // Increment all pointers in the pointer array + while (peakPointerArray[j] < allBins[j].Count - 1 && allBins[j][peakPointerArray[j]].ZeroBasedScanIndex < i) + peakPointerArray[j]++; + } + + // Search for the next peak + var nextPeak = GetBestPeakFromBins(allBins, m, i, peakPointerArray, ppmTolerance, charge); + + // Add the peak to the XIC or increment the missed peaks + if (nextPeak.IsNotDefaultOrNull()) + xic.Add(nextPeak); + + } + return xic; + } + /// /// A generic method of peak tracing across the retention time. Finds peaks with a given mz that occur on either side of a given /// retention time. Peak searching iterates backwards through the scans until the peak diff --git a/mzLib/Test/FlashLFQ/IndexingEngineTests.cs b/mzLib/Test/FlashLFQ/IndexingEngineTests.cs index 006691f2a..8a52fce80 100644 --- a/mzLib/Test/FlashLFQ/IndexingEngineTests.cs +++ b/mzLib/Test/FlashLFQ/IndexingEngineTests.cs @@ -486,5 +486,105 @@ public static void TestMassIndexingEngineNullable() var massIndexingEngine = MassIndexingEngine.InitializeMassIndexingEngine(scans.ToArray(), deconParameters); Assert.That(massIndexingEngine, Is.Null); } + + [Test] + public static void TestGetXicFromScanRange() + { + // Setup test data - similar pattern to TestXicStops test + string fileToWrite = "scanRangeTest.mzML"; + string peptide = "PEPTIDE"; + double intensity = 1e6; + + MsDataScan[] scans = new MsDataScan[10]; + double[] intensityMultipliers = { 1, 3, 0, 0, 3, 5, 10, 5, 3, 1 }; + ChemicalFormula cf = new Proteomics.AminoAcidPolymer.Peptide(peptide).GetChemicalFormula(); + IsotopicDistribution dist = IsotopicDistribution.GetDistribution(cf, 0.125, 1e-8); + + // Create spectra with peptide peaks absent in scans 2 and 3 + for (int s = 0; s < scans.Length; s++) + { + double[] mz = dist.Masses.Select(v => v.ToMz(1)).ToArray(); + double[] intensities = dist.Intensities.Select(v => v * intensity * intensityMultipliers[s]).ToArray(); + + // Scans 2 and 3 don't have the target peak + if (s == 2 || s == 3) + { + mz = dist.Masses.Select(v => v.ToMz(2)).ToArray(); + } + + MzSpectrum spectrum = intensityMultipliers[s] == 0 + ? new MzSpectrum(new double[] { }, new double[] { }, false) + : new MzSpectrum(mz, intensities, false); + + + scans[s] = new MsDataScan(massSpectrum: spectrum, oneBasedScanNumber: s + 1, msnOrder: 1, isCentroid: true, + polarity: Polarity.Positive, retentionTime: 1.0 + s / 10.0, scanWindowRange: new MzRange(400, 1600), scanFilter: "f", + mzAnalyzer: MZAnalyzerType.Orbitrap, totalIonCurrent: intensities.Sum(), injectionTime: 1.0, noiseData: null, nativeId: "scan=" + (s + 1)); + } + + // Write the .mzML + Readers.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(new FakeMsDataFile(scans), + Path.Combine(TestContext.CurrentContext.TestDirectory, fileToWrite), false); + + // Initialize indexing engine + SpectraFileInfo file1 = new SpectraFileInfo(Path.Combine(TestContext.CurrentContext.TestDirectory, fileToWrite), "", 0, 0, 0); + PeakIndexingEngine indexEngine = PeakIndexingEngine.InitializeIndexingEngine(file1); + + // Test 1: Basic scan range functionality + var xic1 = indexEngine.GetXicFromScanRange(0, 9, dist.Masses.First().ToMz(1), new PpmTolerance(20)); + Assert.That(xic1.Count, Is.EqualTo(8), "Should find peaks in all scans except 2 and 3"); + + // Test 2: Partial scan range at start + var xic2 = indexEngine.GetXicFromScanRange(0, 4, dist.Masses.First().ToMz(1), new PpmTolerance(20)); + Assert.That(xic2.Count, Is.EqualTo(3), "Should find peaks in scans 0, 1, and 4 only"); + + // Test 3: Partial scan range at end + var xic3 = indexEngine.GetXicFromScanRange(5, 9, dist.Masses.First().ToMz(1), new PpmTolerance(20)); + Assert.That(xic3.Count, Is.EqualTo(5), "Should find peaks in scans 5-9"); + + // Test 4: Scan range that contains no peaks of interest + var xic4 = indexEngine.GetXicFromScanRange(2, 3, dist.Masses.First().ToMz(1), new PpmTolerance(20)); + Assert.That(xic4.Count, Is.EqualTo(0), "Should find no peaks in scans 2-3"); + + // Test 5: Single scan + var xic5 = indexEngine.GetXicFromScanRange(6, 6, dist.Masses.First().ToMz(1), new PpmTolerance(20)); + Assert.That(xic5.Count, Is.EqualTo(1), "Should find exactly one peak in scan 6"); + Assert.That(xic5.First().ZeroBasedScanIndex, Is.EqualTo(6), "Peak should be from scan 6"); + + // Test 6: Ensuring scan order is preserved + var xic6 = indexEngine.GetXicFromScanRange(4, 8, dist.Masses.First().ToMz(1), new PpmTolerance(20)); + for (int i = 1; i < xic6.Count; i++) + { + Assert.That(xic6[i].ZeroBasedScanIndex, Is.GreaterThan(xic6[i - 1].ZeroBasedScanIndex), + "Peaks should be ordered by scan index"); + } + + // Test 7: Compare with GetXicByScanIndex for the same range + var xicComparison = indexEngine.GetXicByScanIndex(dist.Masses.First().ToMz(1), 6, new PpmTolerance(20), 1); + var xic7 = indexEngine.GetXicFromScanRange(4, 9, dist.Masses.First().ToMz(1), new PpmTolerance(20)); + + Assert.That(xic7.Count, Is.EqualTo(6), "Should find peaks in scans 4-9"); + + // Check the peak counts match between methods for the same effective range + Assert.That(xicComparison.Count, Is.EqualTo(xic7.Count), + "GetXicByScanIndex and GetXicFromScanRange should find the same number of peaks"); + + // Test 8: Out of range indices should throw ArgumentOutOfRangeException + Assert.Throws(() => + indexEngine.GetXicFromScanRange(-1, 5, dist.Masses.First().ToMz(1), new PpmTolerance(20))); + + Assert.Throws(() => + indexEngine.GetXicFromScanRange(0, 10, dist.Masses.First().ToMz(1), new PpmTolerance(20))); + + // Clean up + try + { + File.Delete(Path.Combine(TestContext.CurrentContext.TestDirectory, fileToWrite)); + } + catch + { + // Ignore deletion errors + } + } } } From e518ea89cc45b2b5bd4b3f595981e3370e4d7471 Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 22 Aug 2025 12:00:43 -0500 Subject: [PATCH 6/6] Added additional logging --- mzLib/FlashLFQ/FlashLfqEngine.cs | 47 ++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/mzLib/FlashLFQ/FlashLfqEngine.cs b/mzLib/FlashLFQ/FlashLfqEngine.cs index 32422b918..f5b8b1712 100644 --- a/mzLib/FlashLFQ/FlashLfqEngine.cs +++ b/mzLib/FlashLFQ/FlashLfqEngine.cs @@ -1821,13 +1821,20 @@ private void CutPeak(ChromatographicPeak peak, double identificationTime) private void QuantifyIsobaricPeaks() { if(!FlashParams.Silent) - Console.WriteLine("Quantifying isobaric species..."); + Console.WriteLine("Quantifying isobaric species for {0} isobar groups...", PeptideGroupsForIsoTracker.Count); int isoGroupsSearched = 0; double lastReportedProgress = 0; double currentProgress = 0; + foreach(var idGroup in PeptideGroupsForIsoTracker) { + Stopwatch stopwatch = null; + if(currentProgress == lastReportedProgress) + { + stopwatch = Stopwatch.StartNew(); + } + List xicGroup = new List(); var mostCommonChargeIdGroup = idGroup.Identifications.GroupBy(p => p.PrecursorChargeState).MaxBy(p => p.Count()); var id = mostCommonChargeIdGroup.First(); @@ -1846,8 +1853,20 @@ private void QuantifyIsobaricPeaks() { xicIds = mostCommonChargeIdGroup.Where(p => !p.IsDecoy).ToList(); } - + Stopwatch xicStopwatch = null; + if (currentProgress == lastReportedProgress) + { + xicStopwatch = Stopwatch.StartNew(); + } XIC xic = BuildXIC(xicIds, spectraFile, false, leftWindow, rightWindow); + if (xicStopwatch != null) + { + xicStopwatch.Stop(); + if (!FlashParams.Silent) + { + Console.WriteLine("XIC for {0} took {1} seconds", id.ModifiedSequence, xicStopwatch.Elapsed.TotalSeconds); + } + } if (xic != null && xic.Ms1Peaks.Count() > 5) // each XIC should have at least 5 points { xicGroup.Add(xic); @@ -1861,8 +1880,21 @@ private void QuantifyIsobaricPeaks() // Step 1: Find the XIC with most IDs then, set as reference XIC xicGroup.OrderByDescending(p => p.Ids.Count()).First().Reference = true; + Stopwatch xicGroupContstructorStopwatch = null; + if (currentProgress == lastReportedProgress) + { + xicGroupContstructorStopwatch = Stopwatch.StartNew(); + } //Step 2: Build the XICGroups XICGroups xicGroups = new XICGroups(xicGroup, maxThreads: FlashParams.MaxThreads); + if (xicGroupContstructorStopwatch != null) + { + xicGroupContstructorStopwatch.Stop(); + if (!FlashParams.Silent) + { + Console.WriteLine("XICGroup construction for {0}, including peak Alignment, took {1} seconds", id.ModifiedSequence, xicGroupContstructorStopwatch.Elapsed.TotalSeconds); + } + } //Step 3: Build the peakPairChrom dictionary //The shared peak dictionary, each sharedPeak included serval ChromatographicPeak for each run [SharedPeak <-> ChromatographicPeaks] @@ -1890,6 +1922,15 @@ private void QuantifyIsobaricPeaks() IsobaricPeptideDict.TryAdd(id.ModifiedSequence, sharedPeaksDict); } + if (stopwatch != null) + { + stopwatch.Stop(); + if (!FlashParams.Silent) + { + Console.WriteLine("IsoTracker for {0} took {1} seconds", id.ModifiedSequence, stopwatch.Elapsed.TotalSeconds); + } + } + // report search progress (proteins searched so far out of total proteins in database) if (!FlashParams.Silent) { @@ -1898,7 +1939,7 @@ private void QuantifyIsobaricPeaks() double percentProgress = ((double)isoGroupsSearched / PeptideGroupsForIsoTracker.Count * 100); currentProgress = Math.Max(percentProgress, currentProgress); - if (currentProgress > lastReportedProgress + 10) + if (currentProgress > lastReportedProgress + 5) { Console.WriteLine("{0:0.}% of isobaric species quantified", currentProgress); lastReportedProgress = currentProgress;