Skip to content

Commit 362570c

Browse files
committed
Merge branch 'IsoTrackerSpeedup' of https://github.com/Alexander-Sol/mzLib into IsoTrackerSpeedup
2 parents e518ea8 + 248e75c commit 362570c

File tree

22 files changed

+5198
-410
lines changed

22 files changed

+5198
-410
lines changed

mzLib/MassSpectrometry/MsDataFile.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ namespace MassSpectrometry
2828
/// </summary>
2929
public abstract class MsDataFile : IEnumerable<MsDataScan>
3030
{
31+
protected readonly object DynamicReadingLock = new();
3132
public MsDataScan[] Scans { get; protected set; }
3233
public SourceFile SourceFile { get; set; }
3334
public int NumSpectra => Scans?.Length ?? 0;
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
using Easy.Common.Extensions;
2+
using System;
3+
using System.Collections.Generic;
4+
using System.Linq;
5+
using System.Text;
6+
using System.Threading.Tasks;
7+
using static System.Runtime.InteropServices.JavaScript.JSType;
8+
using MzLibUtil;
9+
10+
namespace MassSpectrometry
11+
{
12+
/// <summary>
13+
/// This Bspline class is copied from DIA-Umpire code. It is a more generalized spline that allows different smoothing degrees.
14+
/// In theory, it could replace linear and cubic spline where the smoothing degree is explicitly set to 1 and 3 respectively.
15+
/// Only used for RT-based spline for now.
16+
/// </summary>
17+
public class Bspline : XicSpline
{
    /// <summary>
    /// Degree of the B-spline basis functions used when evaluating the curve.
    /// Must be at least 1 and smaller than the number of input points.
    /// </summary>
    public int SmoothDegree { get; set; }

    /// <summary>
    /// Number of parameter values at which the curve is sampled by <see cref="GetXicSplineData"/>.
    /// </summary>
    public int NumberOfPoints { get; set; }

    /// <summary>
    /// Creates a B-spline with the given basis degree and number of sampled points.
    /// </summary>
    public Bspline(int smoothDegree, int numberOfPoints)
    {
        SmoothDegree = smoothDegree;
        NumberOfPoints = numberOfPoints;
    }

    /// <summary>
    /// Samples a B-spline whose control points are the (rt, intensity) pairs of the input arrays.
    /// The curve is evaluated at t = i / NumberOfPoints for i = 0 .. NumberOfPoints - 1.
    /// If the sampled curve does not reach the last (or first) input retention time, the raw last
    /// (or first) input point is added at the end (or front) of the result.
    /// </summary>
    /// <param name="rtArray">Retention times of the control points.</param>
    /// <param name="intensityArray">Intensities of the control points, indexed like <paramref name="rtArray"/>.</param>
    /// <param name="start">Not used by this implementation.</param>
    /// <param name="end">Not used by this implementation.</param>
    /// <returns>The sampled (retention time, intensity) pairs.</returns>
    /// <exception cref="ArgumentNullException">Either input array is null.</exception>
    /// <exception cref="MzLibException">
    /// The number of input points is not greater than <see cref="SmoothDegree"/>, or the degree is below 1.
    /// </exception>
    /// <exception cref="ArgumentOutOfRangeException"><see cref="NumberOfPoints"/> is below 1.</exception>
    public override (double, double)[] GetXicSplineData(float[] rtArray, float[] intensityArray, double start = -1, double end = -1)
    {
        ArgumentNullException.ThrowIfNull(rtArray);
        ArgumentNullException.ThrowIfNull(intensityArray);

        int p = SmoothDegree;
        if (rtArray.Length <= p)
        {
            throw new MzLibException("The number of points in the input array must be greater than the degree of the Bspline.");
        }
        if (p < 1)
        {
            // The knot vector below has rtArray.Length + 2 * p entries and is indexed up to
            // rtArray.Length + p, so a degree below 1 would always run past its end.
            throw new MzLibException("The degree of the Bspline must be at least 1.");
        }
        if (NumberOfPoints < 1)
        {
            // Without at least one sample there is no first/last point to compare against the input range.
            throw new ArgumentOutOfRangeException(nameof(NumberOfPoints), NumberOfPoints, "The number of spline points must be at least 1.");
        }

        int n = rtArray.Length - 1; // index of the last control point
        int m = rtArray.Length + p;

        // Build the knot vector: knots[0..p] are 0, followed by uniformly spaced knots with step intv.
        float[] knots = new float[m + p];
        for (int i = 0; i <= n; i++)
        {
            knots[i] = 0;
            knots[m - i] = 1;
        }
        float intv = 1.0f / (m - 2 * p);
        for (int i = 1; i <= m - 1; i++)
        {
            // Overwrites everything after knots[p], including the 1s written by the loop above.
            knots[p + i] = knots[p + i - 1] + intv;
        }

        var sampled = new List<(double, double)>();
        for (int i = 0; i < NumberOfPoints; i++)
        {
            float t = (float)i / NumberOfPoints;
            sampled.Add(getbspline(rtArray, intensityArray, t, n, p, knots));
        }

        // Make sure the result spans the full retention-time range of the input.
        if (sampled[sampled.Count - 1].Item1 < rtArray[rtArray.Length - 1])
        {
            sampled.Add((rtArray[rtArray.Length - 1], intensityArray[intensityArray.Length - 1]));
        }
        if (sampled[0].Item1 > rtArray[0])
        {
            // The first raw point belongs at the front so the output stays in sampling order.
            sampled.Insert(0, (rtArray[0], intensityArray[0]));
        }
        return sampled.ToArray();
    }

    /// <summary>
    /// Evaluates the spline at parameter <paramref name="t"/> as the basis-weighted sum of the
    /// control points 0 .. <paramref name="n"/>.
    /// </summary>
    /// <param name="rtArray">Retention times of the control points.</param>
    /// <param name="intensityArray">Intensities of the control points.</param>
    /// <param name="t">Curve parameter at which to evaluate.</param>
    /// <param name="n">Index of the last control point to include.</param>
    /// <param name="p">Degree of the basis functions.</param>
    /// <param name="bspline_T_">Knot vector.</param>
    /// <returns>The (retention time, intensity) of the curve at <paramref name="t"/>.</returns>
    public (float, float) getbspline(float[] rtArray, float[] intensityArray, float t, int n, int p, float[] bspline_T_)
    {
        float x = 0, y = 0;
        for (int i = 0; i <= n; i++)
        {
            float weight = bspline_base(i, p, t, bspline_T_);
            x += rtArray[i] * weight;
            y += intensityArray[i] * weight;
        }
        return (x, y);
    }

    /// <summary>
    /// Computes the value of the i-th B-spline basis function of degree <paramref name="p"/> at
    /// <paramref name="t"/> using the Cox-de Boor recursion. A term whose knot span has zero width
    /// contributes 0.
    /// </summary>
    /// <param name="i">Index of the basis function.</param>
    /// <param name="p">Degree of the basis function.</param>
    /// <param name="t">Curve parameter at which to evaluate.</param>
    /// <param name="bspline_T_">Knot vector; entries up to index i + p + 1 are read.</param>
    /// <returns>The basis function value.</returns>
    public float bspline_base(int i, int p, float t, float[] bspline_T_)
    {
        if (p == 0)
        {
            // Degree 0: indicator of the half-open knot interval [T[i], T[i + 1]).
            // T[i] <= t < T[i + 1] already implies the interval is non-empty.
            return bspline_T_[i] <= t && t < bspline_T_[i + 1] ? 1f : 0f;
        }

        float c1 = 0;
        float tn1 = 0;
        // Exact comparison with 0 is intentional: it detects repeated knots (zero-width span).
        if (bspline_T_[i + p] - bspline_T_[i] != 0)
        {
            tn1 = bspline_base(i, p - 1, t, bspline_T_);
            c1 = (t - bspline_T_[i]) / (bspline_T_[i + p] - bspline_T_[i]);
        }

        float c2 = 0;
        float tn2 = 0;
        if (bspline_T_[i + p + 1] - bspline_T_[i + 1] != 0)
        {
            tn2 = bspline_base(i + 1, p - 1, t, bspline_T_);
            c2 = (bspline_T_[i + p + 1] - t) / (bspline_T_[i + p + 1] - bspline_T_[i + 1]);
        }

        return c1 * tn1 + c2 * tn2;
    }
}
121+
}

mzLib/MassSpectrometry/PeakIndexing/PeakSpline/XicSpline.cs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
using System.Text;
77
using System.Threading.Tasks;
88
using MathNet.Numerics;
9+
using System.Numerics;
10+
using MathNet.Numerics.LinearAlgebra;
911

1012
namespace MassSpectrometry
1113
{
@@ -52,16 +54,16 @@ public void SetXicSplineXYData(ExtractedIonChromatogram xic, bool cycle = false,
5254
}
5355
else
5456
{
55-
peakRts = xic.Peaks.Select(p => (float)p.RetentionTime).ToArray();
57+
peakRts = xic.Peaks.Select(p => p.RetentionTime).ToArray();
5658
}
57-
var peakIntensities = xic.Peaks.Select(p => (float)p.Intensity).ToArray();
59+
var peakIntensities = xic.Peaks.Select(p => p.Intensity).ToArray();
5860
xic.XYData = GetXicSplineData(peakRts, peakIntensities, start, end);
5961
}
6062

6163
/// <summary>
6264
/// Check if the input arrays meet the requirements of interpolation.
6365
/// </summary>
64-
protected void CheckArrays(double[] rtArray, double[] intensityArray)
66+
protected void CheckArrays<T>(T[] rtArray, T[] intensityArray) where T : INumber<T>
6567
{
6668
if (rtArray.Length != intensityArray.Length)
6769
{

mzLib/MzIdentML/mzIdentML1_1_0.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ namespace mzIdentML110.Generated
2222
[System.SerializableAttribute()]
2323
[System.Diagnostics.DebuggerStepThroughAttribute()]
2424
[System.ComponentModel.DesignerCategoryAttribute("code")]
25-
[System.Xml.Serialization.XmlTypeAttribute(Namespace = "http://psidev.info/psi/pi/mzIdentML/1.1")]
26-
[System.Xml.Serialization.XmlRootAttribute("MzIdentML", Namespace = "http://psidev.info/psi/pi/mzIdentML/1.1", IsNullable = false)]
25+
[System.Xml.Serialization.XmlTypeAttribute(Namespace = "http://psidev.info/psi/pi/mzIdentML/1.1.0")]
26+
[System.Xml.Serialization.XmlRootAttribute("MzIdentML", Namespace = "http://psidev.info/psi/pi/mzIdentML/1.1.0", IsNullable = false)]
2727
public partial class MzIdentMLType110 : IdentifiableType
2828
{
2929

mzLib/Omics/IBioPolymerWithSetMods.cs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using System.Text;
2+
using System.Text.RegularExpressions;
23
using Chemistry;
34
using MassSpectrometry;
45
using Omics.Digestion;
@@ -197,5 +198,28 @@ public static string DetermineFullSequence(string baseSequence, IDictionary<int,
197198

198199
return subSequence.ToString();
199200
}
201+
202+
/// <summary>
/// Applies every "nucleotide substitution" annotation in a full sequence and removes the annotation text.
/// For each annotation of the form "[N nucleotide substitution:X->Y on Z]", the closest occurrence of X
/// that precedes the annotation is replaced with Y, and the bracketed annotation is deleted.
/// </summary>
/// <param name="fullSequenceWithSubstitution">Full sequence that may contain substitution annotations.</param>
/// <returns>The sequence with all substitutions applied and their annotations removed.</returns>
public static string ParseSubstitutedFullSequence(string fullSequenceWithSubstitution)
{
    const string pattern = @"\[\d+\+?\s*nucleotide substitution:\s*([A-Z])->([A-Z]) on ([A-Z])\]";
    string parsedSequence = fullSequenceWithSubstitution;
    var match = Regex.Match(parsedSequence, pattern);
    while (match.Success)
    {
        char original = match.Groups[1].Value[0];   // residue named on the left of "->"
        string substitute = match.Groups[2].Value;  // residue named on the right of "->"
        int patternIndex = match.Index;

        // Search backwards from the annotation for the residue being substituted.
        // The char overload is an ordinal comparison, unlike the culture-sensitive string overload.
        int replaceIndex = parsedSequence.LastIndexOf(original, patternIndex);
        if (replaceIndex != -1)
        {
            // Swap the closest preceding occurrence of the original residue for its substitute.
            // Both are a single character, so patternIndex stays valid afterwards.
            parsedSequence = parsedSequence.Remove(replaceIndex, 1).Insert(replaceIndex, substitute);
        }

        // Remove the annotation itself, then look for the next one in the updated string.
        parsedSequence = parsedSequence.Remove(patternIndex, match.Length);
        match = Regex.Match(parsedSequence, pattern);
    }
    return parsedSequence;
}
200224
}
201225
}

mzLib/Readers/Bruker/BrukerFileReader.cs

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
1-
using System.Collections.ObjectModel;
2-
using System.Runtime.InteropServices;
1+
using System.Runtime.InteropServices;
32
using System.Text;
43
using MassSpectrometry;
54
using System.Data.SQLite;
65
using Easy.Common.Extensions;
76
using MzLibUtil;
8-
using UsefulProteomicsDatabases;
97

108
namespace Readers
119
{
@@ -94,10 +92,14 @@ public override SourceFile GetSourceFile()
9492
return new SourceFile(nativeIdFormat, massSpecFileFormat,
9593
null, null, id: null, filePath: fileName);
9694
}
97-
public override MsDataScan GetOneBasedScanFromDynamicConnection(int oneBasedScanNumber, IFilteringParams? filterParams = null)
95+
96+
public override MsDataScan GetOneBasedScanFromDynamicConnection(int oneBasedScanNumber, IFilteringParams? filterParams = null)
9897
{
99-
return GetMsDataScanDynamic(oneBasedScanNumber, filterParams);
100-
}
98+
lock (DynamicReadingLock)
99+
{
100+
return GetMsDataScanDynamic(oneBasedScanNumber, filterParams);
101+
}
102+
}
101103

102104
public override void CloseDynamicConnection()
103105
{

mzLib/Readers/InternalResults/ResultFiles/SpectrumMatchTsvReader.cs

Lines changed: 38 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -16,74 +16,66 @@ public static class SpectrumMatchTsvReader
1616
/// <exception cref="ArgumentOutOfRangeException"></exception>
1717
public static List<T> ReadTsv<T>(string filePath, out List<string> warnings) where T : SpectrumMatchFromTsv
1818
{
19-
List<T> psms = new List<T>();
20-
warnings = new List<string>();
21-
22-
StreamReader reader = null;
19+
string[] lines;
2320
try
2421
{
25-
reader = new StreamReader(filePath);
22+
lines = File.ReadAllLines(filePath);
2623
}
2724
catch (Exception e)
2825
{
2926
throw new MzLibException("Could not read file: " + e.Message, e);
3027
}
3128

32-
int lineCount = 0;
33-
34-
string line;
35-
Dictionary<string, int> parsedHeader = null;
3629
MzLibException? parsingException = null;
37-
38-
while (reader.Peek() > 0)
30+
SupportedFileType type;
31+
try
3932
{
40-
lineCount++;
41-
42-
line = reader.ReadLine();
33+
type = filePath.ParseFileType();
34+
}
35+
catch (MzLibException e)
36+
{
37+
// if the parsing fails due to file path not being in the correct format, assume Psm reader will work.
38+
parsingException = e;
39+
type = SupportedFileType.psmtsv;
40+
}
41+
Dictionary<string, int> parsedHeader = ParseHeader(lines[0]);
42+
int lineCount = lines.Length - 1; // Exclude header
4343

44-
if (lineCount == 1)
45-
{
46-
parsedHeader = ParseHeader(line);
47-
continue;
48-
}
44+
// Pre-allocate result array
45+
T?[] psmsArray = new T[lineCount];
46+
var warningsBag = new System.Collections.Concurrent.ConcurrentBag<string>();
4947

48+
Parallel.For(1, lines.Length, new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount }, i =>
49+
{
50+
var line = lines[i];
5051
try
5152
{
52-
SupportedFileType type;
53-
try
54-
{
55-
type = filePath.ParseFileType();
56-
}
57-
catch (MzLibException e)
53+
T result = type switch
5854
{
59-
// if the parsing fails due to file path not being in the correct format, assume Psm reader will work.
60-
parsingException = e;
61-
type = SupportedFileType.psmtsv;
62-
}
63-
64-
switch (type)
65-
{
66-
case SupportedFileType.osmtsv:
67-
psms.Add((T)(SpectrumMatchFromTsv)new OsmFromTsv(line, Split, parsedHeader));
68-
break;
69-
70-
case SupportedFileType.psmtsv:
71-
default:
72-
psms.Add((T)(SpectrumMatchFromTsv)new PsmFromTsv(line, Split, parsedHeader));
73-
break;
74-
}
55+
SupportedFileType.osmtsv => (T)(SpectrumMatchFromTsv)new OsmFromTsv(line, Split, parsedHeader),
56+
_ => (T)(SpectrumMatchFromTsv)new PsmFromTsv(line, Split, parsedHeader)
57+
};
58+
psmsArray[i - 1] = result; // -1 to align with result array (excluding header)
7559
}
7660
catch (Exception)
7761
{
78-
warnings.Add("Could not read line: " + lineCount);
62+
warningsBag.Add("Could not read line: " + (i + 1)); // plus one to account for header line
7963
}
80-
}
64+
});
8165

82-
reader.Close();
66+
var psms = new List<T>(lineCount);
67+
foreach (var x in psmsArray)
68+
{
69+
if (x is not null)
70+
{
71+
psms.Add(x);
72+
}
73+
}
74+
warnings = warningsBag.ToList();
8375

84-
if (lineCount - 1 != psms.Count)
76+
if (lineCount != psms.Count)
8577
{
86-
warnings.Add("Warning: " + (lineCount - 1 - psms.Count) + " PSMs were not read.");
78+
warnings.Add("Warning: " + (lineCount - psms.Count) + " PSMs were not read.");
8779
}
8880

8981
// if we could not parse type, we assumed PSMs were in the file.

0 commit comments

Comments
 (0)