Skip to content

Commit 0184816

Browse files
committed
cherry-picking last commit of ptm stoich to clean commits after rebasing to master and matching content
1 parent 42e308f commit 0184816

File tree

9 files changed

+43
-68
lines changed

9 files changed

+43
-68
lines changed

mzLib/FlashLFQ/FlashLFQResults.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
using Easy.Common.Extensions;
22
using MathNet.Numerics.Statistics;
3+
using MzLibUtil;
4+
using Proteomics;
35
using System;
46
using System.Collections.Generic;
57
using System.IO;
@@ -14,6 +16,7 @@ public class FlashLfqResults
1416
public readonly Dictionary<string, Peptide> PeptideModifiedSequences;
1517
public readonly Dictionary<string, ProteinGroup> ProteinGroups;
1618
public readonly Dictionary<SpectraFileInfo, List<ChromatographicPeak>> Peaks;
19+
public Dictionary<string, MzLibUtil.UtilProteinGroup> ModInfo { get; private set; }
1720
private readonly HashSet<string> _peptideModifiedSequencesToQuantify;
1821
public string PepResultString { get; set; }
1922
public double MbrQValueThreshold { get; set; }

mzLib/FlashLFQ/FlashLfqEngine.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,9 @@ public FlashLfqResults Run()
280280
// do median-polish protein quantification
281281
_results.CalculateProteinResultsMedianPolish(UseSharedPeptidesForProteinQuant);
282282

283+
// calculate PTM occupancy at the peptide level
284+
_results.CalculatePTMOccupancy();
285+
283286
// do Bayesian protein fold-change analysis
284287
if (BayesianProteinQuant)
285288
{

mzLib/FlashLFQ/Peptide.cs

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
using System.Collections.Generic;
1+
using Easy.Common.Extensions;
2+
using System.Collections.Generic;
23
using System.Linq;
34
using System.Text;
45

@@ -67,6 +68,18 @@ public void SetIntensity(SpectraFileInfo fileInfo, double intensity)
6768
}
6869
}
6970

71+
/// <summary>
/// Adds up the <c>Value</c> of every entry currently held in <c>Intensities</c>.
/// </summary>
/// <returns>
/// The summed intensity, or 0 when <c>Intensities.IsNotNullOrEmpty()</c> is false
/// (presumably a null or empty collection; the helper comes from Easy.Common.Extensions).
/// </returns>
public double GetTotalIntensity()
72+
{
73+
if (Intensities.IsNotNullOrEmpty())
74+
{
75+
return Intensities.Sum(i => i.Value);
76+
}
77+
else
78+
{
79+
return 0;
80+
}
81+
}
82+
7083
public DetectionType GetDetectionType(SpectraFileInfo fileInfo)
7184
{
7285
if (DetectionTypes.TryGetValue(fileInfo, out DetectionType detectionType))

mzLib/MzLibUtil/PositionFrequencyAnalysis.cs

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,10 @@ public UtilProteinGroup(string name, Dictionary<string, UtilProtein> proteins =
137137
}
138138
}
139139
public class PositionFrequencyAnalysis
140-
{
140+
{
141+
142+
public Dictionary<string, UtilProteinGroup> Occupancy { get; private set; }
143+
141144
/// <summary>
142145
/// Calculates the occupancy of post-translational modifications at the peptide level.
143146
/// </summary>
@@ -147,11 +150,7 @@ public class PositionFrequencyAnalysis
147150
/// <returns> A nested dictionary whose key mappings are as follows: string ProteinGroup-> string Protein-> string BaseSequence-> int ModifiedAminoAcidIndex-> string ModificationName-> double Intensity
148151
/// Note: Each BaseSequence dictionary contains a ModifiedAminoAcidIndex key of -1 that then contains a ModificationName key called "Total" that is used to track the total intensity observed for
149152
/// all of the amino acids in that peptide.</returns>
150-
///
151-
152-
public Dictionary<string, UtilProteinGroup> Occupancy { get; private set; }
153-
154-
153+
///
155154
public void ProteinGroupsOccupancyByPeptide(List<(string fullSeq, string baseSeq, List<string> proteinGroup, double intensity)> peptides, bool modOnNTerminus = true, bool modOnCTerminus = true, bool ignoreTerminusMod=false)
156155
{
157156
var proteinGroups = new Dictionary<string, UtilProteinGroup>();
@@ -228,11 +227,5 @@ public void ProteinGroupsOccupancyByProtein(Dictionary<string, string> proteinSe
228227
{
229228
throw new NotImplementedException();
230229
}
231-
232-
public void ChangePeptideToProteinOccupancyIndex(string proteinGroupName, string proteinName, string peptide, int OneBasedStartResidue)
233-
{
234-
Occupancy[proteinGroupName].OccupancyLevel = "protein";
235-
Occupancy[proteinGroupName].Proteins[proteinName].Peptides[peptide].PeptideToProteinPositions(OneBasedStartResidue);
236-
}
237230
}
238231
}

mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsv.cs

Lines changed: 8 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
using System.Text.RegularExpressions;
55
using Chemistry;
66
using Omics.Fragmentation.Peptide;
7+
using MzLibUtil;
78

89
namespace Omics.SpectrumMatch
910
{
@@ -92,58 +93,15 @@ public static string RemoveParentheses(string baseSequence)
9293
}
9394

9495
/// <summary>
95-
/// Parses the full sequence to identify mods
96+
/// Parses the full sequence to identify mods.
9697
/// </summary>
97-
/// <param name="fullSequence"> Full sequence of the peptide in question</param>
98+
/// <param name="fullSeq"> Full sequence of the peptide in question</param>
99+
/// <param name="modOnNTerminus"> If true, the index of modifications at the N-terminus will be 0 (zero-based indexing). Otherwise, it is the index of the first amino acid (one-based indexing).</param>
100+
/// <param name="modOnCTerminus"> If true, the index of modifications at the C-terminus will be one more than the index of the last amino acid. Otherwise, it is the index of the last amino acid.</param>
98101
/// <returns> Dictionary with the key being the amino acid position of the mod and the value being the string representing the mod</returns>
99-
public static Dictionary<int, List<string>> ParseModifications(string fullSeq)
102+
public static Dictionary<int, List<string>> ParseModifications(string fullSeq, bool modOnNTerminus = true, bool modOnCTerminus = true)
100103
{
101-
// use a regex to get all modifications
102-
string pattern = @"\[(.+?)\]";
103-
Regex regex = new(pattern);
104-
105-
// remove each match after adding to the dict. Otherwise, getting positions
106-
// of the modifications will be rather difficult.
107-
//int patternMatches = regex.Matches(fullSeq).Count;
108-
Dictionary<int, List<string>> modDict = new();
109-
110-
111-
// If there is a missed cleavage, then there will be a label on K and a Label on X modification.
112-
// It'll be like [label]|[label] which complicates the positional stuff a little bit. Therefore,
113-
// RemoveSpecialCharacters will remove the "|", to ease things later on.
114-
RemoveSpecialCharacters(ref fullSeq);
115-
MatchCollection matches = regex.Matches(fullSeq);
116-
int captureLengthSum = 0;
117-
foreach (Match match in matches)
118-
{
119-
GroupCollection group = match.Groups;
120-
string val = group[1].Value;
121-
int startIndex = group[0].Index;
122-
int captureLength = group[0].Length;
123-
124-
List<string> modList = new List<string>();
125-
modList.Add(val);
126-
127-
// The position of the amino acids is tracked by the positionToAddToDict variable. It takes the
128-
// startIndex of the modification Match and removes the cumulative length of the modifications
129-
// found (including the brackets). The difference will be the number of nonmodification characters,
130-
// or the number of amino acids prior to the startIndex in the sequence.
131-
int positionToAddToDict = startIndex - captureLengthSum;
132-
133-
// check to see if key already exist
134-
// if the already key exists, update the current position with the capture length + 1.
135-
// otherwise, add the modification to the dict.
136-
if (modDict.ContainsKey(positionToAddToDict))
137-
{
138-
modDict[positionToAddToDict].Add(val);
139-
}
140-
else
141-
{
142-
modDict.Add(positionToAddToDict, modList);
143-
}
144-
captureLengthSum += captureLength;
145-
}
146-
return modDict;
104+
return fullSeq.ParseModifications(modOnNTerminus, modOnCTerminus);
147105
}
148106

149107
/// <summary>
@@ -155,9 +113,7 @@ public static Dictionary<int, List<string>> ParseModifications(string fullSeq)
155113
/// <returns></returns>
156114
public static void RemoveSpecialCharacters(ref string fullSeq, string replacement = @"", string specialCharacter = @"\|")
157115
{
158-
// next regex is used in the event that multiple modifications are on a missed cleavage Lysine (K)
159-
Regex regexSpecialChar = new(specialCharacter);
160-
fullSeq = regexSpecialChar.Replace(fullSeq, replacement);
116+
MzLibUtil.ClassExtensions.RemoveSpecialCharacters(ref fullSeq, replacement, specialCharacter);
161117
}
162118

163119

mzLib/Test/AveragingTests/TestAveragingSpectraWriteFile.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,8 +112,8 @@ public static void TestOutputToCustomDirectoryAndNameMzML()
112112
{
113113
// output to a different directory than the files were originally in
114114
Parameters.OutputType = OutputType.MzML;
115-
string customDestinationDirectory = Path.Combine(OutputDirectory, "NewTestingDirectory");
116-
string customDestinationDirectory2 = Path.Combine(OutputDirectory, "NewTestingDirectory2");
115+
string customDestinationDirectory = Path.Combine(OutputDirectory, "NewAveragedTestingDirectory");
116+
string customDestinationDirectory2 = Path.Combine(OutputDirectory, "NewAveragedTestingDirectory2");
117117
Directory.CreateDirectory(customDestinationDirectory);
118118
string customName = "AveragedSpectra";
119119

mzLib/Test/FileReadingTests/TestPsmFromTsv.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ public static void TestParseModification()
188188

189189
// psm with two mods on the same amino acid
190190
string fullSeq = "[Common Fixed:Carbamidomethyl on C]|[UniProt:N-acetylserine on S]KPRKIEEIKDFLLTARRKDAKSVKIKKNKDNVKFK";
191-
modDict = Omics.SpectrumMatch.SpectrumMatchFromTsv.ParseModifications(fullSeq);
191+
modDict = Omics.SpectrumMatch.SpectrumMatchFromTsv.ParseModifications(fullSeq, true, true);
192192
Assert.That(modDict.Count == 1);
193193
Assert.That(modDict.ContainsKey(0));
194194
Assert.That(modDict[0].Count == 2);

mzLib/Test/TestMzLibUtil.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
using Assert = NUnit.Framework.Legacy.ClassicAssert;
33
using MzLibUtil;
44
using Readers;
5+
using System.Collections.Generic;
56

67
namespace Test
78
{

mzLib/TestFlashLFQ/TestFlashLFQ.cs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1361,6 +1361,12 @@ public static void TestFlashLfqQoutputRealData()
13611361
var peaks = results.Peaks.Values.ToList();
13621362
var peptides = results.PeptideModifiedSequences.Values.ToList();
13631363
var proteins = results.ProteinGroups.Values.ToList();
1364+
var modInfo = results.ModInfo;
1365+
1366+
Assert.AreEqual(6989789.488346225, peptides[0].GetTotalIntensity(), 0.0000001);
1367+
Assert.AreEqual(726036.539062, peptides[4].GetTotalIntensity(), 0.000001);
1368+
Assert.AreEqual(726036.539062, modInfo["Q7KZF4"].Proteins["Q7KZF4"].Peptides["EYGMIYLGK"].ModifiedAminoAcidPositions[4]["Common Variable:Oxidation on M"].Intensity, 0.000001);
1369+
Assert.AreEqual(modInfo["Q7KZF4"].Proteins["Q7KZF4"].Peptides["EYGMIYLGK"].Intensity, modInfo["Q7KZF4"].Proteins["Q7KZF4"].Peptides["EYGMIYLGK"].ModifiedAminoAcidPositions[4]["Common Variable:Oxidation on M"].Intensity, 0.000001);
13641370

13651371
Assert.AreEqual(4, peaks[0].Count(m => m.IsMbrPeak == false));
13661372
Assert.AreEqual(5, peaks[1].Count(m => m.IsMbrPeak == false));

0 commit comments

Comments
 (0)