Skip to content

Commit 68fd8cc

Browse files
committed
Modify UtilProtein to track peptide intensity. Work in progress: saved but not yet tested.
1 parent 2b60a6b commit 68fd8cc

File tree

2 files changed

+45
-7
lines changed

2 files changed

+45
-7
lines changed

mzLib/MzLibUtil/PositionFrequencyAnalysis.cs

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
using System;
22
using System.Collections.Generic;
3+
using System.Linq;
34
using System.Text.RegularExpressions;
45
using Easy.Common.Extensions;
56

@@ -28,14 +29,16 @@ public class UtilPeptide
2829
public UtilProtein ParentProtein { get; set; }
2930
public int OneBasedStartIndexInProtein { get; set; }
3031
public Dictionary<int, Dictionary<string, UtilModification>> ModifiedAminoAcidPositions { get; set; }
31-
public double Intensity { get; set; }
32+
public double Intensity { get; set; }
33+
public string PositionIndexType { get; set; }
3234

33-
public UtilPeptide(string fullSequence, Dictionary<int, Dictionary<string, UtilModification>> mods = null, int oneBasedStartIndexInProtein = 1, double intensity = 0)
35+
/// <summary>
/// Creates a peptide from its full sequence, optional modifications, start position,
/// intensity, and position-index type, then calls <see cref="SetBaseSequence"/> with its default pattern.
/// </summary>
/// <param name="fullSequence">Value stored in <see cref="FullSequence"/>.</param>
/// <param name="mods">
/// Stored in <see cref="ModifiedAminoAcidPositions"/> when <c>IsNotNullOrEmpty()</c> reports true;
/// otherwise a new empty dictionary is stored instead.
/// </param>
/// <param name="oneBasedStartIndexInProtein">Value stored in <see cref="OneBasedStartIndexInProtein"/> (defaults to 1).</param>
/// <param name="intensity">Value stored in <see cref="Intensity"/> (defaults to 0).</param>
/// <param name="positionIndexType">
/// Value stored in <see cref="PositionIndexType"/> (defaults to "peptide").
/// <see cref="PeptideToProteinPositions"/> sets this property to "protein".
/// </param>
public UtilPeptide(string fullSequence, Dictionary<int, Dictionary<string, UtilModification>> mods = null, int oneBasedStartIndexInProtein = 1, double intensity = 0, string positionIndexType= "peptide")
3436
{
3537
FullSequence = fullSequence;
3638
ModifiedAminoAcidPositions = mods.IsNotNullOrEmpty() ? mods : new Dictionary<int, Dictionary<string, UtilModification>>();
3739
OneBasedStartIndexInProtein = oneBasedStartIndexInProtein;
3840
Intensity = intensity;
41+
PositionIndexType = positionIndexType;
3942
SetBaseSequence();
4043
}
4144
public void SetBaseSequence(string modPattern = @"\[(.+?)\](?<!\[I+\])")
@@ -45,6 +48,7 @@ public void SetBaseSequence(string modPattern = @"\[(.+?)\](?<!\[I+\])")
4548
}
4649
public void PeptideToProteinPositions()
4750
{
51+
PositionIndexType = "protein";
4852
var modificationsToAdd = new Dictionary<int, Dictionary<string, UtilModification>>();
4953
var modificationsToRemove = new List<int>();
5054

@@ -78,6 +82,7 @@ public class UtilProtein
7882
public string Sequence { get; set; }
7983
public Dictionary<string, UtilPeptide> Peptides { get; set; }
8084
public Dictionary<int, Dictionary<string, UtilModification>> ModifiedAminoAcidPositionsInProtein { get; set; }
85+
public Dictionary<int, List<UtilPeptide>> PeptidesByProteinPosition { get; set; }
8186

8287
public UtilProtein(string accession, Dictionary<string, UtilPeptide> peptides=null)
8388
{
@@ -90,27 +95,54 @@ public void SetProteinModsFromPeptides()
9095
{
9196
// for now, this method must be used AFTER peptide mod positions are offsetted to protein positions
9297
ModifiedAminoAcidPositionsInProtein = new Dictionary<int, Dictionary<string, UtilModification>>();
98+
PeptidesByProteinPosition = new Dictionary<int, List<UtilPeptide>>();
99+
93100
foreach (var peptide in Peptides.Values)
94101
{
95-
peptide.PeptideToProteinPositions();
102+
if (peptide.PositionIndexType != "protein")
103+
{
104+
peptide.PeptideToProteinPositions();
105+
}
96106

97107
foreach (var modpos in peptide.ModifiedAminoAcidPositions)
98108
{
99109
if (!ModifiedAminoAcidPositionsInProtein.ContainsKey(modpos.Key))
100110
{
101111
ModifiedAminoAcidPositionsInProtein[modpos.Key] = new Dictionary<string, UtilModification>();
112+
PeptidesByProteinPosition[modpos.Key] = new List<UtilPeptide>();
102113
}
114+
115+
PeptidesByProteinPosition[modpos.Key].Add(peptide);
116+
103117
foreach (var mod in modpos.Value.Values)
104118
{
105119
if (!ModifiedAminoAcidPositionsInProtein[modpos.Key].ContainsKey(mod.IdWithMotif))
106120
{
107121
ModifiedAminoAcidPositionsInProtein[modpos.Key][mod.IdWithMotif] = new UtilModification(mod.IdWithMotif, modpos.Key, 0);
108122
}
109-
ModifiedAminoAcidPositionsInProtein[modpos.Key][mod.IdWithMotif].Intensity += mod.Intensity/peptide.Intensity; // might need to add some magic later to keep stored the mod intensity and the peptide intensity for MM output
123+
ModifiedAminoAcidPositionsInProtein[modpos.Key][mod.IdWithMotif].Intensity += mod.Intensity; // might need to add some magic later to keep stored the mod intensity and the peptide intensity for MM output
110124
}
111125
}
112126
}
113127
}
128+
129+
/// <summary>
/// Builds a per-position table of modification stoichiometry: each modification's summed
/// intensity (from <see cref="ModifiedAminoAcidPositionsInProtein"/>) divided by the summed
/// intensity of the peptides recorded for that position in <see cref="PeptidesByProteinPosition"/>.
/// </summary>
/// <remarks>
/// The result is a new dictionary holding new <see cref="UtilModification"/> objects, so the
/// summed intensities stored in <see cref="ModifiedAminoAcidPositionsInProtein"/> are left
/// untouched and the method can be called repeatedly with the same outcome.
/// </remarks>
/// <returns>
/// Position -> (modification key -> modification whose Intensity is the stoichiometric fraction).
/// The fraction is 0 when no peptide intensity is available for the position.
/// </returns>
public Dictionary<int, Dictionary<string, UtilModification>> GetModStoichiometryFromProteinMods()
{
    if (ModifiedAminoAcidPositionsInProtein == null)
    {
        SetProteinModsFromPeptides();
    }

    var aaModsStoichiometry = new Dictionary<int, Dictionary<string, UtilModification>>();
    foreach (var modpos in ModifiedAminoAcidPositionsInProtein)
    {
        // The denominator is the same for every modification at this position, so compute it once.
        // Both properties have public setters, so the peptide lookup may be null or lack this key.
        List<UtilPeptide> peptidesAtPosition = null;
        bool hasPeptides = PeptidesByProteinPosition != null
            && PeptidesByProteinPosition.TryGetValue(modpos.Key, out peptidesAtPosition)
            && peptidesAtPosition != null;
        double totalPeptideIntensity = hasPeptides ? peptidesAtPosition.Sum(x => x.Intensity) : 0;

        var stoichiometryAtPosition = new Dictionary<string, UtilModification>();
        foreach (var mod in modpos.Value)
        {
            // Peptide intensity defaults to 0, so guard the division instead of emitting Infinity/NaN.
            double fraction = totalPeptideIntensity > 0
                ? mod.Value.Intensity / totalPeptideIntensity
                : 0;

            // Write the fraction into a fresh object; mutating mod.Value would corrupt the stored sums.
            stoichiometryAtPosition[mod.Key] = new UtilModification(mod.Value.IdWithMotif, modpos.Key, 0)
            {
                Intensity = fraction
            };
        }

        aaModsStoichiometry[modpos.Key] = stoichiometryAtPosition;
    }

    return aaModsStoichiometry;
}
114146
}
115147

116148
public class UtilProteinGroup

mzLib/Test/TestMzLibUtil.cs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -214,8 +214,9 @@ public void TestUtilClassesForPositionFrequencyAnalysis()
214214
string fullSeq1 = "DMME[Metal:Calcium[II] on E]LVQPSISGVDLDK";
215215
string fullSeq2 = "DM[Common Variable:Oxidation on M]ME[Metal:Sodium[I] on E]LVQPSISGVDLDK";
216216
string fullSeq3 = "DM[Common Variable:Oxidation on M]MELVQPSIC[Common Fixed: Carbamidomethyl on C]SGVDLDK";
217+
string fullSeq4 = "DM[Common Variable:Oxidation on M]MELVQPSICSGVDLDK";
217218

218-
string[] seqs = { fullSeq1, fullSeq2, fullSeq3 };
219+
string[] seqs = { fullSeq1, fullSeq2, fullSeq3, fullSeq4};
219220

220221
var modsForSeqs = seqs.Select(x => x.ParseModifications());
221222
var peptideMods = new List<Dictionary<int, Dictionary<string, UtilModification>>>();
@@ -240,7 +241,7 @@ public void TestUtilClassesForPositionFrequencyAnalysis()
240241
}
241242

242243
var peptides = new Dictionary<string, UtilPeptide>();
243-
int[] peptideStartIndicesInProtein = { 1, 1, 100 };
244+
int[] peptideStartIndicesInProtein = { 1, 1, 100, 100};
244245

245246
for (int i=0; i < seqs.Count(); i++)
246247
{
@@ -250,7 +251,7 @@ public void TestUtilClassesForPositionFrequencyAnalysis()
250251
var protein = new UtilProtein("TestAccession", peptides);
251252
protein.SetProteinModsFromPeptides();
252253

253-
Assert.That(protein.Peptides.Count == 3);
254+
Assert.That(protein.Peptides.Count == 4);
254255
Assert.That(protein.ModifiedAminoAcidPositionsInProtein.ContainsKey(2));
255256
Assert.That(protein.ModifiedAminoAcidPositionsInProtein.ContainsKey(4));
256257
Assert.That(protein.ModifiedAminoAcidPositionsInProtein.ContainsKey(101));
@@ -264,6 +265,11 @@ public void TestUtilClassesForPositionFrequencyAnalysis()
264265
Assert.That(protein.ModifiedAminoAcidPositionsInProtein[101].ContainsKey("Common Variable:Oxidation on M"));
265266
Assert.That(protein.ModifiedAminoAcidPositionsInProtein[110].Count() == 1);
266267
Assert.That(protein.ModifiedAminoAcidPositionsInProtein[110].ContainsKey("Common Fixed: Carbamidomethyl on C"));
268+
269+
//var modStoich = protein.GetModStoichiometryFromProteinMods();
270+
//add more tests here
271+
//Assert.That(modStoich[101].Values.First().Intensity == 1);
272+
267273
}
268274

269275
[Test]

0 commit comments

Comments
 (0)