|
| 1 | +using Easy.Common.Extensions; |
| 2 | +using System.Collections.Generic; |
| 3 | + |
namespace MzLibUtil.PositionFrequencyAnalysis
{
    /// <summary>
    /// Organizes quantified peptides into a protein group -> protein -> peptide hierarchy.
    /// </summary>
    public class PositionFrequencyAnalysis
    {
        /// <summary>
        /// Protein groups keyed by their protein group string. This property is null until
        /// <see cref="SetUpQuantificationObjectsFromFullSequences"/> has been called, and it is
        /// replaced with a fresh dictionary on every call.
        /// </summary>
        public Dictionary<string, QuantifiedProteinGroup> ProteinGroups { get; private set; }

        //public Dictionary<string, (QuantifiedPeptide QuantifiedPeptide, string ProteinGroups)> Peptides { get; private set; }

        /// <summary>
        /// Rebuilds <see cref="ProteinGroups"/> from a list of quantified peptide full sequences.
        /// Every peptide is registered, under its base sequence, with every protein of every
        /// protein group it is reported for. This method only builds the object hierarchy;
        /// it returns nothing.
        /// </summary>
        /// <param name="peptides">
        /// One entry per peptide, as a tuple of (fullSeq, proteinGroups, intensity): the full
        /// sequence, the protein group strings the peptide belongs to, and its intensity.
        /// Each protein group string is split on '|' to obtain the individual protein names.
        /// </param>
        /// <param name="proteinSequences">
        /// Optional map from protein name to protein sequence. When it contains an entry for a
        /// protein, that sequence is assigned to the protein the first time the protein is
        /// created within its protein group. May be null or empty.
        /// </param>
        /// <exception cref="System.ArgumentNullException"><paramref name="peptides"/> is null.</exception>
        public void SetUpQuantificationObjectsFromFullSequences(List<(string fullSeq, List<string> proteinGroups, double intensity)> peptides, Dictionary<string, string> proteinSequences = null)
        {
            // Fail fast, before any state is replaced, instead of surfacing a bare
            // NullReferenceException from the loop below.
            if (peptides == null)
            {
                throw new System.ArgumentNullException(nameof(peptides));
            }

            ProteinGroups = new Dictionary<string, QuantifiedProteinGroup>();

            foreach (var pep in peptides)
            {
                string baseSeq = pep.fullSeq.GetBaseSequenceFromFullSequence();

                foreach (var pg in pep.proteinGroups)
                {
                    // Fetch the protein group, creating it the first time it is seen.
                    if (!ProteinGroups.TryGetValue(pg, out var proteinGroup))
                    {
                        proteinGroup = new QuantifiedProteinGroup(pg);
                        ProteinGroups[pg] = proteinGroup;
                    }

                    // The protein group string lists its member proteins separated by '|'.
                    foreach (var proteinName in pg.Split('|'))
                    {
                        // Fetch the protein, creating it the first time it is seen in this group.
                        if (!proteinGroup.Proteins.TryGetValue(proteinName, out var protein))
                        {
                            protein = new QuantifiedProtein(proteinName);
                            proteinGroup.Proteins[proteinName] = protein;

                            // The sequence is only looked up when the protein is first created.
                            if (proteinSequences != null && proteinSequences.TryGetValue(proteinName, out var proteinSequence))
                            {
                                protein.Sequence = proteinSequence;
                            }
                        }

                        // Peptides are keyed by base sequence: the first full sequence creates the
                        // peptide entry, later full sequences with the same base sequence are
                        // added to that existing entry.
                        if (protein.Peptides.TryGetValue(baseSeq, out var peptide))
                        {
                            peptide.AddFullSequence(pep.fullSeq, intensity: pep.intensity);
                        }
                        else
                        {
                            protein.Peptides[baseSeq] = new QuantifiedPeptide(pep.fullSeq, intensity: pep.intensity);
                        }
                    }
                }
            }
        }
    }
}
0 commit comments