Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 29 additions & 57 deletions mzLib/FlashLFQ/FlashLfqEngine.cs
Original file line number Diff line number Diff line change
@@ -1,22 +1,21 @@
using Chemistry;
using MathNet.Numerics.Distributions;
using Easy.Common.Extensions;
using FlashLFQ.Interfaces;
using FlashLFQ.IsoTracker;
using FlashLFQ.PEP;
using MassSpectrometry;
using MathNet.Numerics.Statistics;
using MzLibUtil;
using Proteomics.AminoAcidPolymer;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using System.Runtime.CompilerServices;
using System.IO;
using Easy.Common.Extensions;
using FlashLFQ.PEP;
using FlashLFQ.IsoTracker;
using System.Threading;
using FlashLFQ.Interfaces;
using MassSpectrometry;
using System.Threading.Tasks;

[assembly: InternalsVisibleTo("TestFlashLFQ")]
[assembly: InternalsVisibleTo("Test")]
Expand Down Expand Up @@ -70,6 +69,7 @@ public class FlashLfqEngine
private FlashLfqResults _results;
private readonly List<Identification> _allIdentifications;
private readonly Stopwatch _globalStopwatch;
public bool RnaModeActive { get; private set; } = false; // RNA mode is activated when the charge state is less than 1
#endregion

/// <summary>
Expand Down Expand Up @@ -352,21 +352,17 @@ public FlashLfqResults Run()
/// </summary>
internal void CalculateTheoreticalIsotopeDistributions()
{
ModifiedSequenceToIsotopicDistribution = new Dictionary<string, List<(double, double)>>();
// Find the maximum and minimum charge states across all identifications and set the _chargeStates property
var minChargeState = _allIdentifications.Min(p => p.PrecursorChargeState);
var maxChargeState = _allIdentifications.Max(p => p.PrecursorChargeState);
_chargeStates = Enumerable.Range(minChargeState, (maxChargeState - minChargeState) + 1).ToList();

// calculate averagine (used for isotopic distributions for unknown modifications)
double averageC = 4.9384;
double averageH = 7.7583;
double averageO = 1.4773;
double averageN = 1.3577;
double averageS = 0.0417;
if (minChargeState < 1 || maxChargeState < 1)
{
RnaModeActive = true; //RNA mode activated!!!
}

double averagineMass =
PeriodicTable.GetElement("C").AverageMass * averageC +
PeriodicTable.GetElement("H").AverageMass * averageH +
PeriodicTable.GetElement("O").AverageMass * averageO +
PeriodicTable.GetElement("N").AverageMass * averageN +
PeriodicTable.GetElement("S").AverageMass * averageS;
ModifiedSequenceToIsotopicDistribution = new Dictionary<string, List<(double, double)>>();

// calculate monoisotopic masses and isotopic envelope for the base sequences
foreach (Identification id in _allIdentifications)
Expand All @@ -377,59 +373,39 @@ internal void CalculateTheoreticalIsotopeDistributions()
}

ChemicalFormula formula = id.OptionalChemicalFormula;

var isotopicMassesAndNormalizedAbundances = new List<(double massShift, double abundancence)>();

if(formula is null)
{
formula = new ChemicalFormula();
if (id.BaseSequence.AllSequenceResiduesAreValid())
{
// there are sometimes non-parsable sequences in the base sequence input
formula = new Proteomics.AminoAcidPolymer.Peptide(id.BaseSequence).GetChemicalFormula();
double massDiff = id.MonoisotopicMass;
massDiff -= formula.MonoisotopicMass;
formula = RnaModeActive ?
new Transcriptomics.RNA(id.BaseSequence).GetChemicalFormula() :
new Proteomics.AminoAcidPolymer.Peptide(id.BaseSequence).GetChemicalFormula();

double massDiff = id.MonoisotopicMass - formula.MonoisotopicMass;
if (Math.Abs(massDiff) > 20)
{
double averagines = massDiff / averagineMass;

formula.Add("C", (int)Math.Round(averagines * averageC, 0));
formula.Add("H", (int)Math.Round(averagines * averageH, 0));
formula.Add("O", (int)Math.Round(averagines * averageO, 0));
formula.Add("N", (int)Math.Round(averagines * averageN, 0));
formula.Add("S", (int)Math.Round(averagines * averageS, 0));
var massDiffFormulaFromAveragine = IsotopicDistributionCalculator.GetAveragineFormula(massDiff);
formula.Add(massDiffFormulaFromAveragine);
}
}
else
{
double averagines = id.MonoisotopicMass / averagineMass;

formula.Add("C", (int)Math.Round(averagines * averageC, 0));
formula.Add("H", (int)Math.Round(averagines * averageH, 0));
formula.Add("O", (int)Math.Round(averagines * averageO, 0));
formula.Add("N", (int)Math.Round(averagines * averageN, 0));
formula.Add("S", (int)Math.Round(averagines * averageS, 0));
formula = IsotopicDistributionCalculator.GetAveragineFormula(id.MonoisotopicMass);
}
}

var isotopicDistribution = IsotopicDistribution.GetDistribution(formula, 0.125, 1e-8);

double[] masses = isotopicDistribution.Masses.ToArray();
double[] abundances = isotopicDistribution.Intensities.ToArray();

for (int i = 0; i < masses.Length; i++)
{
masses[i] += (id.MonoisotopicMass - formula.MonoisotopicMass);
}

double highestAbundance = abundances.Max();
int highestAbundanceIndex = Array.IndexOf(abundances, highestAbundance);

for (int i = 0; i < masses.Length; i++)
{
// expected isotopic mass shifts for this peptide
masses[i] -= id.MonoisotopicMass;
// expected isotopic mass shifts for this peptide (relative to the monoisotopic mass)
masses[i] -= formula.MonoisotopicMass;

// normalized abundance of each isotope
abundances[i] /= highestAbundance;
Expand All @@ -444,12 +420,8 @@ internal void CalculateTheoreticalIsotopeDistributions()
ModifiedSequenceToIsotopicDistribution.Add(id.ModifiedSequence, isotopicMassesAndNormalizedAbundances);
}

var minChargeState = _allIdentifications.Min(p => p.PrecursorChargeState);
var maxChargeState = _allIdentifications.Max(p => p.PrecursorChargeState);
_chargeStates = Enumerable.Range(minChargeState, (maxChargeState - minChargeState) + 1).ToList();

var peptideModifiedSequences = _allIdentifications.GroupBy(p => p.ModifiedSequence);
foreach (var identifications in peptideModifiedSequences)
// Set the peakfinding mass for each identification
foreach (var identifications in _allIdentifications.GroupBy(p => p.ModifiedSequence))
{
// isotope where normalized abundance is 1
double mostAbundantIsotopeShift = ModifiedSequenceToIsotopicDistribution[identifications.First().ModifiedSequence]
Expand Down
73 changes: 73 additions & 0 deletions mzLib/FlashLFQ/IsotopicDistributionCalculator.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
using Chemistry;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace FlashLFQ
{
public sealed class IsotopicDistributionCalculator
{
internal static double AveragineMass; // Averagine, for protEINS
internal static double AveratideMass; // Averatide, for ribonucleoTIDES (RNA)

// Magic numbers determined by https://pmc.ncbi.nlm.nih.gov/articles/PMC6166224/
// These number are for peptides/proteins
internal static double AverageC = 4.9384;
internal static double AverageH = 7.7583;
internal static double AverageO = 1.4773;
internal static double AverageN = 1.3577;
internal static double AverageS = 0.0417;

// These numbers are for RNA
internal static double RnaAverageC = 9.5;
internal static double RnaAverageH = 12.75;
internal static double RnaAverageO = 3.75;
internal static double RnaAverageN = 5;

static IsotopicDistributionCalculator()
{
AveragineMass =
PeriodicTable.GetElement("C").AverageMass * AverageC +
PeriodicTable.GetElement("H").AverageMass * AverageH +
PeriodicTable.GetElement("O").AverageMass * AverageO +
PeriodicTable.GetElement("N").AverageMass * AverageN +
PeriodicTable.GetElement("S").AverageMass * AverageS;

AveratideMass =
PeriodicTable.GetElement("C").AverageMass * RnaAverageC +
PeriodicTable.GetElement("H").AverageMass * RnaAverageH +
PeriodicTable.GetElement("O").AverageMass * RnaAverageO +
PeriodicTable.GetElement("N").AverageMass * RnaAverageN;
}

internal static ChemicalFormula GetFormula(bool useAveratideModel)
{
return useAveratideModel ? GetAveratideFormula(AveratideMass) : GetAveragineFormula(AveragineMass);
}

internal static ChemicalFormula GetAveragineFormula(double mass)
{
double averagines = mass / (double)AveragineMass;
ChemicalFormula formula = new ChemicalFormula();
formula.Add("C", (int)Math.Round(averagines * AverageC, 0));
formula.Add("H", (int)Math.Round(averagines * AverageH, 0));
formula.Add("O", (int)Math.Round(averagines * AverageO, 0));
formula.Add("N", (int)Math.Round(averagines * AverageN, 0));
formula.Add("S", (int)Math.Round(averagines * AverageS, 0));
return formula;
}

internal static ChemicalFormula GetAveratideFormula(double mass)
{
double averatides = mass / (double)AveratideMass;
ChemicalFormula formula = new ChemicalFormula();
formula.Add("C", (int)Math.Round(averatides * RnaAverageC, 0));
formula.Add("H", (int)Math.Round(averatides * RnaAverageH, 0));
formula.Add("O", (int)Math.Round(averatides * RnaAverageO, 0));
formula.Add("N", (int)Math.Round(averatides * RnaAverageN, 0));
return formula;
}
}
}
7 changes: 4 additions & 3 deletions mzLib/FlashLFQ/IsotopicEnvelope.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using MassSpectrometry;
using System;
using MassSpectrometry;

namespace FlashLFQ
{
Expand All @@ -17,15 +18,15 @@ public IsotopicEnvelope(IIndexedPeak monoisotopicPeak, int chargeState, double i
{
IndexedPeak = monoisotopicPeak;
ChargeState = chargeState;
Intensity = intensity / chargeState;
Intensity = intensity / Math.Abs(chargeState);
PearsonCorrelation = pearsonCorrelation;
}

public IsotopicEnvelope(IndexedMassSpectralPeak monoisotopicPeak, int chargeState, double intensity, double pearsonCorrelation)
{
IndexedPeak = monoisotopicPeak;
ChargeState = chargeState;
Intensity = intensity / chargeState;
Intensity = intensity / Math.Abs(chargeState);
PearsonCorrelation = pearsonCorrelation;
}

Expand Down
92 changes: 92 additions & 0 deletions mzLib/TestFlashLFQ/TestFlashLFQ.cs
Original file line number Diff line number Diff line change
Expand Up @@ -866,7 +866,7 @@
string peptide = "PEPTIDE";
double intensity = 1e6;

Loaders.LoadElements();

Check warning on line 869 in mzLib/TestFlashLFQ/TestFlashLFQ.cs

View workflow job for this annotation

GitHub Actions / build

'Loaders.LoadElements()' is obsolete: 'This method is obsolete. It happens automatically on first call to periodic table in static constructor'

Check warning on line 869 in mzLib/TestFlashLFQ/TestFlashLFQ.cs

View workflow job for this annotation

GitHub Actions / build

'Loaders.LoadElements()' is obsolete: 'This method is obsolete. It happens automatically on first call to periodic table in static constructor'

// generate mzml file

Expand Down Expand Up @@ -935,7 +935,7 @@
string peptide = "PEPTIDE";
double intensity = 1e6;

Loaders.LoadElements();

Check warning on line 938 in mzLib/TestFlashLFQ/TestFlashLFQ.cs

View workflow job for this annotation

GitHub Actions / build

'Loaders.LoadElements()' is obsolete: 'This method is obsolete. It happens automatically on first call to periodic table in static constructor'

Check warning on line 938 in mzLib/TestFlashLFQ/TestFlashLFQ.cs

View workflow job for this annotation

GitHub Actions / build

'Loaders.LoadElements()' is obsolete: 'This method is obsolete. It happens automatically on first call to periodic table in static constructor'

// generate mzml file

Expand Down Expand Up @@ -1270,6 +1270,98 @@
Assert.That(quantResult.ConditionsWithPeptideSampleQuantities["b"].Count == 2);
}

[Test]
public static void TestFlashLfqQoutputRna()
{
string testDataDirectory = @"\\bison.chem.wisc.edu\share\Users\Nic\RNA\Standards\250529_NewStandards_Better";
string outputDirectory = Path.Combine(testDataDirectory, "FlashLFQ");
Directory.CreateDirectory(outputDirectory);

string psmFile = Path.Combine(testDataDirectory, "2025-07-22-18-23-03\\Task1-RnaSearchTask\\AllOSMs.osmtsv");

SpectraFileInfo f1r1 = new SpectraFileInfo(Path.Combine(testDataDirectory, "RnaStandard_Subset.mzML"), "one", 1, 1, 1);

List<string> acceptableProteinGroupAccessions = new() { "Q7KZF4", "Q15149", "P52298" };

List<Identification> ids = new List<Identification>();
Dictionary<string, ProteinGroup> allProteinGroups = new Dictionary<string, ProteinGroup>();
foreach (string line in File.ReadAllLines(psmFile))
{
var split = line.Split(new char[] { '\t' });

//skip the header
if (split.Contains("File Name") || string.IsNullOrWhiteSpace(line))
{
continue;
}

SpectraFileInfo file = f1r1;

string baseSequence = split[13];
string fullSequence = split[14];
double monoMass = double.Parse(split[23]);
double rt = double.Parse(split[2]);
int z = (int)double.Parse(split[6]);
var proteinSubset = split[26].Split(new char[] { '|' });
List<ProteinGroup> proteinGroups = new();


foreach (var protein in proteinSubset)
{
if (allProteinGroups.TryGetValue(protein, out var proteinGroup))
{
proteinGroups.Add(proteinGroup);
}
else
{
allProteinGroups.Add(protein, new ProteinGroup(protein, "", ""));
proteinGroups.Add(allProteinGroups[protein]);
}
}

Identification id = new Identification(file, baseSequence, fullSequence, monoMass, rt, z, proteinGroups);
ids.Add(id);
}

var engine = new FlashLfqEngine(ids,
matchBetweenRuns: true,
requireMsmsIdInCondition: false,
useSharedPeptidesForProteinQuant: true,
maxThreads: 1);
var results = engine.Run();

results.WriteResults(Path.Combine(outputDirectory, "peaks.tsv"), Path.Combine(outputDirectory, "peptides.tsv"), Path.Combine(outputDirectory, "proteins.tsv"), Path.Combine(outputDirectory, "bayesian.tsv"), true);

var peaks = results.Peaks.Values.ToList();
var peptides = results.PeptideModifiedSequences.Values.ToList();
var proteins = results.ProteinGroups.Values.ToList();

Assert.AreEqual(4, peaks[0].Count(m => m.DetectionType != DetectionType.MBR));
Assert.AreEqual(5, peaks[1].Count(m => m.DetectionType != DetectionType.MBR));

CollectionAssert.AreEquivalent(new string[] { "Q7KZF4", "Q7KZF4", "P52298", "Q15149", "Q15149" }, peaks[0].SelectMany(i => i.Identifications).Select(g => g.ProteinGroups.First()).Select(m => m.ProteinGroupName).ToArray());
CollectionAssert.AreEquivalent(new string[] { "Q7KZF4", "P52298", "Q15149", "Q15149", "Q7KZF4", "Q7KZF4", "P52298" }, peaks[1].SelectMany(i => i.Identifications).Select(g => g.ProteinGroups.First()).Select(m => m.ProteinGroupName).ToArray());

Assert.AreEqual(6, peptides.Count);
CollectionAssert.AreEquivalent(new string[] { "Q7KZF4", "P52298", "Q15149", "Q15149", "Q7KZF4", "P52298" }, peptides.Select(g => g.ProteinGroups.First()).Select(m => m.ProteinGroupName).ToArray());

Assert.AreEqual(3, proteins.Count);

List<string> peaksList = File.ReadAllLines(Path.Combine(outputDirectory, "peaks.tsv")).ToList();
List<string> peptidesList = File.ReadAllLines(Path.Combine(outputDirectory, "peptides.tsv")).ToList();
List<string> proteinsList = File.ReadAllLines(Path.Combine(outputDirectory, "proteins.tsv")).ToList();

//check that all rows including header have the same number of elements
Assert.AreEqual(1, peaksList.Select(l => l.Split('\t').Length).Distinct().ToList().Count);
Assert.AreEqual(1, peptidesList.Select(l => l.Split('\t').Length).Distinct().ToList().Count);
Assert.AreEqual(1, proteinsList.Select(l => l.Split('\t').Length).Distinct().ToList().Count);

CollectionAssert.AreEquivalent(new string[] { "P52298", "Q15149", "Q7KZF4" }, proteins.Select(p => p.ProteinGroupName.ToArray()));

Directory.Delete(outputDirectory, true);
}


[Test]
public static void TestFlashLfqQoutputRealData()
{
Expand Down
2 changes: 1 addition & 1 deletion mzLib/mzLib.nuspec
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<package>
<metadata>
<id>mzLib</id>
<version>1.0.559</version>
<version>8.1.8</version>
<title>mzLib</title>
<authors>Stef S.</authors>
<owners>Stef S.</owners>
Expand Down
Loading