diff --git a/mzLib/Chemistry/ChemicalFormula.cs b/mzLib/Chemistry/ChemicalFormula.cs index a652f954f..dc31d9cc0 100644 --- a/mzLib/Chemistry/ChemicalFormula.cs +++ b/mzLib/Chemistry/ChemicalFormula.cs @@ -33,7 +33,7 @@ namespace Chemistry /// Formula can change!!! If isotopes or elements are changed. /// [Serializable] - public sealed class ChemicalFormula : IEquatable, IHasChemicalFormula + public sealed class ChemicalFormula : IEquatable, IEquatable, IHasChemicalFormula { // Main data stores, the isotopes and elements @@ -487,6 +487,8 @@ public bool Equals(ChemicalFormula other) return true; } + public bool Equals(IHasChemicalFormula other) => Equals(other?.ThisChemicalFormula); + /// /// Produces the Hill Notation of the chemical formula /// diff --git a/mzLib/Readers/InternalResults/IndividualResultRecords/OsmFromTsv.cs b/mzLib/Readers/InternalResults/IndividualResultRecords/OsmFromTsv.cs index d2bba2d1e..72a1d1e6e 100644 --- a/mzLib/Readers/InternalResults/IndividualResultRecords/OsmFromTsv.cs +++ b/mzLib/Readers/InternalResults/IndividualResultRecords/OsmFromTsv.cs @@ -1,24 +1,46 @@ -using Omics.SpectrumMatch; +using Chemistry; +using Omics.SpectrumMatch; +using Transcriptomics; +using Transcriptomics.Digestion; namespace Readers { public class OsmFromTsv : SpectrumMatchFromTsv { + public IHasChemicalFormula FivePrimeTerminus { get; set; } + public IHasChemicalFormula ThreePrimeTerminus { get; set; } + public OsmFromTsv(string line, char[] split, Dictionary parsedHeader) : base(line, split, parsedHeader) { - // TODO: Parse Oligo specific columns + var spl = line.Split(split).Select(p => p.Trim('\"')).ToArray(); + + if (parsedHeader[SpectrumMatchFromTsvHeader.FivePrimeTerminus] >= 0) + FivePrimeTerminus = ChemicalFormula.ParseFormula(spl[parsedHeader[SpectrumMatchFromTsvHeader.FivePrimeTerminus]]); + else if (PreviousResidue == "-") + FivePrimeTerminus = NucleicAcid.DefaultFivePrimeTerminus; + else + FivePrimeTerminus = Rnase.DefaultFivePrimeTerminus; + + if (parsedHeader[SpectrumMatchFromTsvHeader.ThreePrimeTerminus] >= 0) + ThreePrimeTerminus = ChemicalFormula.ParseFormula(spl[parsedHeader[SpectrumMatchFromTsvHeader.ThreePrimeTerminus]]); + else if (NextResidue == "-") + ThreePrimeTerminus = NucleicAcid.DefaultThreePrimeTerminus; + else + ThreePrimeTerminus = Rnase.DefaultThreePrimeTerminus; } /// - /// Constructor used to disambiguate PsmFromTsv to a single psm object + /// Constructor used to disambiguate PsmFromTsv to a single osm object /// - /// psm to disambiguate - /// sequence of ambiguous psm to use - public OsmFromTsv(OsmFromTsv psm, string fullSequence, int index = 0, string baseSequence = "") - : base(psm, fullSequence, index, baseSequence) + /// osm to disambiguate + /// sequence of ambiguous osm to use + public OsmFromTsv(OsmFromTsv osm, string fullSequence, int index = 0, string baseSequence = "", + IHasChemicalFormula? fivePrimeTerm = null, IHasChemicalFormula? threePrimeTerm = null) + : base(osm, fullSequence, index, baseSequence) { - // TODO: Parse Oligo specific columns + FivePrimeTerminus = fivePrimeTerm ?? osm.FivePrimeTerminus; + ThreePrimeTerminus = threePrimeTerm ?? osm.ThreePrimeTerminus; } } } diff --git a/mzLib/Readers/InternalResults/IndividualResultRecords/SpectrumMatchFromTsvHeader.cs b/mzLib/Readers/InternalResults/IndividualResultRecords/SpectrumMatchFromTsvHeader.cs index 7ca469da5..d850a79a5 100644 --- a/mzLib/Readers/InternalResults/IndividualResultRecords/SpectrumMatchFromTsvHeader.cs +++ b/mzLib/Readers/InternalResults/IndividualResultRecords/SpectrumMatchFromTsvHeader.cs @@ -116,5 +116,9 @@ public class SpectrumMatchFromTsvHeader public const string AllPotentialGlycanLocalization = "All Potential Glycan Localizations"; public const string AllSiteSpecificLocalizationProbability = "All SiteSpecific Localization Probability"; public const string LocalizationScore = "Localization Score"; + + // Oligo + public const string FivePrimeTerminus = "5'-Terminus"; + public const string ThreePrimeTerminus = "3'-Terminus"; } } diff --git a/mzLib/Readers/InternalResults/ResultFiles/SpectrumMatchTsvReader.cs b/mzLib/Readers/InternalResults/ResultFiles/SpectrumMatchTsvReader.cs index be639b4d6..2d265307c 100644 --- a/mzLib/Readers/InternalResults/ResultFiles/SpectrumMatchTsvReader.cs +++ b/mzLib/Readers/InternalResults/ResultFiles/SpectrumMatchTsvReader.cs @@ -215,7 +215,7 @@ public static Dictionary ParseHeader(string header) parsedHeader.Add(SpectrumMatchFromTsvHeader.ParentIonsLabel, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.ParentIonsLabel)); parsedHeader.Add(SpectrumMatchFromTsvHeader.Ms2ScanRetentionTime, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.Ms2ScanRetentionTime)); - + // Glyco parsedHeader.Add(SpectrumMatchFromTsvHeader.GlycanMass, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.GlycanMass)); parsedHeader.Add(SpectrumMatchFromTsvHeader.GlycanStructure, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.GlycanStructure)); parsedHeader.Add(SpectrumMatchFromTsvHeader.GlycanComposition, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.GlycanComposition)); @@ -247,6 +247,11 @@ public static Dictionary ParseHeader(string header) { parsedHeader[SpectrumMatchFromTsvHeader.AllSiteSpecificLocalizationProbability] = Array.IndexOf(spl, "AllSiteSpecificLocalizationProbability"); } + + // Oligo + parsedHeader.Add(SpectrumMatchFromTsvHeader.FivePrimeTerminus, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.FivePrimeTerminus)); + parsedHeader.Add(SpectrumMatchFromTsvHeader.ThreePrimeTerminus, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.ThreePrimeTerminus)); + return parsedHeader; } diff --git a/mzLib/Test/TestChemicalFormula.cs b/mzLib/Test/TestChemicalFormula.cs index 843115782..3254f826a 100644 --- a/mzLib/Test/TestChemicalFormula.cs +++ b/mzLib/Test/TestChemicalFormula.cs @@ -374,12 +374,42 @@ public static void FormulaValueEquality() } [Test] - public static void FormulaEquality() + public static void FormulaEquality_AreEqual_SameReference() { ChemicalFormula formulaA = ChemicalFormula.ParseFormula("C2H3NO"); Assert.AreEqual(formulaA, formulaA); } + [Test] + public static void FormulaEquality_AreEqual_DifferentReference() + { + ChemicalFormula formulaA = ChemicalFormula.ParseFormula("C2H3NO"); + ChemicalFormula formulaB = ChemicalFormula.ParseFormula("C2H3NO"); + Assert.AreEqual(formulaA, formulaB); + } + + [Test] + public static void FormulaEquality_Rearranged() + { + ChemicalFormula formulaA = ChemicalFormula.ParseFormula("C2H3NO"); + ChemicalFormula formulaB = ChemicalFormula.ParseFormula("H3NOC2"); + Assert.AreEqual(formulaA, formulaB); + } + + [Test] + public static void FormulaEquality_null() + { + ChemicalFormula formulaA = ChemicalFormula.ParseFormula("C2H3NO"); + Assert.AreNotEqual(formulaA, null); + } + + [Test] + public static void FormulaEquality_OtherType() + { + ChemicalFormula formulaA = ChemicalFormula.ParseFormula("C2H3NO"); + Assert.AreNotEqual(formulaA, "C2H3NO"); + } + [Test] public static void FormulaAlmostEquality() { @@ -388,6 +418,30 @@ public static void FormulaAlmostEquality() Assert.AreNotEqual(formulaA, formulaB); } + [Test] + public static void FormulaEquality_AreEqual_DifferentReference_Interface() + { + ChemicalFormula formulaA = ChemicalFormula.ParseFormula("C2H3NO"); + IHasChemicalFormula formulaB = ChemicalFormula.ParseFormula("C2H3NO"); + Assert.IsTrue(formulaA.Equals(formulaB)); + } + + [Test] + public static void FormulaEquality_Rearranged_Interface() + { + ChemicalFormula formulaA = ChemicalFormula.ParseFormula("C2H3NO"); + IHasChemicalFormula formulaB = ChemicalFormula.ParseFormula("H3NOC2"); + Assert.IsTrue(formulaA.Equals(formulaB)); + } + + [Test] + public static void FormulaAlmostEquality_Interface() + { + ChemicalFormula formulaA = ChemicalFormula.ParseFormula("C2H3NO"); + IHasChemicalFormula formulaB = ChemicalFormula.ParseFormula("C{12}2H3NO"); + Assert.IsFalse(formulaA.Equals(formulaB)); + } + [Test] public static void HashCodeEquality() { diff --git a/mzLib/Test/Transcriptomics/TestOsmReading.cs b/mzLib/Test/Transcriptomics/TestOsmReading.cs index 34554fe58..ee016e7f5 100644 --- a/mzLib/Test/Transcriptomics/TestOsmReading.cs +++ b/mzLib/Test/Transcriptomics/TestOsmReading.cs @@ -7,6 +7,8 @@ using System.Linq; using Omics.Fragmentation.Oligo; using Chemistry; +using Transcriptomics; +using Transcriptomics.Digestion; namespace Test.Transcriptomics; @@ -45,8 +47,8 @@ public static IEnumerable IonTestCases [Test] public static void LoadsWithoutCrashing_OsmSpecific() { - List errors = []; - List results = []; + List errors = new(); + List results = new(); Assert.DoesNotThrow(() => results = SpectrumMatchTsvReader.ReadOsmTsv(OsmFilePath, out errors)); Assert.That(errors.Count, Is.EqualTo(0)); Assert.That(results.Count, Is.EqualTo(6)); @@ -55,8 +57,8 @@ public static void LoadsWithoutCrashing_OsmSpecific() [Test] public static void LoadsWithoutCrashing_Generic() { - List errors = []; - List results = []; + List errors = new(); + List results = new(); Assert.DoesNotThrow(() => results = SpectrumMatchTsvReader.ReadTsv(OsmFilePath, out errors)); Assert.That(errors.Count, Is.EqualTo(0)); Assert.That(results.Count, Is.EqualTo(6)); @@ -77,7 +79,7 @@ public static void MatchedFragmentIonProperties_AreCorrectlySet_Case( FragmentationTerminus terminus ) { - List errors = []; + List errors = new(); var results = SpectrumMatchTsvReader.ReadOsmTsv(OsmFilePath, out errors); Assert.That(results.Count, Is.GreaterThan(0), "No results loaded from OSM TSV file."); @@ -103,4 +105,314 @@ FragmentationTerminus terminus Assert.That(Math.Round(ion.NeutralTheoreticalProduct.NeutralLoss, 2), Is.EqualTo(neutralLoss).Within(0.01), $"NeutralLoss not set correctly for {annotation}."); Assert.That(ion.NeutralTheoreticalProduct.Terminus, Is.EqualTo(terminus), $"Terminus not set correctly for {annotation}."); } + + [Test] + public static void TerminusProperties_AssignedCorrectly_WhenPreviousAndNextResiduesAreTerminal() + { + // Test that FivePrimeTerminus and ThreePrimeTerminus are correctly assigned based on PreviousResidue/NextResidue + List errors = new(); + var results = SpectrumMatchTsvReader.ReadOsmTsv(OsmFilePath, out errors); + + Assert.That(results.Count, Is.GreaterThan(0), "No results loaded from OSM TSV file."); + + foreach (var result in results) + { + // FivePrimeTerminus: if PreviousResidue == "-" => NucleicAcid.DefaultFivePrimeTerminus, else Rnase.DefaultFivePrimeTerminus + string actualFive = result.FivePrimeTerminus.ThisChemicalFormula.Formula; + string expectedFive = result.PreviousResidue == "-" + ? NucleicAcid.DefaultFivePrimeTerminus.Formula + : Rnase.DefaultFivePrimeTerminus.ThisChemicalFormula.Formula; + Assert.That(actualFive, Is.EqualTo(expectedFive), + $"FivePrimeTerminus mismatch for scan {result.Ms2ScanNumber}. Expected: {expectedFive}, Got: {actualFive}"); + + // ThreePrimeTerminus: if NextResidue == "-" => NucleicAcid.DefaultThreePrimeTerminus, else Rnase.DefaultThreePrimeTerminus + string actualThree = result.ThreePrimeTerminus.ThisChemicalFormula.Formula; + string expectedThree = result.NextResidue == "-" + ? NucleicAcid.DefaultThreePrimeTerminus.Formula + : Rnase.DefaultThreePrimeTerminus.ThisChemicalFormula.Formula; + Assert.That(actualThree, Is.EqualTo(expectedThree), + $"ThreePrimeTerminus mismatch for scan {result.Ms2ScanNumber}. Expected: {expectedThree}, Got: {actualThree}"); + } + } + + [Test] + public static void TerminusProperties_NotNull_ForAllResults() + { + // Ensure terminus properties are always set and never null + List errors = new(); + var results = SpectrumMatchTsvReader.ReadOsmTsv(OsmFilePath, out errors); + + Assert.That(results.Count, Is.GreaterThan(0)); + + foreach (var result in results) + { + Assert.That(result.FivePrimeTerminus, Is.Not.Null, + $"FivePrimeTerminus is null for scan {result.Ms2ScanNumber}"); + Assert.That(result.ThreePrimeTerminus, Is.Not.Null, + $"ThreePrimeTerminus is null for scan {result.Ms2ScanNumber}"); + Assert.That(result.FivePrimeTerminus.ThisChemicalFormula, Is.Not.Null, + $"FivePrimeTerminus.ThisChemicalFormula is null for scan {result.Ms2ScanNumber}"); + Assert.That(result.ThreePrimeTerminus.ThisChemicalFormula, Is.Not.Null, + $"ThreePrimeTerminus.ThisChemicalFormula is null for scan {result.Ms2ScanNumber}"); + } + } + + [Test] + public static void DisambiguatingConstructor_PreservesTerminusProperties() + { + // Test the disambiguating constructor (OsmFromTsv(OsmFromTsv osm, string fullSequence, ...)) + List errors = new(); + var results = SpectrumMatchTsvReader.ReadOsmTsv(OsmFilePath, out errors); + var originalOsm = results.First(); + + // Test 1: Constructor without explicit terminus parameters (should copy from original) + var clonedOsm = new OsmFromTsv(originalOsm, originalOsm.FullSequence); + + Assert.That(clonedOsm.FivePrimeTerminus.ThisChemicalFormula.Formula, + Is.EqualTo(originalOsm.FivePrimeTerminus.ThisChemicalFormula.Formula), + "Cloned OSM should preserve FivePrimeTerminus when not explicitly provided"); + Assert.That(clonedOsm.ThreePrimeTerminus.ThisChemicalFormula.Formula, + Is.EqualTo(originalOsm.ThreePrimeTerminus.ThisChemicalFormula.Formula), + "Cloned OSM should preserve ThreePrimeTerminus when not explicitly provided"); + } + + [Test] + public static void DisambiguatingConstructor_OverridesTerminusProperties_WhenProvided() + { + // Test the disambiguating constructor with explicit terminus parameters + List errors = new(); + var results = SpectrumMatchTsvReader.ReadOsmTsv(OsmFilePath, out errors); + var originalOsm = results.First(); + + // Create custom terminus formulas + var customFivePrime = ChemicalFormula.ParseFormula("H2O"); + var customThreePrime = ChemicalFormula.ParseFormula("PO4"); + + // Test 2: Constructor with explicit terminus parameters (should override original) + var customOsm = new OsmFromTsv(originalOsm, originalOsm.FullSequence, + fivePrimeTerm: customFivePrime, threePrimeTerm: customThreePrime); + + Assert.That(customOsm.FivePrimeTerminus.ThisChemicalFormula.Formula, + Is.EqualTo(customFivePrime.Formula), + "Custom FivePrimeTerminus should override original"); + Assert.That(customOsm.ThreePrimeTerminus.ThisChemicalFormula.Formula, + Is.EqualTo(customThreePrime.Formula), + "Custom ThreePrimeTerminus should override original"); + } + + [Test] + public static void OsmFromTsvFile_LoadsCorrectly() + { + // Test loading via OsmFromTsvFile + var osmFile = new OsmFromTsvFile(OsmFilePath); + osmFile.LoadResults(); + + Assert.That(osmFile.Results, Is.Not.Null); + Assert.That(osmFile.Results.Count, Is.EqualTo(6)); + Assert.That(osmFile.FileType, Is.EqualTo(SupportedFileType.osmtsv)); + + // Verify terminus properties are set for all results + foreach (var result in osmFile.Results) + { + Assert.That(result.FivePrimeTerminus, Is.Not.Null); + Assert.That(result.ThreePrimeTerminus, Is.Not.Null); + } + } + + [Test] + public static void TerminalCapFormulasAreConsistent() + { + string expectedNucleicAcidFivePrime = "O-3P-1"; + Assert.That(NucleicAcid.DefaultFivePrimeTerminus.Formula, Is.EqualTo(expectedNucleicAcidFivePrime), + "NucleicAcid.DefaultFivePrimeTerminus formula has changed unexpectedly."); + + string expectedNucleicAcidThreePrime = "HO"; + Assert.That(NucleicAcid.DefaultThreePrimeTerminus.Formula, Is.EqualTo(expectedNucleicAcidThreePrime), + "NucleicAcid.DefaultThreePrimeTerminus formula has changed unexpectedly."); + + string expectedRnaseFivePrime = "O-3P-1"; + Assert.That(Rnase.DefaultFivePrimeTerminus.ThisChemicalFormula.Formula, Is.EqualTo(expectedRnaseFivePrime), + "Rnase.DefaultFivePrimeTerminus formula has changed unexpectedly."); + + string expectedRnaseThreePrime = "H2O4P"; + Assert.That(Rnase.DefaultThreePrimeTerminus.ThisChemicalFormula.Formula, Is.EqualTo(expectedRnaseThreePrime), + "Rnase.DefaultThreePrimeTerminus formula has changed unexpectedly."); + } + + [Test] + public static void TerminusProperties_MatchExpectedFormulas() + { + // Verify the specific chemical formulas match expected values + List errors = new(); + var results = SpectrumMatchTsvReader.ReadOsmTsv(OsmFilePath, out errors); + + // Expected formulas from the static properties + IHasChemicalFormula expectedNucleicAcidFivePrime = NucleicAcid.DefaultFivePrimeTerminus; + IHasChemicalFormula expectedNucleicAcidThreePrime = NucleicAcid.DefaultThreePrimeTerminus; + IHasChemicalFormula expectedRnaseFivePrime = Rnase.DefaultFivePrimeTerminus; + IHasChemicalFormula expectedRnaseThreePrime = Rnase.DefaultThreePrimeTerminus; + + foreach (var result in results) + { + if (result.PreviousResidue == "-") + { + Assert.That(result.FivePrimeTerminus.ThisChemicalFormula, + Is.EqualTo(expectedNucleicAcidFivePrime), + $"Terminal oligo (PreviousResidue='-') should have NucleicAcid.DefaultFivePrimeTerminus for scan {result.Ms2ScanNumber}"); + } + else + { + Assert.That(result.FivePrimeTerminus.ThisChemicalFormula, + Is.EqualTo(expectedRnaseFivePrime), + $"Internal oligo (PreviousResidue!='-') should have Rnase.DefaultFivePrimeTerminus for scan {result.Ms2ScanNumber}"); + } + + if (result.NextResidue == "-") + { + Assert.That(result.ThreePrimeTerminus.ThisChemicalFormula, + Is.EqualTo(expectedNucleicAcidThreePrime), + $"Terminal oligo (NextResidue='-') should have NucleicAcid.DefaultThreePrimeTerminus for scan {result.Ms2ScanNumber}"); + } + else + { + Assert.That(result.ThreePrimeTerminus.ThisChemicalFormula, + Is.EqualTo(expectedRnaseThreePrime), + $"Internal oligo (NextResidue!='-') should have Rnase.DefaultThreePrimeTerminus for scan {result.Ms2ScanNumber}"); + } + } + } + + [Test] + public static void SpectrumMatchTsvReader_ParsesTerminusHeadersCorrectly() + { + // Verify that SpectrumMatchTsvReader.ParseHeader correctly identifies terminus columns + string testHeader = "File Name\tScan Number\tFull Sequence\t5'-Terminus\t3'-Terminus\tPrevious Residue\tNext Residue"; + var parsedHeader = SpectrumMatchTsvReader.ParseHeader(testHeader); + + Assert.That(parsedHeader.ContainsKey(SpectrumMatchFromTsvHeader.FivePrimeTerminus), Is.True, + "ParseHeader should recognize 5'-Terminus column"); + Assert.That(parsedHeader.ContainsKey(SpectrumMatchFromTsvHeader.ThreePrimeTerminus), Is.True, + "ParseHeader should recognize 3'-Terminus column"); + + Assert.That(parsedHeader[SpectrumMatchFromTsvHeader.FivePrimeTerminus], Is.EqualTo(3), + "5'-Terminus should be at index 3"); + Assert.That(parsedHeader[SpectrumMatchFromTsvHeader.ThreePrimeTerminus], Is.EqualTo(4), + "3'-Terminus should be at index 4"); + } + + [Test] + public static void SpectrumMatchTsvReader_HandlesAbsentTerminusHeaders() + { + // Verify that ParseHeader handles missing terminus columns correctly + string testHeader = "File Name\tScan Number\tFull Sequence\tPrevious Residue\tNext Residue"; + var parsedHeader = SpectrumMatchTsvReader.ParseHeader(testHeader); + + Assert.That(parsedHeader.ContainsKey(SpectrumMatchFromTsvHeader.FivePrimeTerminus), Is.True, + "ParseHeader should include FivePrimeTerminus key even when column absent"); + Assert.That(parsedHeader.ContainsKey(SpectrumMatchFromTsvHeader.ThreePrimeTerminus), Is.True, + "ParseHeader should include ThreePrimeTerminus key even when column absent"); + + Assert.That(parsedHeader[SpectrumMatchFromTsvHeader.FivePrimeTerminus], Is.EqualTo(-1), + "Missing 5'-Terminus column should have index -1"); + Assert.That(parsedHeader[SpectrumMatchFromTsvHeader.ThreePrimeTerminus], Is.EqualTo(-1), + "Missing 3'-Terminus column should have index -1"); + } + + [Test] + public static void OSM_ParsesTerminusCorrectly_NotInOsmFile_FullTranscript() + { + var osmLines = File.ReadAllLines(OsmFilePath); + var header = osmLines[0]; + var parsedHeader = SpectrumMatchTsvReader.ParseHeader(header); + var firstDataLine = osmLines[1]; + var spl = firstDataLine.Split('\t').Select(p => p.Trim('\"')).ToArray(); + + // Ensure prev and next residues match a full transcript + spl[parsedHeader[SpectrumMatchFromTsvHeader.PreviousResidue]] = "-"; + spl[parsedHeader[SpectrumMatchFromTsvHeader.NextResidue]] = "-"; + firstDataLine = string.Join("\t", spl); + + var osm = new OsmFromTsv(firstDataLine, ['\t'], parsedHeader); + Assert.That(osm.ThreePrimeTerminus, Is.EqualTo(NucleicAcid.DefaultThreePrimeTerminus), + "Full transcript should have NucleicAcid.DefaultThreePrimeTerminus"); + Assert.That(osm.FivePrimeTerminus, Is.EqualTo(NucleicAcid.DefaultFivePrimeTerminus), + "Full transcript should have NucleicAcid.DefaultFivePrimeTerminus"); + } + + [Test] + public static void OSM_ParsesTerminusCorrectly_NotInOsmFile_TerminalOligo() + { + var osmLines = File.ReadAllLines(OsmFilePath); + var header = osmLines[0]; + var parsedHeader = SpectrumMatchTsvReader.ParseHeader(header); + var firstDataLine = osmLines[1]; + var spl = firstDataLine.Split('\t').Select(p => p.Trim('\"')).ToArray(); + + // Ensure prev and next residues match a 5'-terminal oligo + spl[parsedHeader[SpectrumMatchFromTsvHeader.PreviousResidue]] = "-"; + spl[parsedHeader[SpectrumMatchFromTsvHeader.NextResidue]] = "A"; + firstDataLine = string.Join("\t", spl); + + var osm = new OsmFromTsv(firstDataLine, ['\t'], parsedHeader); + Assert.That(osm.FivePrimeTerminus, Is.EqualTo(NucleicAcid.DefaultFivePrimeTerminus), + "5'-terminal oligo should have NucleicAcid.DefaultFivePrimeTerminus"); + Assert.That(osm.ThreePrimeTerminus, Is.EqualTo(Rnase.DefaultThreePrimeTerminus), + "5'-terminal oligo should have Rnase.DefaultThreePrimeTerminus"); + + // ensure prev and next residues match a 3'-terminal oligo + spl[parsedHeader[SpectrumMatchFromTsvHeader.PreviousResidue]] = "U"; + spl[parsedHeader[SpectrumMatchFromTsvHeader.NextResidue]] = "-"; + firstDataLine = string.Join("\t", spl); + + osm = new OsmFromTsv(firstDataLine, ['\t'], parsedHeader); + Assert.That(osm.FivePrimeTerminus, Is.EqualTo(Rnase.DefaultFivePrimeTerminus), + "5'-terminal oligo should have Rnase.DefaultFivePrimeTerminus"); + Assert.That(osm.ThreePrimeTerminus, Is.EqualTo(NucleicAcid.DefaultThreePrimeTerminus), + "3'-terminal oligo should have NucleicAcid.DefaultThreePrimeTerminus"); + } + + [Test] + public static void OSM_ParsesTerminusCorrectly_NotInOsmFile_InternalOligo() + { + var osmLines = File.ReadAllLines(OsmFilePath); + var header = osmLines[0]; + var parsedHeader = SpectrumMatchTsvReader.ParseHeader(header); + var firstDataLine = osmLines[1]; + var spl = firstDataLine.Split('\t').Select(p => p.Trim('\"')).ToArray(); + + // Ensure prev and next residues match a internal oligo + spl[parsedHeader[SpectrumMatchFromTsvHeader.PreviousResidue]] = "U"; + spl[parsedHeader[SpectrumMatchFromTsvHeader.NextResidue]] = "A"; + firstDataLine = string.Join("\t", spl); + + var osm = new OsmFromTsv(firstDataLine, ['\t'], parsedHeader); + Assert.That(osm.ThreePrimeTerminus, Is.EqualTo(Rnase.DefaultThreePrimeTerminus), + "Internal oligo should have Rnase.DefaultThreePrimeTerminus"); + Assert.That(osm.FivePrimeTerminus, Is.EqualTo(Rnase.DefaultFivePrimeTerminus), + "Internal oligo should have Rnase.DefaultFivePrimeTerminus"); + } + + [Test] + public static void OSM_ParsesTerminusCorrectly_InOsmFile_CustomFormulas() + { + var osmLines = File.ReadAllLines(OsmFilePath); + var header = osmLines[0] + "\t5'-Terminus\t3'-Terminus"; + var parsedHeader = SpectrumMatchTsvReader.ParseHeader(header); + + Assert.That(parsedHeader.ContainsKey(SpectrumMatchFromTsvHeader.FivePrimeTerminus), Is.True, + "ParseHeader should recognize 5'-Terminus column"); + Assert.That(parsedHeader.ContainsKey(SpectrumMatchFromTsvHeader.ThreePrimeTerminus), Is.True, + "ParseHeader should recognize 3'-Terminus column"); + + var dummyFivePrime = ChemicalFormula.ParseFormula("Cr2N7O2"); + var dummyThreePrime = ChemicalFormula.ParseFormula("Y3Ir2Dy6"); + + var firstDataLine = osmLines[1] + $"\t\"{dummyFivePrime.Formula}\"\t\"{dummyThreePrime.Formula}\""; + + var osm = new OsmFromTsv(firstDataLine, ['\t'], parsedHeader); + Assert.That(osm.FivePrimeTerminus.ThisChemicalFormula.Formula, Is.EqualTo(dummyFivePrime.Formula), + "Custom 5'-Terminus formula not parsed correctly."); + Assert.That(osm.ThreePrimeTerminus.ThisChemicalFormula.Formula, Is.EqualTo(dummyThreePrime.Formula), + "Custom 3'-Terminus formula not parsed correctly."); + } }