-
Notifications
You must be signed in to change notification settings - Fork 110
Make "Associate Proteins" more forgiving when peptide filter or digest settings would exclude the peptide #3169
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
nickshulman
merged 17 commits into
master
from
Skyline/work/20240923_AssociateProteinsLessStrictFiltering
Oct 11, 2024
Merged
Changes from 1 commit
Commits
Show all changes
17 commits
Select commit
Hold shift + click to select a range
d21d2c5
Allow non-tryptic peptides to be associated with a protein so long as…
nickshulman 2fffb19
Fix problem where results were being updated at wrong spot in the loop
nickshulman 56373de
Fix incorrect use of "Parallel" instead of "ParallelEx"
nickshulman 3a84298
Merge branch 'master' into Skyline/work/20240923_AssociateProteinsLes…
nickshulman b7a47f5
Merge branch 'master' into Skyline/work/20240923_AssociateProteinsLes…
nickshulman 10f2de0
Fix intermittent failure in TestHugeAssociateProteins
nickshulman 7b0e4e4
Use Tuple<string, bool> in "ProteinPeptideMatches"
nickshulman 71c918b
Add "ProteinAssociationTest"
nickshulman a67a923
Delete "TwoProteins.fasta" and use TemporaryDirectory instead
nickshulman 2b433d1
Use ParallelEx.For to enumerate over ProteinPeptideMatches objects
nickshulman 6a0ae1b
Add method "ProteinAssociation.UseProteinSource"
nickshulman da5516b
Merge remote-tracking branch 'remotes/origin/master' into Skyline/wor…
nickshulman 1a71b69
Change "AssociateProteins" to take an Enzyme instead of passing in a …
nickshulman 4235938
Remove inaccurate comment
nickshulman e27a2f8
Use Enzyme from the document instead of passing it in.
nickshulman 234c1b0
Fix TestAssociateProteins
nickshulman fb7a62c
Merge remote-tracking branch 'remotes/origin/master' into Skyline/wor…
nickshulman File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
/* | ||
* Original author: Nicholas Shulman <nicksh .at. u.washington.edu>, | ||
* MacCoss Lab, Department of Genome Sciences, UW | ||
* | ||
* Copyright 2024 University of Washington - Seattle, WA | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Threading; | ||
using System.Windows.Forms; | ||
using Microsoft.VisualStudio.TestTools.UnitTesting; | ||
using pwiz.Skyline.Model; | ||
using pwiz.Skyline.Model.DocSettings; | ||
using pwiz.Skyline.Model.Proteome; | ||
using pwiz.Skyline.Properties; | ||
using pwiz.Skyline.Util; | ||
using pwiz.SkylineTestUtil; | ||
|
||
namespace pwiz.SkylineTest | ||
{ | ||
/// <summary>
/// Tests that <see cref="ProteinAssociation"/> associates peptides with the proteins in a FASTA file,
/// and that a peptide which does not conform to the enzyme for any protein is still associated.
/// </summary>
[TestClass]
public class ProteinAssociationTest : AbstractUnitTest
{
    /// <summary>
    /// Associates a fixed peptide list with the proteins in "TwoProteins.fasta", first using the default
    /// enzyme and then using chymotrypsin, and verifies which proteins claim the peptide "NKEIFLR".
    /// </summary>
    [TestMethod]
    public void TestTrypticProteinAssociation()
    {
        TestFilesDir = new TestFilesDir(TestContext, @"Test\ProteinAssociationTest.data");
        var peptides = new[]
        {
            "ADLINNLGTIAK", "ELISNASDALDKIR", "FAFQAEVNR", "IDIIPNPQER", "LIINSLYK",
            "LISLTDENALSGNEELTVK", "NKEIFLR", "NLLHVTDTGVGMTR", "SGTSEFLNK", "TDDEVVQREEEAIQLDGLNASQIR",
            "KYSQFINFPIYVWSSK"
        };
        var document = CreateDocumentWithPeptides(peptides);
        string fastaFilePath = TestFilesDir.GetTestPath("TwoProteins.fasta");

        // Associate proteins using Trypsin. The peptide "NKEIFLR" is only tryptic for the first protein
        var trypsin = EnzymeList.GetDefault();
        var trypsinAssociatedProteins = AssociateProteins(document, fastaFilePath, trypsin);
        CollectionAssert.Contains(trypsinAssociatedProteins["Protein1"], "NKEIFLR");
        CollectionAssert.DoesNotContain(trypsinAssociatedProteins["Protein2"], "NKEIFLR");
        CollectionAssert.AreEquivalent(new[] { "ADLINNLGTIAK", "ELISNASDALDKIR", "IDIIPNPQER" },
            trypsinAssociatedProteins["Protein2"]);

        // Now associate proteins using Chymotrypsin. The peptide "NKEIFLR" is not chymotryptic for either protein,
        // so it is expected to be associated with both of them.
        var chymotrypsin = new Enzyme("Chymotrypsin", "FWYL", "P");
        var chymotrypsinAssociatedProteins = AssociateProteins(document, fastaFilePath, chymotrypsin);
        // These assertions must inspect the chymotrypsin result; checking the trypsin result here
        // would contradict the "DoesNotContain" assertion made above for Protein2.
        CollectionAssert.Contains(chymotrypsinAssociatedProteins["Protein1"], "NKEIFLR");
        CollectionAssert.Contains(chymotrypsinAssociatedProteins["Protein2"], "NKEIFLR");
        CollectionAssert.AreEquivalent(new[] { "ADLINNLGTIAK", "ELISNASDALDKIR", "IDIIPNPQER", "NKEIFLR" },
            chymotrypsinAssociatedProteins["Protein2"]);
    }

    /// <summary>
    /// Runs protein association for <paramref name="document"/> against the FASTA file at
    /// <paramref name="fastaFilePath"/>, digesting each protein sequence with <paramref name="enzyme"/>.
    /// </summary>
    /// <param name="document">Document whose peptides are to be associated with proteins.</param>
    /// <param name="fastaFilePath">Path of the FASTA file supplying the protein sequences.</param>
    /// <param name="enzyme">Enzyme used to digest each protein sequence.</param>
    /// <returns>A map from protein name to the sequences of the peptides associated with that protein.</returns>
    private static Dictionary<string, List<string>> AssociateProteins(SrmDocument document, string fastaFilePath, Enzyme enzyme)
    {
        // Digest with the maximum number of missed cleavages.
        // NOTE(review): the "false" argument presumably disables ragged-end exclusion - confirm against DigestSettings.
        var lenientDigestSettings = new DigestSettings(DigestSettings.MAX_MISSED_CLEAVAGES, false);
        var proteinAssociation = new ProteinAssociation(document, new LongWaitBrokerImpl());
        proteinAssociation.UseFastaFile(fastaFilePath, proteinSequence => enzyme.Digest(proteinSequence, lenientDigestSettings), new LongWaitBrokerImpl());
        return proteinAssociation.AssociatedProteins.ToDictionary(
            kvp => kvp.Key.Sequence.Name, kvp => kvp.Value.Peptides.Select(p => p.Peptide.Sequence).ToList());
    }

    /// <summary>
    /// Builds a document with default settings containing a single peptide list named "Peptide List"
    /// that holds one peptide node for each of the given sequences.
    /// </summary>
    /// <param name="peptides">Peptide sequences to add to the document.</param>
    /// <returns>The new document.</returns>
    private static SrmDocument CreateDocumentWithPeptides(IEnumerable<string> peptides)
    {
        var settings = SrmSettingsList.GetDefault();
        var peptideDocNodes = new List<PeptideDocNode>();
        foreach (var peptideSequence in peptides)
        {
            var peptideDocNode =
                new PeptideDocNode(new Peptide(peptideSequence)).ChangeSettings(settings, SrmSettingsDiff.ALL);
            peptideDocNodes.Add(peptideDocNode);
        }

        var peptideGroupDocNode = new PeptideGroupDocNode(new PeptideGroup(), Annotations.EMPTY, "Peptide List",
            null, peptideDocNodes.ToArray());
        return (SrmDocument)new SrmDocument(settings).ChangeChildren(new DocNode[] { peptideGroupDocNode });
    }

    /// <summary>
    /// Minimal <see cref="ILongWaitBroker"/> for use without any UI: it is never canceled,
    /// ignores progress updates, and cannot show dialogs.
    /// </summary>
    private class LongWaitBrokerImpl : ILongWaitBroker
    {
        /// <summary>Always false.</summary>
        public bool IsCanceled
        {
            get { return false; }
        }
        public int ProgressValue { get; set; }
        public string Message { get; set; }

        /// <summary>Always reports that the document has not changed.</summary>
        public bool IsDocumentChanged(SrmDocument docOrig)
        {
            return false;
        }

        /// <summary>Not supported by this implementation.</summary>
        /// <exception cref="InvalidOperationException">Always thrown.</exception>
        public DialogResult ShowDialog(Func<IWin32Window, DialogResult> show)
        {
            throw new InvalidOperationException();
        }

        /// <summary>Does nothing; progress is not tracked.</summary>
        public void SetProgressCheckCancel(int step, int totalSteps)
        {
        }

        public CancellationToken CancellationToken => CancellationToken.None;
    }
}
} |
8 changes: 8 additions & 0 deletions
8
pwiz_tools/Skyline/Test/ProteinAssociationTest.data/TwoProteins.fasta
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
>Protein1 | ||
MRALWVLGLCCVLLTFGSVRADDEVDVDGTVEEDLGKSREGSRTDDEVVQREEEAIQLDG | ||
LNASQIRELREKSEKFAFQAEVNRMMKLIINSLYKNKEIFLRELISNASDALDKIRLISL | ||
TDENALSGNEELTVKIKCDKEKNLLHVTDTGVGMTREELVKNLGTIAKSGTSEFLNKMTE | ||
>Protein2 | ||
MPEEVHHGEEEVETFAFQAEIAQLMSLIINTFYSNKEIFLRELISNASDALDKIRYESLT | ||
DPSKLDSGKELKIDIIPNPQERTLTLVDTGIGMTKADLINNLGTIAKSGTKAFMEALQAG | ||
ADISMIGQFGVGFYSAYLVAEKVVVITKHNDDEQYAWESSAGGSFTVRADHGEPIGRGTK |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we want a pattern of many unit tests having their own .data directory with small test data files in them? I think you could just write this out to a temp file directly from the code to keep the repo tidier. I love the new ability to keep things in .data dir instead of a .zip file, but think we should still use it judiciously. Like .sky files. Those would be a pain to write directly from test code (as xml I mean; the settings can be generated programmatically). As would any big DSV file.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The only way I know to create and clean up a temporary directory is by using "TestFilesDir".
Do you know of an easier way to do that which does not require either a .zip file or .data folder?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How about:
using var testDir = new TemporaryDirectory(Path.Combine(TestContext.TestRunDirectory, TestContext.TestName));
If it works we could definitely have a shortcut for that in AbstractUnitTest. :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, that works. Thanks!
I am probably going to add a method to ProteinAssociation which takes a ProteinSource so that it can be used without any file on disk.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unfortunately,
TestContext.TestRunDirectory
is null when running from TestRunner. I added a method
ProteinAssociation.UseProteinSource
so I do not need a file on disk.