Skip to content

Commit

Permalink
Result Text and Individual File Result Cleanup (#2397)
Browse files Browse the repository at this point in the history
* Updated to MzLib 1.0.548 and fixed custom ions in search tasks

* reverted calibration task change

* merged in master bbbyy

* Spectral Library from Command Line (#2386)

* Updated to MzLib 1.0.548 and fixed custom ions in search tasks

* reverted calibration task change

* merged in master bbbyy

* Enabled Library Loading from command line

* replaced "Peptides" with GlobalVariables.AnalyteType

* built lazy loading search result structure for post search analysis task

* Finished test case structure, added test for result files. Added top-down test case

* changed access modifier

* Refactored test case setup

* Added more test cases to hopefully up coverage
  • Loading branch information
nbollis authored Aug 16, 2024
1 parent 401a5a9 commit 1d402e2
Show file tree
Hide file tree
Showing 13 changed files with 513 additions and 117 deletions.
61 changes: 31 additions & 30 deletions MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,17 @@ public MyTaskResults Run()
HistogramAnalysis();
WritePsmResults();
WritePeptideResults();
if (Parameters.CurrentRawFileList.Count > 1 && Parameters.SearchParameters.WriteIndividualFiles)
if (Parameters.CurrentRawFileList.Count > 1 && (Parameters.SearchParameters.WriteIndividualFiles
|| Parameters.SearchParameters.WriteMzId ||
Parameters.SearchParameters.WritePepXml))
{
// create individual files subdirectory
Directory.CreateDirectory(Parameters.IndividualResultsOutputFolder);
WriteIndividualPsmResults();
WriteIndividualPeptideResults();
if (Parameters.SearchParameters.WriteIndividualFiles)
{
WriteIndividualPsmResults();
WriteIndividualPeptideResults();
}
}
WriteProteinResults();
AddResultsTotalsToAllResultsTsv();
Expand Down Expand Up @@ -615,7 +620,7 @@ private void WritePsmResults()
"PEP could not be calculated due to an insufficient number of PSMs. Results were filtered by q-value." +
Environment.NewLine);
}
string psmResultsText = "All target PSMs with " + psmsForPsmResults.FilterType + " = " + Math.Round(psmsForPsmResults.FilterThreshold, 2) + ": " +
string psmResultsText = "All target PSMs with " + psmsForPsmResults.FilterType + " <= " + Math.Round(psmsForPsmResults.FilterThreshold, 2) + ": " +
psmsForPsmResults.TargetPsmsAboveThreshold;
ResultsDictionary[("All", "PSMs")] = psmResultsText;
}
Expand All @@ -632,7 +637,7 @@ private void WritePeptideResults()
filterAtPeptideLevel: true);

// write PSMs
string writtenFile = Path.Combine(Parameters.OutputFolder, "AllPeptides.psmtsv");
string writtenFile = Path.Combine(Parameters.OutputFolder, $"All{GlobalVariables.AnalyteType}s.psmtsv");
WritePsmsToTsv(peptidesForPeptideResults.OrderByDescending(p => p).ToList(), writtenFile, writePeptideLevelResults: true);
FinishedWritingFile(writtenFile, new List<string> { Parameters.SearchTaskId });

Expand All @@ -642,9 +647,9 @@ private void WritePeptideResults()
Parameters.SearchTaskResults.AddPsmPeptideProteinSummaryText(
"PEP could not be calculated due to an insufficient number of PSMs. Results were filtered by q-value." + Environment.NewLine);
}
string peptideResultsText = "All target peptides with " + peptidesForPeptideResults.FilterType + " = " + Math.Round(peptidesForPeptideResults.FilterThreshold, 2) + ": " +
string peptideResultsText = $"All target {GlobalVariables.AnalyteType.ToLower()}s with " + peptidesForPeptideResults.FilterType + " <= " + Math.Round(peptidesForPeptideResults.FilterThreshold, 2) + ": " +
peptidesForPeptideResults.TargetPsmsAboveThreshold;
ResultsDictionary[("All", "Peptides")] = peptideResultsText;
ResultsDictionary[("All", GlobalVariables.AnalyteType)] = peptideResultsText;
}

private void WriteIndividualPsmResults()
Expand Down Expand Up @@ -679,7 +684,7 @@ private void WriteIndividualPsmResults()
FinishedWritingFile(writtenFile, new List<string> { Parameters.SearchTaskId, "Individual Spectra Files", psmFileGroup.Key });

// write summary text
string psmResultsText = strippedFileName + " - All target PSMs with " + psmsToWrite.FilterType + " = " + Math.Round(psmsToWrite.FilterThreshold, 2) + ": " +
string psmResultsText = strippedFileName + " - Target PSMs with " + psmsToWrite.FilterType + " <= " + Math.Round(psmsToWrite.FilterThreshold, 2) + ": " +
psmsToWrite.TargetPsmsAboveThreshold;
ResultsDictionary[(strippedFileName, "PSMs")] = psmResultsText;
}
Expand Down Expand Up @@ -710,16 +715,16 @@ private void WriteIndividualPeptideResults()
filterAtPeptideLevel: true);

// write PSMs
string writtenFile = Path.Combine(Parameters.IndividualResultsOutputFolder, strippedFileName + "_Peptides.psmtsv");
string writtenFile = Path.Combine(Parameters.IndividualResultsOutputFolder, strippedFileName + $"_{GlobalVariables.AnalyteType}s.psmtsv");
WritePsmsToTsv(peptidesToWrite, writtenFile, writePeptideLevelResults: true);
FinishedWritingFile(writtenFile, new List<string> { Parameters.SearchTaskId, "Individual Spectra Files", psmFileGroup.Key });

// write summary text
string peptideResultsText = strippedFileName + " - All target peptides with " + peptidesToWrite.FilterType + " = " + Math.Round(peptidesToWrite.FilterThreshold, 2) + ": " +
string peptideResultsText = strippedFileName + $" - Target {GlobalVariables.AnalyteType.ToLower()}s with " + peptidesToWrite.FilterType + " <= " + Math.Round(peptidesToWrite.FilterThreshold, 2) + ": " +
peptidesToWrite.TargetPsmsAboveThreshold;
ResultsDictionary[(strippedFileName, "Peptides")] = peptideResultsText;
ResultsDictionary[(strippedFileName, GlobalVariables.AnalyteType)] = peptideResultsText;
}

}
private void UpdateSpectralLibrary()
{
Expand Down Expand Up @@ -831,7 +836,7 @@ private void WriteProteinResults()
}
else
{
string proteinResultsText = "All target protein groups with q-value = 0.01 (1% FDR): " + ProteinGroups.Count(b => b.QValue <= 0.01 && !b.IsDecoy);
string proteinResultsText = "All target protein groups with q-value <= 0.01 (1% FDR): " + ProteinGroups.Count(b => b.QValue <= 0.01 && !b.IsDecoy);
ResultsDictionary[("All", "Proteins")] = proteinResultsText;
}

Expand All @@ -847,13 +852,6 @@ private void WriteProteinResults()
string writtenFile = Path.Combine(Parameters.OutputFolder, fileName);
WriteProteinGroupsToTsv(ProteinGroups, writtenFile, new List<string> { Parameters.SearchTaskId });

if (Parameters.CurrentRawFileList.Count > 1 && (Parameters.SearchParameters.WriteIndividualFiles
|| Parameters.SearchParameters.WriteMzId ||
Parameters.SearchParameters.WritePepXml))
{
Directory.CreateDirectory(Parameters.IndividualResultsOutputFolder);
}

var psmsGroupedByFile = FilteredPsms.Filter(Parameters.AllPsms,
CommonParameters,
includeDecoys: true,
Expand Down Expand Up @@ -910,14 +908,15 @@ private void WriteProteinResults()

if (Parameters.SearchParameters.WriteIndividualFiles && Parameters.CurrentRawFileList.Count > 1)
{
// write summary text
string proteinResultsText = strippedFileName + " - Target protein groups within 1 % FDR: " + subsetProteinGroupsForThisFile.Count(b => b.QValue <= 0.01 && !b.IsDecoy);
ResultsDictionary[(strippedFileName, "Proteins")] = proteinResultsText;

// write result files
writtenFile = Path.Combine(Parameters.IndividualResultsOutputFolder, strippedFileName + "_ProteinGroups.tsv");
WriteProteinGroupsToTsv(subsetProteinGroupsForThisFile, writtenFile, new List<string> { Parameters.SearchTaskId, "Individual Spectra Files", fullFilePath });
}

// write summary text
string proteinResultsText = strippedFileName + " - Target protein groups within 1 % FDR: " + subsetProteinGroupsForThisFile.Count(b => b.QValue <= 0.01 && !b.IsDecoy);
ResultsDictionary[(strippedFileName, "Proteins")] = proteinResultsText;

psmsForThisFile = FilteredPsms.Filter(psmsForThisFile,
CommonParameters,
includeDecoys: Parameters.SearchParameters.WriteDecoys,
Expand Down Expand Up @@ -1851,26 +1850,28 @@ private void WriteProteinGroupsToTsv(List<EngineLayer.ProteinGroup> proteinGroup
FinishedWritingFile(filePath, nestedIds);
}
}

/// <summary>
/// This is a handy dictionary to keep track of the PSM, peptide and protein count results at the
/// "All" level and at the individual raw file level.
/// The keys are a tuple such as ("All", "PSMs") or ("RawFileName", "Peptides")
// The values are the results as a string
/// The values are the results as a string
/// </summary>
private void ConstructResultsDictionary()
{
ResultsDictionary = new();

ResultsDictionary.Add(("All", "PSMs"),"");
ResultsDictionary.Add(("All", "Peptides"), "");
ResultsDictionary = new()
{
{ ("All", "PSMs"), "" },
{ ("All", GlobalVariables.AnalyteType), "" }
};

if (Parameters.CurrentRawFileList.Count > 1 && Parameters.SearchParameters.WriteIndividualFiles)
{
foreach (var rawFile in Parameters.CurrentRawFileList)
{
string fileNameWithoutExtension = Path.GetFileNameWithoutExtension(rawFile);
ResultsDictionary.Add((fileNameWithoutExtension, "PSMs"), "");
ResultsDictionary.Add((fileNameWithoutExtension, "Peptides"), "");
ResultsDictionary.Add((fileNameWithoutExtension, GlobalVariables.AnalyteType), "");
}
}

Expand Down
192 changes: 192 additions & 0 deletions MetaMorpheus/Test/EverythingRunnerEngineTestCase.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.IO;
using System.Linq;
using Nett;
using NUnit.Framework;
using TaskLayer;

namespace Test
{
/// <summary>
/// Identifies each pre-configured search scenario registered by
/// <see cref="EverythingRunnerEngineTestCase"/>. The member name is also used as the
/// name of that scenario's output sub-directory.
/// </summary>
public enum EverythingRunnerEngineTestCases
{
    /// <summary>Task1 search config, two TaGe_SA_A549 spectra files, not top-down.</summary>
    BottomUpQValue,

    /// <summary>Task1 search config with WriteIndividualFiles off, WriteMzId on, WritePepXml off; two spectra files.</summary>
    BottomUpQValueNoIndividualFilesWriteMzId,

    /// <summary>Task1 search config with WriteIndividualFiles off, WriteMzId off, WritePepXml on; two spectra files.</summary>
    BottomUpQValueNoIndividualFilesWritePepXml,

    /// <summary>Task1 search config, a single TaGe_SA_A549 spectra file, not top-down.</summary>
    BottomUpQValueSingle,

    /// <summary>Task2 search config, two TaGe_SA_A549 spectra files, not top-down.</summary>
    BottomUpPepQValue,

    /// <summary>Top-down search config, two yeast spectra files.</summary>
    TopDownQValue,

    /// <summary>Top-down search config, a single yeast spectra file.</summary>
    TopDownQValueSingle,
}

/// <summary>
/// Test cases for the post search analysis task. These test cases are used to verify that the post search analysis task is functioning correctly.
/// This structure ensures that each database search is run at most once, and only when its test case is first requested.
/// These directories are cleaned up in the Global Cleanup found in SetUpTests.GlobalTearDown
/// </summary>
[ExcludeFromCodeCoverage]
internal class EverythingRunnerEngineTestCase : IDisposable
{
    /// <summary>Task id given to the single search task of every registered case.</summary>
    private const string SearchTaskId = "postSearchAnalysisTaskTestOutput";

    /// <summary>Which scenario this instance represents; also names its output sub-directory.</summary>
    internal EverythingRunnerEngineTestCases TestCase { get; init; }

    /// <summary>The (task id, task) pairs handed to the runner.</summary>
    internal List<(string, MetaMorpheusTask)> TaskList { get; init; }

    /// <summary>Databases handed to the runner.</summary>
    internal List<DbForTask> DatabaseList { get; init; }

    /// <summary>Spectra files handed to the runner.</summary>
    internal List<string> DataFileList { get; init; }

    /// <summary>Directory the runner writes to: <see cref="ResultDirectory"/> plus the test case name.</summary>
    internal string OutputDirectory => Path.Combine(ResultDirectory, TestCase.ToString());

    /// <summary>Flag supplied by the caller at construction; stored as-is.</summary>
    internal bool IsTopDown { get; init; }

    /// <summary>True once <see cref="Run"/> has completed and until <see cref="Dispose"/> removes the output.</summary>
    internal bool HasRun { get; private set; }

    /// <summary>Copied from the first search task's WriteIndividualFiles setting; false when there is no search task.</summary>
    internal bool WriteIndividualResults { get; init; }

    /// <summary>Copied from the first search task's WritePepXml setting; false when there is no search task.</summary>
    internal bool WritePepXml { get; init; }

    /// <summary>Copied from the first search task's WriteMzId setting; false when there is no search task.</summary>
    internal bool WriteMzId { get; init; }

    /// <summary>
    /// Creates a test case without running it. The write flags are read from the first task in
    /// <paramref name="taskList"/> whose type is <see cref="MyTask.Search"/>, if any.
    /// </summary>
    /// <param name="testCase">Identifier of the scenario.</param>
    /// <param name="taskList">Tasks to run, paired with their ids.</param>
    /// <param name="dataFileList">Spectra files to search.</param>
    /// <param name="databaseList">Databases to search against.</param>
    /// <param name="isTopDown">Stored in <see cref="IsTopDown"/>.</param>
    internal EverythingRunnerEngineTestCase(EverythingRunnerEngineTestCases testCase,
        List<(string, MetaMorpheusTask)> taskList, List<string> dataFileList,
        List<DbForTask> databaseList, bool isTopDown)
    {
        TestCase = testCase;
        TaskList = taskList;
        DatabaseList = databaseList;
        DataFileList = dataFileList;
        IsTopDown = isTopDown;
        HasRun = false;

        var firstSearchTask = taskList.Select(p => p.Item2).FirstOrDefault(p => p.TaskType == MyTask.Search);
        if (firstSearchTask is null)
        {
            return;
        }

        var searchTask = (SearchTask)firstSearchTask;
        WriteIndividualResults = searchTask.SearchParameters.WriteIndividualFiles;
        WritePepXml = searchTask.SearchParameters.WritePepXml;
        WriteMzId = searchTask.SearchParameters.WriteMzId;
    }

    /// <summary>
    /// Deletes any existing output for this case, runs all tasks through an
    /// <see cref="EverythingRunnerEngine"/>, and marks the case as run.
    /// </summary>
    internal void Run()
    {
        DeleteOutputDirectory();

        var runner = new EverythingRunnerEngine(TaskList, DataFileList, DatabaseList, OutputDirectory);
        runner.Run();
        HasRun = true;
    }

    /// <summary>
    /// Deletes this case's output directory. <see cref="HasRun"/> is reset so that a later
    /// <see cref="TryGetTestCase"/> re-runs the search instead of returning a case whose
    /// result files no longer exist.
    /// </summary>
    public void Dispose()
    {
        DeleteOutputDirectory();
        HasRun = false;
    }

    /// <summary>Recursively removes <see cref="OutputDirectory"/> when it exists.</summary>
    private void DeleteOutputDirectory()
    {
        if (Directory.Exists(OutputDirectory))
        {
            Directory.Delete(OutputDirectory, true);
        }
    }

    #region Case Setup

    /// <summary>Root directory under which every case writes its own sub-directory.</summary>
    internal static string ResultDirectory =>
        Path.Combine(TestContext.CurrentContext.TestDirectory, "TestData", "PostSearchAnalysisTaskTest");

    private static readonly Dictionary<EverythingRunnerEngineTestCases, EverythingRunnerEngineTestCase> _cases = new();

    /// <summary>
    /// Looks up a registered case and runs it first if it has not been run yet.
    /// </summary>
    /// <param name="testCase">Case to look up.</param>
    /// <param name="outCase">The registered case, or null when none is registered.</param>
    /// <returns>True when the case is registered; false otherwise.</returns>
    internal static bool TryGetTestCase(EverythingRunnerEngineTestCases testCase,
        out EverythingRunnerEngineTestCase outCase)
    {
        if (!_cases.TryGetValue(testCase, out outCase))
        {
            return false;
        }

        if (!outCase.HasRun)
        {
            outCase.Run();
        }

        return true;
    }

    /// <summary>
    /// Same as <see cref="TryGetTestCase"/>, but throws when the case is not registered.
    /// </summary>
    /// <exception cref="KeyNotFoundException">No case is registered for <paramref name="testCase"/>.</exception>
    internal static EverythingRunnerEngineTestCase GetTestCase(EverythingRunnerEngineTestCases testCase)
    {
        if (!TryGetTestCase(testCase, out var outCase))
        {
            throw new KeyNotFoundException($"Test case {testCase} not found");
        }

        return outCase;
    }

    /// <summary>Disposes every registered case, deleting all of their output directories.</summary>
    internal static void DisposeAll()
    {
        foreach (var testCase in _cases.Values)
        {
            testCase.Dispose();
        }
    }

    /// <summary>
    /// Resets <see cref="ResultDirectory"/> and registers every scenario. No search is run here.
    /// </summary>
    static EverythingRunnerEngineTestCase()
    {
        // Directory GlobalSetup: start from an empty result directory.
        // CreateDirectory is a no-op when the directory already exists, so no second existence check is needed.
        if (Directory.Exists(ResultDirectory))
        {
            Directory.Delete(ResultDirectory, true);
        }

        Directory.CreateDirectory(ResultDirectory);

        // Bottom-up cases
        string myFile1 = InTestDirectory("TestData", "TaGe_SA_A549_3_snip.mzML");
        string myFile2 = InTestDirectory("TestData", "TaGe_SA_A549_3_snip_2.mzML");
        string myDatabase = InTestDirectory("TestData", "TaGe_SA_A549_3_snip.fasta");

        // NOTE(review): the two cases below share one SearchTask instance — confirm a task can safely be run twice.
        SearchTask searchTaskLoaded = LoadSearchTask("TestData", "Task1-SearchTaskconfig.toml");
        AddCase(EverythingRunnerEngineTestCases.BottomUpQValue, searchTaskLoaded,
            new List<string> { myFile1, myFile2 }, myDatabase, false);
        AddCase(EverythingRunnerEngineTestCases.BottomUpQValueSingle, searchTaskLoaded,
            new List<string> { myFile2 }, myDatabase, false);

        searchTaskLoaded = LoadSearchTask("TestData", "Task1-SearchTaskconfig.toml");
        searchTaskLoaded.SearchParameters.WriteIndividualFiles = false;
        searchTaskLoaded.SearchParameters.WriteMzId = true;
        searchTaskLoaded.SearchParameters.WritePepXml = false;
        AddCase(EverythingRunnerEngineTestCases.BottomUpQValueNoIndividualFilesWriteMzId, searchTaskLoaded,
            new List<string> { myFile1, myFile2 }, myDatabase, false);

        searchTaskLoaded = LoadSearchTask("TestData", "Task1-SearchTaskconfig.toml");
        searchTaskLoaded.SearchParameters.WriteIndividualFiles = false;
        searchTaskLoaded.SearchParameters.WriteMzId = false;
        searchTaskLoaded.SearchParameters.WritePepXml = true;
        AddCase(EverythingRunnerEngineTestCases.BottomUpQValueNoIndividualFilesWritePepXml, searchTaskLoaded,
            new List<string> { myFile1, myFile2 }, myDatabase, false);

        searchTaskLoaded = LoadSearchTask("TestData", "Task2-SearchTaskconfig.toml");
        // TODO: Uncomment this line and change values for PR 2394
        //searchTaskLoaded.CommonParameters.QValueCutoffForPepCalculation = 0.01;
        AddCase(EverythingRunnerEngineTestCases.BottomUpPepQValue, searchTaskLoaded,
            new List<string> { myFile1, myFile2 }, myDatabase, false);

        // Top-down cases
        // NOTE(review): the two cases below share one SearchTask instance — confirm a task can safely be run twice.
        searchTaskLoaded = LoadSearchTask("TopDownTestData", "TopDownSearchToml.toml");
        myFile1 = InTestDirectory("TestData", "SmallCalibratible_Yeast.mzML");
        myFile2 = InTestDirectory("TopDownTestData", "slicedTDYeast.mzML");
        myDatabase = InTestDirectory("TestData", "smalldb.fasta");
        AddCase(EverythingRunnerEngineTestCases.TopDownQValue, searchTaskLoaded,
            new List<string> { myFile1, myFile2 }, myDatabase, true);
        AddCase(EverythingRunnerEngineTestCases.TopDownQValueSingle, searchTaskLoaded,
            new List<string> { myFile2 }, myDatabase, true);
    }

    /// <summary>Builds a path below the NUnit test directory using the platform's directory separator.</summary>
    private static string InTestDirectory(string folder, string fileName) =>
        Path.Combine(TestContext.CurrentContext.TestDirectory, folder, fileName);

    /// <summary>Reads a search task from a toml file located below the NUnit test directory.</summary>
    private static SearchTask LoadSearchTask(string folder, string tomlFileName) =>
        Toml.ReadFile<SearchTask>(InTestDirectory(folder, tomlFileName), MetaMorpheusTask.tomlConfig);

    /// <summary>Registers a single-task, single-database case under <paramref name="testCase"/>.</summary>
    private static void AddCase(EverythingRunnerEngineTestCases testCase, SearchTask searchTask,
        List<string> dataFiles, string databasePath, bool isTopDown)
    {
        _cases.Add(testCase,
            new EverythingRunnerEngineTestCase(testCase,
                new List<(string, MetaMorpheusTask)> { (SearchTaskId, searchTask) },
                dataFiles,
                new List<DbForTask> { new DbForTask(databasePath, false) },
                isTopDown));
    }

    #endregion
}
}


Loading

0 comments on commit 1d402e2

Please sign in to comment.