From 0037ef7a7c5825d629ce65b39d64d9c7d602a09e Mon Sep 17 00:00:00 2001 From: jcharkow Date: Tue, 20 Dec 2022 16:45:47 -0500 Subject: [PATCH 01/13] [API][TEST] OpenSwathIsotopeGeneratorCacher class Add additional class which will cache generated isotope patterns. This should speed up the execution of OpenSwathWorkflow This commit is very preliminary with nothing implmented yet --- .../OpenSwathIsotopeGeneratorCacher.h | 123 +++ .../OpenSwathIsotopeGeneratorCacher.cpp | 829 ++++++++++++++++++ .../class_tests/openms/executables.cmake | 1 + .../OpenSwathIsotopeGeneratorCacher_test.cpp | 63 ++ 4 files changed, 1016 insertions(+) create mode 100644 src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.h create mode 100644 src/openms/source/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.cpp create mode 100644 src/tests/class_tests/openms/source/OpenSwathIsotopeGeneratorCacher_test.cpp diff --git a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.h b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.h new file mode 100644 index 00000000000..7d60c90cc52 --- /dev/null +++ b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.h @@ -0,0 +1,123 @@ +// -------------------------------------------------------------------------- +// OpenMS -- Open-Source Mass Spectrometry +// -------------------------------------------------------------------------- +// Copyright The OpenMS Team -- Eberhard Karls University Tuebingen, +// ETH Zurich, and Freie Universitaet Berlin 2002-2022. +// +// This software is released under a three-clause BSD license: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of any author or any participating institution +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// For a full list of authors, refer to the file AUTHORS. +// -------------------------------------------------------------------------- +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING +// INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// -------------------------------------------------------------------------- +// $Maintainer: Joshua Charkow$ +// $Authors: Joshua Charkow$ +// -------------------------------------------------------------------------- + +#pragma once + +// data access +#include +#include +#include +#include +#include + +//#include +//#include + +namespace OpenMS +{ + /** @brief A class that stores a map of theoretical spectra to be used in OpenSwathScoring + * + * + * + */ + class OPENMS_DLLAPI OpenSwathIsotopeGeneratorCacher + { + typedef OpenMS::FeatureFinderAlgorithmPickedHelperStructs::TheoreticalIsotopePattern TheoreticalIsotopePattern; + + + + std::map cachedDistributions_; + double massStep_; // step between adjacent isotope distributions + double halfMassStep_; // cached value of half of the mass step + CoarseIsotopePatternGenerator solver_; // used for computing the isotope distributions + + + public: + + OpenSwathIsotopeGeneratorCacher(const Size max_isotope, const double massStep, const bool round_masses = false): + solver_(max_isotope, round_masses), + massStep_(massStep), + halfMassStep_(massStep / 2.0) + { + } + + /// Destructor + ~OpenSwathIsotopeGeneratorCacher(); + + + /** @brief Initialize cacher object + * the scoring object + * + * Sets the parameters for the scoring. + * + * @param mzStart - start mz to generate a theoretical isotope distribtuion + * @param mzEnd - end mz to generate a theoretical isotope distribtuion + * @param mzStep - step between precursor mz for isotope distribution + * + */ + void initialize(double mzStart, + int mzEnd); + + double getMassStep(); + + double getHalfMassStep(); + + int getMaxIsotope(); + + bool getRoundMasses(); + + + + /** @brief compute and cache and isotope distribution with the corresponding mass + * @param mass - mass to create the isotope distribution from + */ + IsotopeDistribution addEntry(double mass); + + /** @breif gets the cached isotope distribution for the provided mass + * if no isotope distribution exist within the halfMassStep_ than create and cache a new distribution + * + * @param mass - mass to fetch the isotope distribution + */ + IsotopeDistribution get(double mass); + + /** @brief gets the theoretical spectrum corresponding with the provided m/z value + * mz - mz of distribution to get + * charge - charge of mz + * mannmass - ??? + */ + IsotopeDistribution get(double mz, int charge, const double mannmass = 1.00048) const; + + }; +} + diff --git a/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.cpp b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.cpp new file mode 100644 index 00000000000..4821eb584a9 --- /dev/null +++ b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.cpp @@ -0,0 +1,829 @@ +// -------------------------------------------------------------------------- +// OpenMS -- Open-Source Mass Spectrometry +// -------------------------------------------------------------------------- +// Copyright The OpenMS Team -- Eberhard Karls University Tuebingen, +// ETH Zurich, and Freie Universitaet Berlin 2002-2022. +// +// This software is released under a three-clause BSD license: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of any author or any participating institution +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// For a full list of authors, refer to the file AUTHORS. +// -------------------------------------------------------------------------- +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING +// INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// -------------------------------------------------------------------------- +// $Maintainer: Joshua Charkow$ +// $Authors: Joshua Charkow$ +// -------------------------------------------------------------------------- + +// theoretical spectrum generator +#include +#include +#include + +namespace OpenMS +{ + + // note these should be mass values rather than m/z values + void initialize(double massStart, + double massEnd) + { + massStep_ = massStep; + for (double mass=massStart; mass < massEnd; mass += massStep) + { + std::cout << "Current mz: " << mz << std::endl; + charge = std::abs(charge); + // create the theoretical distribution + + + //Note: this is a rough estimate of the weight, usually the protons should be deducted first, left for backwards compatibility. + IsotopeDistribution dist = solver.estimateFromPeptideWeight(mass); + + + cachedDistributions_[mass] = dist; + } + + + double getMassStep() + { + return massStep_; + } + + double getHalfMassStep() + { + return halfMassStep_; + } + + int getMaxIsotope() + { + return solver_.getMaxIsotope(); + } + + bool getRoundMasses() + { + return sovler_.getRoundMasses() + } + + + IsotopeDistribution addEntry(mass) + { + IsotopeDistribution dist = solver.estimateFromPeptideWeight(mass); + cachedDistributions_[mass] = dist; + return dist; + } + + IsotopeDistribution get(double mass) + { + auto ele = std::upper_bound(cachedDistribution_.begin(), cachedDistribution_.end(), mass); + + auto ptrNext += ele; + + + if ele->first + if (ele == cachedDistribution_.end()) + { + // element is not found so create an entry + return addEntry(mass); + } + else if (std::abs(ele->first - mass) > halfMassStep_) // element found is more than halfMassStep_ away from target mass so generate a new entry + { + return addEntry(mass); + } + else // there is a current cached distribution that can be used! + { + auto ptrPrevious -= ele; + + if (std::abs(ptrPrevious->first - mass) < std::abs(ele->first - mass)) // check if the previous entry is closer than the first entry + { + return ptrPrevious->second; + } + else + { + return ele->second; + } + } + } + + + std::vector > get(double mz, int charge, const double mannmass) + { + IsotopeDistribution distribution = get(mz * charge); + + std::vector > isotopes_spec; + for (IsotopeDistribution::Iterator it = isotopeDist.begin(); it != isotopeDist.end(); ++it) + { + isotopes_spec.emplace_back(mass, it->getIntensity()); + mass += mannmass / charge; + } + + return isotopes_spec; + } + + + + + + + + + + for (IsotopeDistribution::Iterator it = d.begin(); it != d.end(); ++it) + { + isotopes_spec.emplace_back(mass, it->getIntensity()); + mass += mannmass / charge; + } + } //end of dia_isotope_corr_sub + + + + 2 /** @brief gets the theoretical spectrum corresponding with the provided m/z value + 1 */ +87 IsotopeDistribution get() const; + + void initialize( + void sortSpectrumByMZ(OpenSwath::Spectrum& spec) + { + //sort index list + std::vector > sorted_indices; + sorted_indices.reserve(spec.getMZArray()->data.size()); + auto mz_it = spec.getMZArray()->data.begin(); + for (Size i = 0; i < spec.getMZArray()->data.size(); ++i) + { + sorted_indices.emplace_back(*mz_it, i); + ++mz_it; + } + std::stable_sort(sorted_indices.begin(), sorted_indices.end()); + + // extract list of indices + std::vector select_indices; + select_indices.reserve(sorted_indices.size()); + for (const auto& sidx : sorted_indices) + { + select_indices.push_back(sidx.second); + } + + for (auto& da : spec.getDataArrays() ) + { + if (da->data.empty()) continue; + OpenSwath::BinaryDataArrayPtr tmp(new OpenSwath::BinaryDataArray); + tmp->description = da->description; + tmp->data.reserve(select_indices.size()); + for (Size i = 0; i < select_indices.size(); ++i) + { + tmp->data.push_back( da->data[ select_indices[i] ] ); + } + da = tmp; + } + + OPENMS_POSTCONDITION( std::adjacent_find(spec.getMZArray()->data.begin(), + spec.getMZArray()->data.end(), std::greater()) == spec.getMZArray()->data.end(), + "Postcondition violated: m/z vector needs to be sorted!" ) + } +} + +namespace OpenMS +{ + + /// Constructor + OpenSwathScoring::OpenSwathScoring() : + rt_normalization_factor_(1.0), + spacing_for_spectra_resampling_(0.005), + add_up_spectra_(1), + spectra_addition_method_("simple"), + im_drift_extra_pcnt_(0.0) + { + } + + /// Destructor + OpenSwathScoring::~OpenSwathScoring() = default; + + void OpenSwathScoring::initialize(double rt_normalization_factor, + int add_up_spectra, + double spacing_for_spectra_resampling, + const double drift_extra, + const OpenSwath_Scores_Usage & su, + const std::string& spectrum_addition_method) + { + this->rt_normalization_factor_ = rt_normalization_factor; + this->add_up_spectra_ = add_up_spectra; + this->spectra_addition_method_ = spectrum_addition_method; + this->im_drift_extra_pcnt_ = drift_extra; + this->spacing_for_spectra_resampling_ = spacing_for_spectra_resampling; + this->su_ = su; + } + + void OpenSwathScoring::calculateDIAScores(OpenSwath::IMRMFeature* imrmfeature, + const std::vector& transitions, + const std::vector& swath_maps, + const OpenSwath::SpectrumAccessPtr& ms1_map, + const OpenMS::DIAScoring& diascoring, + const CompoundType& compound, + OpenSwath_Scores& scores, + std::vector& masserror_ppm, + const double drift_lower, + const double drift_upper, + const double drift_target) + { + OPENMS_PRECONDITION(imrmfeature != nullptr, "Feature to be scored cannot be null"); + OPENMS_PRECONDITION(transitions.size() > 0, "There needs to be at least one transition."); + OPENMS_PRECONDITION(swath_maps.size() > 0, "There needs to be at least one swath map."); + + // Identify corresponding SONAR maps (if more than one map is used) + std::vector used_swath_maps; + if (swath_maps.size() > 1 || transitions.empty()) + { + double precursor_mz = transitions[0].getPrecursorMZ(); + for (size_t i = 0; i < swath_maps.size(); ++i) + { + if (swath_maps[i].ms1) {continue;} // skip MS1 + if (precursor_mz > swath_maps[i].lower && precursor_mz < swath_maps[i].upper) + { + used_swath_maps.push_back(swath_maps[i]); + } + } + } + else + { + used_swath_maps = swath_maps; + } + + std::vector normalized_library_intensity; + getNormalized_library_intensities_(transitions, normalized_library_intensity); + + // find spectrum that is closest to the apex of the peak using binary search + std::vector spectra = fetchSpectrumSwath(used_swath_maps, imrmfeature->getRT(), add_up_spectra_, drift_lower, drift_upper); + + // set the DIA parameters + double dia_extract_window_ = (double)diascoring.getParameters().getValue("dia_extraction_window"); + bool dia_extraction_ppm_ = diascoring.getParameters().getValue("dia_extraction_unit") == "ppm"; + + // score drift time dimension + if ( su_.use_im_scores) + { + IonMobilityScoring::driftScoring(spectra, transitions, scores, + drift_lower, drift_upper, drift_target, + dia_extract_window_, dia_extraction_ppm_, + false, im_drift_extra_pcnt_); + } + + + // Mass deviation score + diascoring.dia_massdiff_score(transitions, spectra, normalized_library_intensity, scores.massdev_score, scores.weighted_massdev_score, masserror_ppm, drift_lower, drift_upper); + + //TODO this score and the next, both rely on the CoarseIsotope of the PeptideAveragine. Maybe we could + // DIA dotproduct and manhattan score based on library intensity and sum formula if present + if (su_.use_ms2_isotope_scores) + { + diascoring.score_with_isotopes(spectra, transitions, scores.dotprod_score_dia, scores.manhatt_score_dia, drift_lower, drift_upper); + + // Isotope correlation / overlap score: Is this peak part of an + // isotopic pattern or is it the monoisotopic peak in an isotopic + // pattern? + // Currently this is computed for an averagine model of a peptide so its + // not optimal for metabolites - but better than nothing, given that for + // most fragments we don't really know their composition + diascoring + .dia_isotope_scores(transitions, spectra, imrmfeature, scores.isotope_correlation, scores.isotope_overlap, drift_lower, drift_upper); + } + + // Peptide-specific scores (only useful, when product transitions are REAL fragments, e.g. not in FFID) + // and only if sequence is known (non-empty) + if (compound.isPeptide() && !compound.sequence.empty() && su_.use_ionseries_scores) + { + // Presence of b/y series score + OpenMS::AASequence aas; + int by_charge_state = 1; // for which charge states should we check b/y series + OpenSwathDataAccessHelper::convertPeptideToAASequence(compound, aas); + diascoring.dia_by_ion_score(spectra, aas, by_charge_state, scores.bseries_score, scores.yseries_score, drift_lower, drift_upper); + } + + if (ms1_map && ms1_map->getNrSpectra() > 0) + { + double precursor_mz = transitions[0].precursor_mz; + double rt = imrmfeature->getRT(); + + calculatePrecursorDIAScores(ms1_map, diascoring, precursor_mz, rt, compound, scores, drift_lower, drift_upper); + } + + + if ( (ms1_map && ms1_map->getNrSpectra() > 0) && ( su_.use_im_scores) ) // IM MS1 scores + { + double dia_extract_window_ = (double)diascoring.getParameters().getValue("dia_extraction_window"); + bool dia_extraction_ppm_ = diascoring.getParameters().getValue("dia_extraction_unit") == "ppm"; + double rt = imrmfeature->getRT(); + + std::vector ms1_spectrum = fetchSpectrumSwath(ms1_map, rt, add_up_spectra_, drift_lower, drift_upper); + IonMobilityScoring::driftScoringMS1(ms1_spectrum, + transitions, scores, drift_lower, drift_upper, drift_target, dia_extract_window_, dia_extraction_ppm_, false, im_drift_extra_pcnt_); + + IonMobilityScoring::driftScoringMS1Contrast(spectra, ms1_spectrum, + transitions, scores, drift_lower, drift_upper, dia_extract_window_, dia_extraction_ppm_, im_drift_extra_pcnt_); + } + } + + void OpenSwathScoring::calculatePrecursorDIAScores(const OpenSwath::SpectrumAccessPtr& ms1_map, + const OpenMS::DIAScoring & diascoring, + double precursor_mz, + double rt, + const CompoundType& compound, + OpenSwath_Scores & scores, + double drift_lower, double drift_upper) + { + // Compute precursor-level scores: + // - compute mass difference in ppm + // - compute isotopic pattern score + if (ms1_map && ms1_map->getNrSpectra() > 0) + { + std::vector ms1_spectrum = fetchSpectrumSwath(ms1_map, rt, add_up_spectra_, drift_lower, drift_upper); + diascoring.dia_ms1_massdiff_score(precursor_mz, ms1_spectrum, scores.ms1_ppm_score, drift_lower, drift_upper); + + // derive precursor charge state (get from data if possible) + int precursor_charge = 1; + if (compound.getChargeState() != 0) + { + precursor_charge = compound.getChargeState(); + } + + if (compound.isPeptide()) + { + if (!compound.sequence.empty()) + { + diascoring.dia_ms1_isotope_scores(precursor_mz, ms1_spectrum, scores.ms1_isotope_correlation, + scores.ms1_isotope_overlap, + AASequence::fromString(compound.sequence).getFormula(Residue::Full, precursor_charge), drift_lower, drift_upper); + } + else + { + diascoring.dia_ms1_isotope_scores_averagine(precursor_mz, ms1_spectrum, + scores.ms1_isotope_correlation, + scores.ms1_isotope_overlap, precursor_charge, drift_lower, drift_upper); + } + } + else + { + if (!compound.sequence.empty()) + { + EmpiricalFormula empf{compound.sequence}; + //Note: this only sets the charge to be extracted again in the following function. + // It is not really used in EmpiricalFormula. Also the m/z of the formula is not used since + // it is shadowed by the exact precursor_mz. + //TODO check if charges are the same (in case the charge was actually present in the sum_formula?) + empf.setCharge(precursor_charge); + diascoring.dia_ms1_isotope_scores(precursor_mz, ms1_spectrum, scores.ms1_isotope_correlation, + scores.ms1_isotope_overlap, + empf, drift_lower, drift_upper); + } + else + { + diascoring.dia_ms1_isotope_scores_averagine(precursor_mz, ms1_spectrum, + scores.ms1_isotope_correlation, + scores.ms1_isotope_overlap, precursor_charge, drift_lower, drift_upper); + } + } + } + } + + void OpenSwathScoring::calculateDIAIdScores(OpenSwath::IMRMFeature* imrmfeature, + const TransitionType & transition, + const std::vector& swath_maps, + const OpenMS::DIAScoring & diascoring, + OpenSwath_Scores & scores, + double drift_lower, double drift_upper) + { + OPENMS_PRECONDITION(imrmfeature != nullptr, "Feature to be scored cannot be null"); + OPENMS_PRECONDITION(swath_maps.size() > 0, "There needs to be at least one swath map."); + + // Identify corresponding SONAR maps (if more than one map is used) + std::vector used_swath_maps; + if (swath_maps.size() > 1) + { + double precursor_mz = transition.getPrecursorMZ(); + for (size_t i = 0; i < swath_maps.size(); ++i) + { + if (swath_maps[i].ms1) {continue;} // skip MS1 + if (precursor_mz > swath_maps[i].lower && precursor_mz < swath_maps[i].upper) + { + used_swath_maps.push_back(swath_maps[i]); + } + } + } + else + { + used_swath_maps = swath_maps; + } + + // find spectrum that is closest to the apex of the peak using binary search + std::vector spectrum = fetchSpectrumSwath(used_swath_maps, imrmfeature->getRT(), add_up_spectra_, drift_lower, drift_upper); + + // If no charge is given, we assume it to be 1 + int putative_product_charge = 1; + if (transition.getProductChargeState() != 0) + { + putative_product_charge = transition.getProductChargeState(); + } + + // Isotope correlation / overlap score: Is this peak part of an + // isotopic pattern or is it the monoisotopic peak in an isotopic + // pattern? + diascoring.dia_ms1_isotope_scores_averagine(transition.getProductMZ(), + spectrum, + scores.isotope_correlation, + scores.isotope_overlap, + putative_product_charge, + drift_lower, + drift_upper); + // Mass deviation score + diascoring.dia_ms1_massdiff_score(transition.getProductMZ(), spectrum, scores.massdev_score, drift_lower, drift_upper); + } + + void OpenSwathScoring::calculateChromatographicScores( + OpenSwath::IMRMFeature* imrmfeature, + const std::vector& native_ids, + const std::vector& precursor_ids, + const std::vector& normalized_library_intensity, + std::vector& signal_noise_estimators, + OpenSwath_Scores & scores) const + { + OPENMS_PRECONDITION(imrmfeature != nullptr, "Feature to be scored cannot be null"); + OpenSwath::MRMScoring mrmscore_; + if (su_.use_coelution_score_ || su_.use_shape_score_ || (!imrmfeature->getPrecursorIDs().empty() && su_.use_ms1_correlation)) + mrmscore_.initializeXCorrMatrix(imrmfeature, native_ids); + + // XCorr score (coelution) + if (su_.use_coelution_score_) + { + scores.xcorr_coelution_score = mrmscore_.calcXcorrCoelutionScore(); + scores.weighted_coelution_score = mrmscore_.calcXcorrCoelutionWeightedScore(normalized_library_intensity); + } + + // XCorr score (shape) + // mean over the intensities at the max of the crosscorrelation + // FEATURE : weigh by the intensity as done by mQuest + // FEATURE : normalize with the intensity at the peak group apex? + if (su_.use_shape_score_) + { + scores.xcorr_shape_score = mrmscore_.calcXcorrShapeScore(); + scores.weighted_xcorr_shape = mrmscore_.calcXcorrShapeWeightedScore(normalized_library_intensity); + } + + // check that the MS1 feature is present and that the MS1 correlation should be calculated + if (!imrmfeature->getPrecursorIDs().empty() && su_.use_ms1_correlation) + { + // we need at least two precursor isotopes + if (precursor_ids.size() > 1) + { + mrmscore_.initializeXCorrPrecursorMatrix(imrmfeature, precursor_ids); + scores.ms1_xcorr_coelution_score = mrmscore_.calcXcorrPrecursorCoelutionScore(); + scores.ms1_xcorr_shape_score = mrmscore_.calcXcorrPrecursorShapeScore(); + } + mrmscore_.initializeXCorrPrecursorContrastMatrix(imrmfeature, precursor_ids, native_ids); // perform cross-correlation on monoisotopic precursor + scores.ms1_xcorr_coelution_contrast_score = mrmscore_.calcXcorrPrecursorContrastCoelutionScore(); + scores.ms1_xcorr_shape_contrast_score = mrmscore_.calcXcorrPrecursorContrastShapeScore(); + + mrmscore_.initializeXCorrPrecursorCombinedMatrix(imrmfeature, precursor_ids, native_ids); // perform cross-correlation on monoisotopic precursor + scores.ms1_xcorr_coelution_combined_score = mrmscore_.calcXcorrPrecursorCombinedCoelutionScore(); + scores.ms1_xcorr_shape_combined_score = mrmscore_.calcXcorrPrecursorCombinedShapeScore(); + } + + if (su_.use_nr_peaks_score_) + { + scores.nr_peaks = boost::numeric_cast(imrmfeature->size()); + } + + // Signal to noise scoring + if (su_.use_sn_score_) + { + scores.sn_ratio = mrmscore_.calcSNScore(imrmfeature, signal_noise_estimators); + // everything below S/N 1 can be set to zero (and the log safely applied) + if (scores.sn_ratio < 1) + { + scores.log_sn_score = 0; + } + else + { + scores.log_sn_score = std::log(scores.sn_ratio); + } + } + + // Mutual information scoring + if (su_.use_mi_score_) + { + mrmscore_.initializeMIMatrix(imrmfeature, native_ids); + scores.mi_score = mrmscore_.calcMIScore(); + scores.weighted_mi_score = mrmscore_.calcMIWeightedScore(normalized_library_intensity); + } + + // check that the MS1 feature is present and that the MS1 MI should be calculated + if (!imrmfeature->getPrecursorIDs().empty() && su_.use_ms1_mi) + { + // we need at least two precursor isotopes + if (precursor_ids.size() > 1) + { + mrmscore_.initializeMIPrecursorMatrix(imrmfeature, precursor_ids); + scores.ms1_mi_score = mrmscore_.calcMIPrecursorScore(); + } + mrmscore_.initializeMIPrecursorContrastMatrix(imrmfeature, precursor_ids, native_ids); + scores.ms1_mi_contrast_score = mrmscore_.calcMIPrecursorContrastScore(); + + mrmscore_.initializeMIPrecursorCombinedMatrix(imrmfeature, precursor_ids, native_ids); + scores.ms1_mi_combined_score = mrmscore_.calcMIPrecursorCombinedScore(); + } + } + + void OpenSwathScoring::calculateChromatographicIdScores( + OpenSwath::IMRMFeature* imrmfeature, + const std::vector& native_ids_identification, + const std::vector& native_ids_detection, + std::vector& signal_noise_estimators, + OpenSwath_Ind_Scores & idscores) const + { + OPENMS_PRECONDITION(imrmfeature != nullptr, "Feature to be scored cannot be null"); + OpenSwath::MRMScoring mrmscore_; + mrmscore_.initializeXCorrContrastMatrix(imrmfeature, native_ids_identification, native_ids_detection); + + if (su_.use_coelution_score_) + { + idscores.ind_xcorr_coelution_score = mrmscore_.calcSeparateXcorrContrastCoelutionScore(); + } + + if (su_.use_shape_score_) + { + idscores.ind_xcorr_shape_score = mrmscore_.calcSeparateXcorrContrastShapeScore(); + } + + // Signal to noise scoring + if (su_.use_sn_score_) + { + idscores.ind_log_sn_score = mrmscore_.calcSeparateSNScore(imrmfeature, signal_noise_estimators); + } + + // Mutual information scoring + if (su_.use_mi_score_) + { + mrmscore_.initializeMIContrastMatrix(imrmfeature, native_ids_identification, native_ids_detection); + idscores.ind_mi_score = mrmscore_.calcSeparateMIContrastScore(); + } + } + + void OpenSwathScoring::calculateLibraryScores( + OpenSwath::IMRMFeature* imrmfeature, + const std::vector & transitions, + const CompoundType& pep, + const double normalized_feature_rt, + OpenSwath_Scores & scores) + { + OPENMS_PRECONDITION(imrmfeature != nullptr, "Feature to be scored cannot be null"); + std::vector normalized_library_intensity; + getNormalized_library_intensities_(transitions, normalized_library_intensity); + + std::vector native_ids; + native_ids.reserve(transitions.size()); + for (const auto& trans : transitions) + { + native_ids.push_back(trans.getNativeID()); + } + + if (su_.use_library_score_) + { + OpenSwath::MRMScoring::calcLibraryScore(imrmfeature, transitions, + scores.library_corr, scores.library_norm_manhattan, scores.library_manhattan, + scores.library_dotprod, scores.library_sangle, scores.library_rootmeansquare); + } + + // Retention time score + if (su_.use_rt_score_) + { + // rt score is delta iRT + double normalized_experimental_rt = normalized_feature_rt; + double rt_score = OpenSwath::MRMScoring::calcRTScore(pep, normalized_experimental_rt); + + scores.normalized_experimental_rt = normalized_experimental_rt; + scores.raw_rt_score = rt_score; + scores.norm_rt_score = rt_score / rt_normalization_factor_; + } + } + + void OpenSwathScoring::getNormalized_library_intensities_(const std::vector & transitions, + std::vector& normalized_library_intensity) + { + normalized_library_intensity.clear(); + for (Size i = 0; i < transitions.size(); i++) + { + normalized_library_intensity.push_back(transitions[i].getLibraryIntensity()); + } + for (Size i = 0; i < normalized_library_intensity.size(); i++) + { + // the library intensity should never be below zero + if (normalized_library_intensity[i] < 0.0) { normalized_library_intensity[i] = 0.0; } + } + OpenSwath::Scoring::normalize_sum(&normalized_library_intensity[0], boost::numeric_cast(normalized_library_intensity.size())); + } + + std::vector OpenSwathScoring::fetchSpectrumSwath(OpenSwath::SpectrumAccessPtr swathmap, double RT, int nr_spectra_to_add, double drift_lower, double drift_upper) + { + + std::vector all_spectra = fetchMultipleSpectra_(swathmap, RT, nr_spectra_to_add); + if (spectra_addition_method_ == "simple") + { + return all_spectra; + } + else + { + std::vector spectrum_out; + spectrum_out.push_back(getAddedSpectra_(all_spectra, drift_lower, drift_upper)); + return spectrum_out; + } + } + + std::vector OpenSwathScoring::fetchSpectrumSwath(std::vector swath_maps, double RT, int nr_spectra_to_add, double drift_lower, double drift_upper) + { + OPENMS_PRECONDITION(nr_spectra_to_add >= 1, "nr_spectra_to_add must be at least 1.") + OPENMS_PRECONDITION(!swath_maps.empty(), "swath_maps vector cannot be empty") + + // This is not SONAR data + if (swath_maps.size() == 1) + { + return fetchSpectrumSwath(swath_maps[0].sptr, RT, nr_spectra_to_add, drift_lower, drift_upper); + } + else + { + // multiple SWATH maps for a single precursor -> this is SONAR data, in all cases only return a single spectrum + std::vector all_spectra; + for (size_t i = 0; i < swath_maps.size(); ++i) + { + std::vector spectrum_vector = fetchMultipleSpectra_(swath_maps[i].sptr, RT, nr_spectra_to_add); + OpenSwath::SpectrumPtr spec = getAddedSpectra_(spectrum_vector, drift_lower, drift_upper); + all_spectra.push_back(spec); + } + OpenSwath::SpectrumPtr spectrum_ = SpectrumAddition::addUpSpectra(all_spectra, spacing_for_spectra_resampling_, true); + std::vector spectrum_out; + spectrum_out.push_back(spectrum_); + return spectrum_out; + } + } + + OpenSwath::SpectrumPtr OpenSwathScoring::filterByDrift_(const OpenSwath::SpectrumPtr& input, const double drift_lower, const double drift_upper) + { + // NOTE: this function is very inefficient because filtering unsorted array + OPENMS_PRECONDITION(drift_upper > 0, "Cannot filter by drift time if upper value is less or equal to zero"); + //OPENMS_PRECONDITION(input->getDriftTimeArray() != nullptr, "Cannot filter by drift time if no drift time is available."); + + if (input->getDriftTimeArray() == nullptr) + { + std::cerr << "Warning: Cannot filter by drift time if no drift time is available.\n"; + return input; + } + + OpenSwath::SpectrumPtr output(new OpenSwath::Spectrum); + + OpenSwath::BinaryDataArrayPtr mz_arr = input->getMZArray(); + OpenSwath::BinaryDataArrayPtr int_arr = input->getIntensityArray(); + OpenSwath::BinaryDataArrayPtr im_arr = input->getDriftTimeArray(); + + std::vector::const_iterator mz_it = mz_arr->data.begin(); + std::vector::const_iterator int_it = int_arr->data.begin(); + std::vector::const_iterator im_it = im_arr->data.begin(); + std::vector::const_iterator mz_end = mz_arr->data.end(); + + OpenSwath::BinaryDataArrayPtr mz_arr_out(new OpenSwath::BinaryDataArray); + OpenSwath::BinaryDataArrayPtr intens_arr_out(new OpenSwath::BinaryDataArray); + OpenSwath::BinaryDataArrayPtr im_arr_out(new OpenSwath::BinaryDataArray); + im_arr_out->description = im_arr->description; + + size_t n = mz_arr->data.size(); + im_arr_out->data.reserve(n); + while (mz_it != mz_end) + { + if (*im_it > drift_lower && *im_it < drift_upper) + { + mz_arr_out->data.push_back( *mz_it ); + intens_arr_out->data.push_back( *int_it ); + im_arr_out->data.push_back( *im_it ); + } + ++mz_it; + ++int_it; + ++im_it; + } + output->setMZArray(mz_arr_out); + output->setIntensityArray(intens_arr_out); + output->getDataArrays().push_back(im_arr_out); + return output; + } + + std::vector OpenSwathScoring::fetchMultipleSpectra_(const OpenSwath::SpectrumAccessPtr& swath_map, + double RT, int nr_spectra_to_fetch) + { + std::vector indices = swath_map->getSpectraByRT(RT, 0.0); + std::vector all_spectra; + + if (indices.empty() ) + { + return all_spectra; + } + int closest_idx = boost::numeric_cast(indices[0]); + if (indices[0] != 0 && + std::fabs(swath_map->getSpectrumMetaById(boost::numeric_cast(indices[0]) - 1).RT - RT) < + std::fabs(swath_map->getSpectrumMetaById(boost::numeric_cast(indices[0])).RT - RT)) + { + closest_idx--; + } + + all_spectra.push_back(swath_map->getSpectrumById(closest_idx)); + for (int i = 1; i <= nr_spectra_to_fetch / 2; i++) // cast to int is intended! + { + if (closest_idx - i >= 0) + { + all_spectra.push_back(swath_map->getSpectrumById(closest_idx - i)); + } + if (closest_idx + i < (int)swath_map->getNrSpectra()) + { + all_spectra.push_back(swath_map->getSpectrumById(closest_idx + i)); + } + } + + return all_spectra; + } + + OpenSwath::SpectrumPtr OpenSwathScoring::getAddedSpectra_(std::vector& all_spectra, const double drift_lower, const double drift_upper) + { + OpenSwath::SpectrumPtr added_spec(new OpenSwath::Spectrum); + added_spec->getDataArrays().push_back( OpenSwath::BinaryDataArrayPtr(new OpenSwath::BinaryDataArray) ); + added_spec->getDataArrays().back()->description = "Ion Mobility"; + + + // If no spectra found return + if (all_spectra.empty()) + { + return added_spec; + } + + // For spectrum resampling since ion mobility is not supported filter by IM window + if (drift_lower > 0) + { + + for (auto& s: all_spectra) s = filterByDrift_(s, drift_lower, drift_upper); + } + + // If only one spectrum, no adding to be done + if (all_spectra.size() == 1) + { + return all_spectra[0]; + } + + // Otherwise do spectrum addition + if (spectra_addition_method_ == "simple") + { + // Ensure that we have the same number of data arrays as in the input spectrum + if (!all_spectra.empty() && all_spectra[0]->getDataArrays().size() > 2) + { + for (Size k = 2; k < all_spectra[0]->getDataArrays().size(); k++) + { + OpenSwath::BinaryDataArrayPtr tmp (new OpenSwath::BinaryDataArray()); + tmp->description = all_spectra[0]->getDataArrays()[k]->description; + added_spec->getDataArrays().push_back(tmp); + } + } + + // Simply add up data and sort in the end + for (const auto& s : all_spectra) + { + for (Size k = 0; k < s->getDataArrays().size(); k++) + { + auto& v1 = added_spec->getDataArrays()[k]->data; + auto& v2 = s->getDataArrays()[k]->data; + + v1.reserve( v1.size() + v2.size() ); + v1.insert( v1.end(), v2.begin(), v2.end() ); + } + } + sortSpectrumByMZ(*added_spec); + } + else + { + added_spec = SpectrumAddition::addUpSpectra(all_spectra, spacing_for_spectra_resampling_, true); + } + + OPENMS_POSTCONDITION( std::adjacent_find(added_spec->getMZArray()->data.begin(), + added_spec->getMZArray()->data.end(), std::greater()) == added_spec->getMZArray()->data.end(), + "Postcondition violated: m/z vector needs to be sorted!" ) + + return added_spec; + } + +} diff --git a/src/tests/class_tests/openms/executables.cmake b/src/tests/class_tests/openms/executables.cmake index bf244972247..6b1b76346a1 100644 --- a/src/tests/class_tests/openms/executables.cmake +++ b/src/tests/class_tests/openms/executables.cmake @@ -713,6 +713,7 @@ if(NOT DISABLE_OPENSWATH) CachedMzML_test CachedMzMLHandler_test HDF5_test + OpenSwathIsotopeGeneratorCacher_test ) endif(NOT DISABLE_OPENSWATH) diff --git a/src/tests/class_tests/openms/source/OpenSwathIsotopeGeneratorCacher_test.cpp b/src/tests/class_tests/openms/source/OpenSwathIsotopeGeneratorCacher_test.cpp new file mode 100644 index 00000000000..3bb1577c5fa --- /dev/null +++ b/src/tests/class_tests/openms/source/OpenSwathIsotopeGeneratorCacher_test.cpp @@ -0,0 +1,63 @@ +// -------------------------------------------------------------------------- +// OpenMS -- Open-Source Mass Spectrometry +// -------------------------------------------------------------------------- +// Copyright The OpenMS Team -- Eberhard Karls University Tuebingen, +// ETH Zurich, and Freie Universitaet Berlin 2002-2022. +// +// This software is released under a three-clause BSD license: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of any author or any participating institution +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// For a full list of authors, refer to the file AUTHORS. +// -------------------------------------------------------------------------- +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING +// INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// -------------------------------------------------------------------------- +// $Maintainer: Joshua Charkow $ +// $Authors: Joshua Charkow $ +// -------------------------------------------------------------------------- + +#include +#include + +using namespace std; +using namespace OpenMS; + + +///////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////// +START_TEST(OpenSwathIsotopeGeneratorCacher, "$Id$") + + +START_SECTION(OpenSwathIsotopeGeneratorCacher(Size max_isotope, double massStep)) +{ + IsotopeDistribution* nullPointer = nullptr; + OpenSwathIsotopeGeneratorCacher* ptr = new OpenSwathIsotopeGeneratorCacher(0, 0.5); + Size max_isotope = ptr->getMaxIsotope(); + TEST_EQUAL(max_isotope, 0) + TEST_EQUAL(ptr->getRoundMasses(), false) + TEST_EQUAL(ptr->getMassStep(), 0.5) + TEST_EQUAL(ptr->getHalfMassStep(), 0.25) + TEST_NOT_EQUAL(ptr, nullPointer) + delete ptr; +} +END_SECTION + +///////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////// +END_TEST From 1f0dd24b60ddc2702fbf207ee1c1379834f9e116 Mon Sep 17 00:00:00 2001 From: jcharkow Date: Wed, 21 Dec 2022 14:06:21 -0500 Subject: [PATCH 02/13] [API] got OpenSwathIsotopeGeneratorCacher building --- .../OpenSwathIsotopeGeneratorCacher.h | 23 +- .../OpenSwathIsotopeGeneratorCacher.cpp | 775 +----------------- .../source/ANALYSIS/OPENSWATH/sources.cmake | 1 + 3 files changed, 54 insertions(+), 745 deletions(-) diff --git a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.h b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.h index 7d60c90cc52..4af73092822 100644 --- a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.h +++ b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.h @@ -57,20 +57,20 @@ namespace OpenMS - std::map cachedDistributions_; double massStep_; // step between adjacent isotope distributions double halfMassStep_; // cached value of half of the mass step CoarseIsotopePatternGenerator solver_; // used for computing the isotope distributions - + std::map cachedIsotopeDistributions_; public: OpenSwathIsotopeGeneratorCacher(const Size max_isotope, const double massStep, const bool round_masses = false): - solver_(max_isotope, round_masses), massStep_(massStep), - halfMassStep_(massStep / 2.0) + halfMassStep_(massStep / 2.0), + solver_(max_isotope, round_masses), + cachedIsotopeDistributions_() { - } + }; /// Destructor ~OpenSwathIsotopeGeneratorCacher(); @@ -81,13 +81,12 @@ namespace OpenMS * * Sets the parameters for the scoring. * - * @param mzStart - start mz to generate a theoretical isotope distribtuion - * @param mzEnd - end mz to generate a theoretical isotope distribtuion - * @param mzStep - step between precursor mz for isotope distribution + * @param massStart - start mz to generate a theoretical isotope distribtuion + * @param massEnd - end mz to generate a theoretical isotope distribtuion + * @param massStep - step between precursor mz for isotope distribution * */ - void initialize(double mzStart, - int mzEnd); + void initialize(double massStart, double massEnd, double massStep); double getMassStep(); @@ -116,8 +115,8 @@ namespace OpenMS * charge - charge of mz * mannmass - ??? */ - IsotopeDistribution get(double mz, int charge, const double mannmass = 1.00048) const; - + //std::vector> get(double mz, int charge, const double mannmass = 1.00048) const; + std::vector> get(double mz, int charge, const double mannmass = 1.00048); }; } diff --git a/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.cpp b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.cpp index 4821eb584a9..4412d4904c5 100644 --- a/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.cpp +++ b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.cpp @@ -32,6 +32,7 @@ // $Authors: Joshua Charkow$ // -------------------------------------------------------------------------- +#include // theoretical spectrum generator #include #include @@ -40,790 +41,98 @@ namespace OpenMS { + // + OpenSwathIsotopeGeneratorCacher::~OpenSwathIsotopeGeneratorCacher() = default; // note these should be mass values rather than m/z values - void initialize(double massStart, - double massEnd) + void OpenSwathIsotopeGeneratorCacher::initialize(double massStart, double massEnd, double massStep) { massStep_ = massStep; - for (double mass=massStart; mass < massEnd; mass += massStep) + for (double mass=massStart; mass < massEnd; mass += massStep_) { - std::cout << "Current mz: " << mz << std::endl; - charge = std::abs(charge); - // create the theoretical distribution - + std::cout << "Current mz: " << mass << std::endl; + // create the theoretical distribution //Note: this is a rough estimate of the weight, usually the protons should be deducted first, left for backwards compatibility. - IsotopeDistribution dist = solver.estimateFromPeptideWeight(mass); - - - cachedDistributions_[mass] = dist; - } - - - double getMassStep() - { - return massStep_; - } - - double getHalfMassStep() - { - return halfMassStep_; - } - - int getMaxIsotope() - { - return solver_.getMaxIsotope(); - } - - bool getRoundMasses() - { - return sovler_.getRoundMasses() - } - - - IsotopeDistribution addEntry(mass) - { - IsotopeDistribution dist = solver.estimateFromPeptideWeight(mass); - cachedDistributions_[mass] = dist; - return dist; - } - - IsotopeDistribution get(double mass) - { - auto ele = std::upper_bound(cachedDistribution_.begin(), cachedDistribution_.end(), mass); - - auto ptrNext += ele; - - - if ele->first - if (ele == cachedDistribution_.end()) - { - // element is not found so create an entry - return addEntry(mass); - } - else if (std::abs(ele->first - mass) > halfMassStep_) // element found is more than halfMassStep_ away from target mass so generate a new entry - { - return addEntry(mass); - } - else // there is a current cached distribution that can be used! - { - auto ptrPrevious -= ele; - - if (std::abs(ptrPrevious->first - mass) < std::abs(ele->first - mass)) // check if the previous entry is closer than the first entry - { - return ptrPrevious->second; - } - else - { - return ele->second; - } - } - } - + IsotopeDistribution dist = solver_.estimateFromPeptideWeight(mass); - std::vector > get(double mz, int charge, const double mannmass) - { - IsotopeDistribution distribution = get(mz * charge); - - std::vector > isotopes_spec; - for (IsotopeDistribution::Iterator it = isotopeDist.begin(); it != isotopeDist.end(); ++it) - { - isotopes_spec.emplace_back(mass, it->getIntensity()); - mass += mannmass / charge; + cachedIsotopeDistributions_[mass] = dist; } - - return isotopes_spec; } - - - - - - - - - for (IsotopeDistribution::Iterator it = d.begin(); it != d.end(); ++it) - { - isotopes_spec.emplace_back(mass, it->getIntensity()); - mass += mannmass / charge; - } - } //end of dia_isotope_corr_sub - - - - 2 /** @brief gets the theoretical spectrum corresponding with the provided m/z value - 1 */ -87 IsotopeDistribution get() const; - - void initialize( - void sortSpectrumByMZ(OpenSwath::Spectrum& spec) + double OpenSwathIsotopeGeneratorCacher::getMassStep() { - //sort index list - std::vector > sorted_indices; - sorted_indices.reserve(spec.getMZArray()->data.size()); - auto mz_it = spec.getMZArray()->data.begin(); - for (Size i = 0; i < spec.getMZArray()->data.size(); ++i) - { - sorted_indices.emplace_back(*mz_it, i); - ++mz_it; - } - std::stable_sort(sorted_indices.begin(), sorted_indices.end()); - - // extract list of indices - std::vector select_indices; - select_indices.reserve(sorted_indices.size()); - for (const auto& sidx : sorted_indices) - { - select_indices.push_back(sidx.second); - } - - for (auto& da : spec.getDataArrays() ) - { - if (da->data.empty()) continue; - OpenSwath::BinaryDataArrayPtr tmp(new OpenSwath::BinaryDataArray); - tmp->description = da->description; - tmp->data.reserve(select_indices.size()); - for (Size i = 0; i < select_indices.size(); ++i) - { - tmp->data.push_back( da->data[ select_indices[i] ] ); - } - da = tmp; - } - - OPENMS_POSTCONDITION( std::adjacent_find(spec.getMZArray()->data.begin(), - spec.getMZArray()->data.end(), std::greater()) == spec.getMZArray()->data.end(), - "Postcondition violated: m/z vector needs to be sorted!" ) + return massStep_; } -} - -namespace OpenMS -{ - /// Constructor - OpenSwathScoring::OpenSwathScoring() : - rt_normalization_factor_(1.0), - spacing_for_spectra_resampling_(0.005), - add_up_spectra_(1), - spectra_addition_method_("simple"), - im_drift_extra_pcnt_(0.0) + double OpenSwathIsotopeGeneratorCacher::getHalfMassStep() { + return halfMassStep_; } - /// Destructor - OpenSwathScoring::~OpenSwathScoring() = default; - - void OpenSwathScoring::initialize(double rt_normalization_factor, - int add_up_spectra, - double spacing_for_spectra_resampling, - const double drift_extra, - const OpenSwath_Scores_Usage & su, - const std::string& spectrum_addition_method) + int OpenSwathIsotopeGeneratorCacher::getMaxIsotope() { - this->rt_normalization_factor_ = rt_normalization_factor; - this->add_up_spectra_ = add_up_spectra; - this->spectra_addition_method_ = spectrum_addition_method; - this->im_drift_extra_pcnt_ = drift_extra; - this->spacing_for_spectra_resampling_ = spacing_for_spectra_resampling; - this->su_ = su; + return solver_.getMaxIsotope(); } - void OpenSwathScoring::calculateDIAScores(OpenSwath::IMRMFeature* imrmfeature, - const std::vector& transitions, - const std::vector& swath_maps, - const OpenSwath::SpectrumAccessPtr& ms1_map, - const OpenMS::DIAScoring& diascoring, - const CompoundType& compound, - OpenSwath_Scores& scores, - std::vector& masserror_ppm, - const double drift_lower, - const double drift_upper, - const double drift_target) + bool OpenSwathIsotopeGeneratorCacher::getRoundMasses() { - OPENMS_PRECONDITION(imrmfeature != nullptr, "Feature to be scored cannot be null"); - OPENMS_PRECONDITION(transitions.size() > 0, "There needs to be at least one transition."); - OPENMS_PRECONDITION(swath_maps.size() > 0, "There needs to be at least one swath map."); - - // Identify corresponding SONAR maps (if more than one map is used) - std::vector used_swath_maps; - if (swath_maps.size() > 1 || transitions.empty()) - { - double precursor_mz = transitions[0].getPrecursorMZ(); - for (size_t i = 0; i < swath_maps.size(); ++i) - { - if (swath_maps[i].ms1) {continue;} // skip MS1 - if (precursor_mz > swath_maps[i].lower && precursor_mz < swath_maps[i].upper) - { - used_swath_maps.push_back(swath_maps[i]); - } - } - } - else - { - used_swath_maps = swath_maps; - } - - std::vector normalized_library_intensity; - getNormalized_library_intensities_(transitions, normalized_library_intensity); - - // find spectrum that is closest to the apex of the peak using binary search - std::vector spectra = fetchSpectrumSwath(used_swath_maps, imrmfeature->getRT(), add_up_spectra_, drift_lower, drift_upper); - - // set the DIA parameters - double dia_extract_window_ = (double)diascoring.getParameters().getValue("dia_extraction_window"); - bool dia_extraction_ppm_ = diascoring.getParameters().getValue("dia_extraction_unit") == "ppm"; - - // score drift time dimension - if ( su_.use_im_scores) - { - IonMobilityScoring::driftScoring(spectra, transitions, scores, - drift_lower, drift_upper, drift_target, - dia_extract_window_, dia_extraction_ppm_, - false, im_drift_extra_pcnt_); - } - - - // Mass deviation score - diascoring.dia_massdiff_score(transitions, spectra, normalized_library_intensity, scores.massdev_score, scores.weighted_massdev_score, masserror_ppm, drift_lower, drift_upper); - - //TODO this score and the next, both rely on the CoarseIsotope of the PeptideAveragine. Maybe we could - // DIA dotproduct and manhattan score based on library intensity and sum formula if present - if (su_.use_ms2_isotope_scores) - { - diascoring.score_with_isotopes(spectra, transitions, scores.dotprod_score_dia, scores.manhatt_score_dia, drift_lower, drift_upper); - - // Isotope correlation / overlap score: Is this peak part of an - // isotopic pattern or is it the monoisotopic peak in an isotopic - // pattern? - // Currently this is computed for an averagine model of a peptide so its - // not optimal for metabolites - but better than nothing, given that for - // most fragments we don't really know their composition - diascoring - .dia_isotope_scores(transitions, spectra, imrmfeature, scores.isotope_correlation, scores.isotope_overlap, drift_lower, drift_upper); - } - - // Peptide-specific scores (only useful, when product transitions are REAL fragments, e.g. not in FFID) - // and only if sequence is known (non-empty) - if (compound.isPeptide() && !compound.sequence.empty() && su_.use_ionseries_scores) - { - // Presence of b/y series score - OpenMS::AASequence aas; - int by_charge_state = 1; // for which charge states should we check b/y series - OpenSwathDataAccessHelper::convertPeptideToAASequence(compound, aas); - diascoring.dia_by_ion_score(spectra, aas, by_charge_state, scores.bseries_score, scores.yseries_score, drift_lower, drift_upper); - } - - if (ms1_map && ms1_map->getNrSpectra() > 0) - { - double precursor_mz = transitions[0].precursor_mz; - double rt = imrmfeature->getRT(); - - calculatePrecursorDIAScores(ms1_map, diascoring, precursor_mz, rt, compound, scores, drift_lower, drift_upper); - } - - - if ( (ms1_map && ms1_map->getNrSpectra() > 0) && ( su_.use_im_scores) ) // IM MS1 scores - { - double dia_extract_window_ = (double)diascoring.getParameters().getValue("dia_extraction_window"); - bool dia_extraction_ppm_ = diascoring.getParameters().getValue("dia_extraction_unit") == "ppm"; - double rt = imrmfeature->getRT(); - - std::vector ms1_spectrum = fetchSpectrumSwath(ms1_map, rt, add_up_spectra_, drift_lower, drift_upper); - IonMobilityScoring::driftScoringMS1(ms1_spectrum, - transitions, scores, drift_lower, drift_upper, drift_target, dia_extract_window_, dia_extraction_ppm_, false, im_drift_extra_pcnt_); - - IonMobilityScoring::driftScoringMS1Contrast(spectra, ms1_spectrum, - transitions, scores, drift_lower, drift_upper, dia_extract_window_, dia_extraction_ppm_, im_drift_extra_pcnt_); - } + return solver_.getRoundMasses(); } - void OpenSwathScoring::calculatePrecursorDIAScores(const OpenSwath::SpectrumAccessPtr& ms1_map, - const OpenMS::DIAScoring & diascoring, - double precursor_mz, - double rt, - const CompoundType& compound, - OpenSwath_Scores & scores, - double drift_lower, double drift_upper) - { - // Compute precursor-level scores: - // - compute mass difference in ppm - // - compute isotopic pattern score - if (ms1_map && ms1_map->getNrSpectra() > 0) - { - std::vector ms1_spectrum = fetchSpectrumSwath(ms1_map, rt, add_up_spectra_, drift_lower, drift_upper); - diascoring.dia_ms1_massdiff_score(precursor_mz, ms1_spectrum, scores.ms1_ppm_score, drift_lower, drift_upper); - - // derive precursor charge state (get from data if possible) - int precursor_charge = 1; - if (compound.getChargeState() != 0) - { - precursor_charge = compound.getChargeState(); - } - if (compound.isPeptide()) - { - if (!compound.sequence.empty()) - { - diascoring.dia_ms1_isotope_scores(precursor_mz, ms1_spectrum, scores.ms1_isotope_correlation, - scores.ms1_isotope_overlap, - AASequence::fromString(compound.sequence).getFormula(Residue::Full, precursor_charge), drift_lower, drift_upper); - } - else - { - diascoring.dia_ms1_isotope_scores_averagine(precursor_mz, ms1_spectrum, - scores.ms1_isotope_correlation, - scores.ms1_isotope_overlap, precursor_charge, drift_lower, drift_upper); - } - } - else - { - if (!compound.sequence.empty()) - { - EmpiricalFormula empf{compound.sequence}; - //Note: this only sets the charge to be extracted again in the following function. - // It is not really used in EmpiricalFormula. Also the m/z of the formula is not used since - // it is shadowed by the exact precursor_mz. - //TODO check if charges are the same (in case the charge was actually present in the sum_formula?) - empf.setCharge(precursor_charge); - diascoring.dia_ms1_isotope_scores(precursor_mz, ms1_spectrum, scores.ms1_isotope_correlation, - scores.ms1_isotope_overlap, - empf, drift_lower, drift_upper); - } - else - { - diascoring.dia_ms1_isotope_scores_averagine(precursor_mz, ms1_spectrum, - scores.ms1_isotope_correlation, - scores.ms1_isotope_overlap, precursor_charge, drift_lower, drift_upper); - } - } - } - } - - void OpenSwathScoring::calculateDIAIdScores(OpenSwath::IMRMFeature* imrmfeature, - const TransitionType & transition, - const std::vector& swath_maps, - const OpenMS::DIAScoring & diascoring, - OpenSwath_Scores & scores, - double drift_lower, double drift_upper) + IsotopeDistribution OpenSwathIsotopeGeneratorCacher::addEntry(double mass) { - OPENMS_PRECONDITION(imrmfeature != nullptr, "Feature to be scored cannot be null"); - OPENMS_PRECONDITION(swath_maps.size() > 0, "There needs to be at least one swath map."); - - // Identify corresponding SONAR maps (if more than one map is used) - std::vector used_swath_maps; - if (swath_maps.size() > 1) - { - double precursor_mz = transition.getPrecursorMZ(); - for (size_t i = 0; i < swath_maps.size(); ++i) - { - if (swath_maps[i].ms1) {continue;} // skip MS1 - if (precursor_mz > swath_maps[i].lower && precursor_mz < swath_maps[i].upper) - { - used_swath_maps.push_back(swath_maps[i]); - } - } - } - else - { - used_swath_maps = swath_maps; - } + IsotopeDistribution dist = solver_.estimateFromPeptideWeight(mass); - // find spectrum that is closest to the apex of the peak using binary search - std::vector spectrum = fetchSpectrumSwath(used_swath_maps, imrmfeature->getRT(), add_up_spectra_, drift_lower, drift_upper); - - // If no charge is given, we assume it to be 1 - int putative_product_charge = 1; - if (transition.getProductChargeState() != 0) - { - putative_product_charge = transition.getProductChargeState(); - } - - // Isotope correlation / overlap score: Is this peak part of an - // isotopic pattern or is it the monoisotopic peak in an isotopic - // pattern? - diascoring.dia_ms1_isotope_scores_averagine(transition.getProductMZ(), - spectrum, - scores.isotope_correlation, - scores.isotope_overlap, - putative_product_charge, - drift_lower, - drift_upper); - // Mass deviation score - diascoring.dia_ms1_massdiff_score(transition.getProductMZ(), spectrum, scores.massdev_score, drift_lower, drift_upper); + cachedIsotopeDistributions_[mass] = dist; + return dist; } - void OpenSwathScoring::calculateChromatographicScores( - OpenSwath::IMRMFeature* imrmfeature, - const std::vector& native_ids, - const std::vector& precursor_ids, - const std::vector& normalized_library_intensity, - std::vector& signal_noise_estimators, - OpenSwath_Scores & scores) const + IsotopeDistribution OpenSwathIsotopeGeneratorCacher::get(double mass) { - OPENMS_PRECONDITION(imrmfeature != nullptr, "Feature to be scored cannot be null"); - OpenSwath::MRMScoring mrmscore_; - if (su_.use_coelution_score_ || su_.use_shape_score_ || (!imrmfeature->getPrecursorIDs().empty() && su_.use_ms1_correlation)) - mrmscore_.initializeXCorrMatrix(imrmfeature, native_ids); + auto ele = cachedIsotopeDistributions_.upper_bound(mass); + //auto ele = std::upper_bound(cachedIsotopeDistributions_.begin(), cachedIsotopeDistributions_.end(), mass); - // XCorr score (coelution) - if (su_.use_coelution_score_) - { - scores.xcorr_coelution_score = mrmscore_.calcXcorrCoelutionScore(); - scores.weighted_coelution_score = mrmscore_.calcXcorrCoelutionWeightedScore(normalized_library_intensity); - } - // XCorr score (shape) - // mean over the intensities at the max of the crosscorrelation - // FEATURE : weigh by the intensity as done by mQuest - // FEATURE : normalize with the intensity at the peak group apex? - if (su_.use_shape_score_) + if (ele == cachedIsotopeDistributions_.end()) { - scores.xcorr_shape_score = mrmscore_.calcXcorrShapeScore(); - scores.weighted_xcorr_shape = mrmscore_.calcXcorrShapeWeightedScore(normalized_library_intensity); + // element is not found so create an entry + return addEntry(mass); } - - // check that the MS1 feature is present and that the MS1 correlation should be calculated - if (!imrmfeature->getPrecursorIDs().empty() && su_.use_ms1_correlation) + else if (std::abs(ele->first - mass) > halfMassStep_) // element found is more than halfMassStep_ away from target mass so generate a new entry { - // we need at least two precursor isotopes - if (precursor_ids.size() > 1) - { - mrmscore_.initializeXCorrPrecursorMatrix(imrmfeature, precursor_ids); - scores.ms1_xcorr_coelution_score = mrmscore_.calcXcorrPrecursorCoelutionScore(); - scores.ms1_xcorr_shape_score = mrmscore_.calcXcorrPrecursorShapeScore(); - } - mrmscore_.initializeXCorrPrecursorContrastMatrix(imrmfeature, precursor_ids, native_ids); // perform cross-correlation on monoisotopic precursor - scores.ms1_xcorr_coelution_contrast_score = mrmscore_.calcXcorrPrecursorContrastCoelutionScore(); - scores.ms1_xcorr_shape_contrast_score = mrmscore_.calcXcorrPrecursorContrastShapeScore(); - - mrmscore_.initializeXCorrPrecursorCombinedMatrix(imrmfeature, precursor_ids, native_ids); // perform cross-correlation on monoisotopic precursor - scores.ms1_xcorr_coelution_combined_score = mrmscore_.calcXcorrPrecursorCombinedCoelutionScore(); - scores.ms1_xcorr_shape_combined_score = mrmscore_.calcXcorrPrecursorCombinedShapeScore(); + return addEntry(mass); } - - if (su_.use_nr_peaks_score_) + else // there is a current cached distribution that can be used! { - scores.nr_peaks = boost::numeric_cast(imrmfeature->size()); - } + auto ptrPrevious = ele--; - // Signal to noise scoring - if (su_.use_sn_score_) - { - scores.sn_ratio = mrmscore_.calcSNScore(imrmfeature, signal_noise_estimators); - // everything below S/N 1 can be set to zero (and the log safely applied) - if (scores.sn_ratio < 1) + if (std::abs(ptrPrevious->first - mass) < std::abs(ele->first - mass)) // check if the previous entry is closer than the first entry { - scores.log_sn_score = 0; + return ptrPrevious->second; } else { - scores.log_sn_score = std::log(scores.sn_ratio); - } - } - - // Mutual information scoring - if (su_.use_mi_score_) - { - mrmscore_.initializeMIMatrix(imrmfeature, native_ids); - scores.mi_score = mrmscore_.calcMIScore(); - scores.weighted_mi_score = mrmscore_.calcMIWeightedScore(normalized_library_intensity); - } - - // check that the MS1 feature is present and that the MS1 MI should be calculated - if (!imrmfeature->getPrecursorIDs().empty() && su_.use_ms1_mi) - { - // we need at least two precursor isotopes - if (precursor_ids.size() > 1) - { - mrmscore_.initializeMIPrecursorMatrix(imrmfeature, precursor_ids); - scores.ms1_mi_score = mrmscore_.calcMIPrecursorScore(); - } - mrmscore_.initializeMIPrecursorContrastMatrix(imrmfeature, precursor_ids, native_ids); - scores.ms1_mi_contrast_score = mrmscore_.calcMIPrecursorContrastScore(); - - mrmscore_.initializeMIPrecursorCombinedMatrix(imrmfeature, precursor_ids, native_ids); - scores.ms1_mi_combined_score = mrmscore_.calcMIPrecursorCombinedScore(); - } - } - - void OpenSwathScoring::calculateChromatographicIdScores( - OpenSwath::IMRMFeature* imrmfeature, - const std::vector& native_ids_identification, - const std::vector& native_ids_detection, - std::vector& signal_noise_estimators, - OpenSwath_Ind_Scores & idscores) const - { - OPENMS_PRECONDITION(imrmfeature != nullptr, "Feature to be scored cannot be null"); - OpenSwath::MRMScoring mrmscore_; - mrmscore_.initializeXCorrContrastMatrix(imrmfeature, native_ids_identification, native_ids_detection); - - if (su_.use_coelution_score_) - { - idscores.ind_xcorr_coelution_score = mrmscore_.calcSeparateXcorrContrastCoelutionScore(); - } - - if (su_.use_shape_score_) - { - idscores.ind_xcorr_shape_score = mrmscore_.calcSeparateXcorrContrastShapeScore(); - } - - // Signal to noise scoring - if (su_.use_sn_score_) - { - idscores.ind_log_sn_score = mrmscore_.calcSeparateSNScore(imrmfeature, signal_noise_estimators); - } - - // Mutual information scoring - if (su_.use_mi_score_) - { - mrmscore_.initializeMIContrastMatrix(imrmfeature, native_ids_identification, native_ids_detection); - idscores.ind_mi_score = mrmscore_.calcSeparateMIContrastScore(); - } - } - - void OpenSwathScoring::calculateLibraryScores( - OpenSwath::IMRMFeature* imrmfeature, - const std::vector & transitions, - const CompoundType& pep, - const double normalized_feature_rt, - OpenSwath_Scores & scores) - { - OPENMS_PRECONDITION(imrmfeature != nullptr, "Feature to be scored cannot be null"); - std::vector normalized_library_intensity; - getNormalized_library_intensities_(transitions, normalized_library_intensity); - - std::vector native_ids; - native_ids.reserve(transitions.size()); - for (const auto& trans : transitions) - { - native_ids.push_back(trans.getNativeID()); - } - - if (su_.use_library_score_) - { - OpenSwath::MRMScoring::calcLibraryScore(imrmfeature, transitions, - scores.library_corr, scores.library_norm_manhattan, scores.library_manhattan, - scores.library_dotprod, scores.library_sangle, scores.library_rootmeansquare); - } - - // Retention time score - if (su_.use_rt_score_) - { - // rt score is delta iRT - double normalized_experimental_rt = normalized_feature_rt; - double rt_score = OpenSwath::MRMScoring::calcRTScore(pep, normalized_experimental_rt); - - scores.normalized_experimental_rt = normalized_experimental_rt; - scores.raw_rt_score = rt_score; - scores.norm_rt_score = rt_score / rt_normalization_factor_; - } - } - - void OpenSwathScoring::getNormalized_library_intensities_(const std::vector & transitions, - std::vector& normalized_library_intensity) - { - normalized_library_intensity.clear(); - for (Size i = 0; i < transitions.size(); i++) - { - normalized_library_intensity.push_back(transitions[i].getLibraryIntensity()); - } - for (Size i = 0; i < normalized_library_intensity.size(); i++) - { - // the library intensity should never be below zero - if (normalized_library_intensity[i] < 0.0) { normalized_library_intensity[i] = 0.0; } - } - OpenSwath::Scoring::normalize_sum(&normalized_library_intensity[0], boost::numeric_cast(normalized_library_intensity.size())); - } - - std::vector OpenSwathScoring::fetchSpectrumSwath(OpenSwath::SpectrumAccessPtr swathmap, double RT, int nr_spectra_to_add, double drift_lower, double drift_upper) - { - - std::vector all_spectra = fetchMultipleSpectra_(swathmap, RT, nr_spectra_to_add); - if (spectra_addition_method_ == "simple") - { - return all_spectra; - } - else - { - std::vector spectrum_out; - spectrum_out.push_back(getAddedSpectra_(all_spectra, drift_lower, drift_upper)); - return spectrum_out; - } - } - - std::vector OpenSwathScoring::fetchSpectrumSwath(std::vector swath_maps, double RT, int nr_spectra_to_add, double drift_lower, double drift_upper) - { - OPENMS_PRECONDITION(nr_spectra_to_add >= 1, "nr_spectra_to_add must be at least 1.") - OPENMS_PRECONDITION(!swath_maps.empty(), "swath_maps vector cannot be empty") - - // This is not SONAR data - if (swath_maps.size() == 1) - { - return fetchSpectrumSwath(swath_maps[0].sptr, RT, nr_spectra_to_add, drift_lower, drift_upper); - } - else - { - // multiple SWATH maps for a single precursor -> this is SONAR data, in all cases only return a single spectrum - std::vector all_spectra; - for (size_t i = 0; i < swath_maps.size(); ++i) - { - std::vector spectrum_vector = fetchMultipleSpectra_(swath_maps[i].sptr, RT, nr_spectra_to_add); - OpenSwath::SpectrumPtr spec = getAddedSpectra_(spectrum_vector, drift_lower, drift_upper); - all_spectra.push_back(spec); + return ele->second; } - OpenSwath::SpectrumPtr spectrum_ = SpectrumAddition::addUpSpectra(all_spectra, spacing_for_spectra_resampling_, true); - std::vector spectrum_out; - spectrum_out.push_back(spectrum_); - return spectrum_out; } - } - - OpenSwath::SpectrumPtr OpenSwathScoring::filterByDrift_(const OpenSwath::SpectrumPtr& input, const double drift_lower, const double drift_upper) - { - // NOTE: this function is very inefficient because filtering unsorted array - OPENMS_PRECONDITION(drift_upper > 0, "Cannot filter by drift time if upper value is less or equal to zero"); - //OPENMS_PRECONDITION(input->getDriftTimeArray() != nullptr, "Cannot filter by drift time if no drift time is available."); - - if (input->getDriftTimeArray() == nullptr) - { - std::cerr << "Warning: Cannot filter by drift time if no drift time is available.\n"; - return input; - } - - OpenSwath::SpectrumPtr output(new OpenSwath::Spectrum); - OpenSwath::BinaryDataArrayPtr mz_arr = input->getMZArray(); - OpenSwath::BinaryDataArrayPtr int_arr = input->getIntensityArray(); - OpenSwath::BinaryDataArrayPtr im_arr = input->getDriftTimeArray(); - - std::vector::const_iterator mz_it = mz_arr->data.begin(); - std::vector::const_iterator int_it = int_arr->data.begin(); - std::vector::const_iterator im_it = im_arr->data.begin(); - std::vector::const_iterator mz_end = mz_arr->data.end(); - - OpenSwath::BinaryDataArrayPtr mz_arr_out(new OpenSwath::BinaryDataArray); - OpenSwath::BinaryDataArrayPtr intens_arr_out(new OpenSwath::BinaryDataArray); - OpenSwath::BinaryDataArrayPtr im_arr_out(new OpenSwath::BinaryDataArray); - im_arr_out->description = im_arr->description; - - size_t n = mz_arr->data.size(); - im_arr_out->data.reserve(n); - while (mz_it != mz_end) - { - if (*im_it > drift_lower && *im_it < drift_upper) - { - mz_arr_out->data.push_back( *mz_it ); - intens_arr_out->data.push_back( *int_it ); - im_arr_out->data.push_back( *im_it ); - } - ++mz_it; - ++int_it; - ++im_it; - } - output->setMZArray(mz_arr_out); - output->setIntensityArray(intens_arr_out); - output->getDataArrays().push_back(im_arr_out); - return output; } - std::vector OpenSwathScoring::fetchMultipleSpectra_(const OpenSwath::SpectrumAccessPtr& swath_map, - double RT, int nr_spectra_to_fetch) - { - std::vector indices = swath_map->getSpectraByRT(RT, 0.0); - std::vector all_spectra; - - if (indices.empty() ) - { - return all_spectra; - } - int closest_idx = boost::numeric_cast(indices[0]); - if (indices[0] != 0 && - std::fabs(swath_map->getSpectrumMetaById(boost::numeric_cast(indices[0]) - 1).RT - RT) < - std::fabs(swath_map->getSpectrumMetaById(boost::numeric_cast(indices[0])).RT - RT)) - { - closest_idx--; - } - - all_spectra.push_back(swath_map->getSpectrumById(closest_idx)); - for (int i = 1; i <= nr_spectra_to_fetch / 2; i++) // cast to int is intended! - { - if (closest_idx - i >= 0) - { - all_spectra.push_back(swath_map->getSpectrumById(closest_idx - i)); - } - if (closest_idx + i < (int)swath_map->getNrSpectra()) - { - all_spectra.push_back(swath_map->getSpectrumById(closest_idx + i)); - } - } - - return all_spectra; - } - OpenSwath::SpectrumPtr OpenSwathScoring::getAddedSpectra_(std::vector& all_spectra, const double drift_lower, const double drift_upper) + std::vector> OpenSwathIsotopeGeneratorCacher::get(double product_mz, int charge, const double mannmass) { - OpenSwath::SpectrumPtr added_spec(new OpenSwath::Spectrum); - added_spec->getDataArrays().push_back( OpenSwath::BinaryDataArrayPtr(new OpenSwath::BinaryDataArray) ); - added_spec->getDataArrays().back()->description = "Ion Mobility"; - - - // If no spectra found return - if (all_spectra.empty()) - { - return added_spec; - } - - // For spectrum resampling since ion mobility is not supported filter by IM window - if (drift_lower > 0) - { - - for (auto& s: all_spectra) s = filterByDrift_(s, drift_lower, drift_upper); - } + IsotopeDistribution distribution = get(product_mz * charge); - // If only one spectrum, no adding to be done - if (all_spectra.size() == 1) - { - return all_spectra[0]; - } - - // Otherwise do spectrum addition - if (spectra_addition_method_ == "simple") - { - // Ensure that we have the same number of data arrays as in the input spectrum - if (!all_spectra.empty() && all_spectra[0]->getDataArrays().size() > 2) - { - for (Size k = 2; k < all_spectra[0]->getDataArrays().size(); k++) - { - OpenSwath::BinaryDataArrayPtr tmp (new OpenSwath::BinaryDataArray()); - tmp->description = all_spectra[0]->getDataArrays()[k]->description; - added_spec->getDataArrays().push_back(tmp); - } - } + double currentIsotopeMz = product_mz; // mz value of current isotope - // Simply add up data and sort in the end - for (const auto& s : all_spectra) - { - for (Size k = 0; k < s->getDataArrays().size(); k++) - { - auto& v1 = added_spec->getDataArrays()[k]->data; - auto& v2 = s->getDataArrays()[k]->data; - - v1.reserve( v1.size() + v2.size() ); - v1.insert( v1.end(), v2.begin(), v2.end() ); - } - } - sortSpectrumByMZ(*added_spec); - } - else + std::vector > isotopes_spec; + for (IsotopeDistribution::Iterator it = distribution.begin(); it != distribution.end(); ++it) { - added_spec = SpectrumAddition::addUpSpectra(all_spectra, spacing_for_spectra_resampling_, true); + isotopes_spec.emplace_back(currentIsotopeMz, it->getIntensity()); + currentIsotopeMz += mannmass / charge; } - OPENMS_POSTCONDITION( std::adjacent_find(added_spec->getMZArray()->data.begin(), - added_spec->getMZArray()->data.end(), std::greater()) == added_spec->getMZArray()->data.end(), - "Postcondition violated: m/z vector needs to be sorted!" ) - - return added_spec; + return isotopes_spec; } - } diff --git a/src/openms/source/ANALYSIS/OPENSWATH/sources.cmake b/src/openms/source/ANALYSIS/OPENSWATH/sources.cmake index 5b162268ce0..534f0a7104d 100644 --- a/src/openms/source/ANALYSIS/OPENSWATH/sources.cmake +++ b/src/openms/source/ANALYSIS/OPENSWATH/sources.cmake @@ -39,6 +39,7 @@ set(sources_list TargetedSpectraExtractor.cpp TransitionTSVFile.cpp TransitionPQPFile.cpp + OpenSwathIsotopeGeneratorCacher.cpp ) ### add path to the filenames From 72752e46a9fa50bd13ba4693412f17fd9c736b98 Mon Sep 17 00:00:00 2001 From: jcharkow Date: Wed, 21 Dec 2022 17:38:33 -0500 Subject: [PATCH 03/13] [TEST] create test for OpenSwathIsotopeGenerator Revamped code when creating the test. Might not be the most efficient way right now however it is quite readable --- .../OpenSwathIsotopeGeneratorCacher.h | 8 +- .../OpenSwathIsotopeGeneratorCacher.cpp | 158 ++++++++++++++++-- .../OpenSwathIsotopeGeneratorCacher_test.cpp | 149 +++++++++++++++++ 3 files changed, 297 insertions(+), 18 deletions(-) diff --git a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.h b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.h index 4af73092822..ddd635a386e 100644 --- a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.h +++ b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.h @@ -82,7 +82,7 @@ namespace OpenMS * Sets the parameters for the scoring. * * @param massStart - start mz to generate a theoretical isotope distribtuion - * @param massEnd - end mz to generate a theoretical isotope distribtuion + * @param massEnd - end mz to generate a theoretical isotope distribtuion (non inclusive) * @param massStep - step between precursor mz for isotope distribution * */ @@ -97,6 +97,12 @@ namespace OpenMS bool getRoundMasses(); + /** @brief fetches a copy of the cachedIsotopeDistribution map, useful for testing + * + */ + std::map fetchCache(); + + /** @brief compute and cache and isotope distribution with the corresponding mass * @param mass - mass to create the isotope distribution from diff --git a/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.cpp b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.cpp index 4412d4904c5..40ca45c3cd7 100644 --- a/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.cpp +++ b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.cpp @@ -49,8 +49,6 @@ namespace OpenMS massStep_ = massStep; for (double mass=massStart; mass < massEnd; mass += massStep_) { - std::cout << "Current mz: " << mass << std::endl; - // create the theoretical distribution //Note: this is a rough estimate of the weight, usually the protons should be deducted first, left for backwards compatibility. IsotopeDistribution dist = solver_.estimateFromPeptideWeight(mass); @@ -79,6 +77,11 @@ namespace OpenMS return solver_.getRoundMasses(); } + std::map OpenSwathIsotopeGeneratorCacher::fetchCache() + { + return cachedIsotopeDistributions_; + } + IsotopeDistribution OpenSwathIsotopeGeneratorCacher::addEntry(double mass) { @@ -90,34 +93,155 @@ namespace OpenMS IsotopeDistribution OpenSwathIsotopeGeneratorCacher::get(double mass) { - auto ele = cachedIsotopeDistributions_.upper_bound(mass); - //auto ele = std::upper_bound(cachedIsotopeDistributions_.begin(), cachedIsotopeDistributions_.end(), mass); - - - if (ele == cachedIsotopeDistributions_.end()) + for (const auto &[key, value]:cachedIsotopeDistributions_) { - // element is not found so create an entry - return addEntry(mass); + std::cout << "key is: " << key << std::endl; } - else if (std::abs(ele->first - mass) > halfMassStep_) // element found is more than halfMassStep_ away from target mass so generate a new entry + + if (mass <= (cachedIsotopeDistributions_.begin()->first + halfMassStep_) ) // all cached greater than target (with tolerance) { - return addEntry(mass); + std::cout << "All cached elements strictly greater than target (with tolerance)" << std::endl; + if ( (cachedIsotopeDistributions_.begin()->first - mass) <= halfMassStep_) + { + std::cout << "first element matches"; + return cachedIsotopeDistributions_.begin()->second; + } + else + { + std::cout << "first element does not match"; + return addEntry(mass); + } } - else // there is a current cached distribution that can be used! + else // mass is somewhere in the middle or all elements are less than mass { - auto ptrPrevious = ele--; + auto upperBound = cachedIsotopeDistributions_.upper_bound(mass); + auto prevEle = upperBound; + prevEle--; + std::cout << "ptr previous: " << prevEle->first << std::endl; + std::cout << "upper bound (first element not less than " << mass << ": " << upperBound->first << std::endl; + + if (upperBound == cachedIsotopeDistributions_.end()) + { + std::cout << "upper bound is at the end, meaning all elements are less than or equal to mass" << std::endl; + if (mass - prevEle->first <= halfMassStep_) + { + std::cout << "last element is in range" << std::endl; + return prevEle->second; + } + else + { + std::cout << "last element is not in range" << std::endl; + return addEntry(mass); + } + } + else if ((mass - prevEle->first) > halfMassStep_) + { + std::cout << "mass" << mass << std::endl; + std::cout << "Previous element is too far away (half mass step: " << (mass - prevEle->first) << ")" << std::endl; + if ((upperBound->first - mass) <= halfMassStep_) + { + + std::cout << "upper bound in range, match!" << std::endl; + return upperBound->second; + } + else + { + std::cout << "upper bound not in range, new entry" << std::endl; + return addEntry(mass); + } + } + else //prevEle in range + { + std::cout << "previous element is in range" << std::endl; + if ((upperBound->first - mass) > halfMassStep_) + { + std::cout << "upper bound not in range match previous element" << std::endl; + } + else // both elements in range + { + std::cout << "both elements in range see which is closer" << std::endl; + if ((upperBound->first - mass) <= (mass - prevEle->first)) + { + std::cout << "upper bound is closer" << std::endl; + return upperBound->second; + } + else + { + std::cout << "previous element is closer" << std::endl; + return prevEle->second; + } + } + } + } + } - if (std::abs(ptrPrevious->first - mass) < std::abs(ele->first - mass)) // check if the previous entry is closer than the first entry + + + +/* + if ((ele == cachedIsotopeDistributions_.end()) & (previousEle == cachedIsotopeDistributions_.end()) | (ele == cachedIsotopeDistributions_.begin()) // all cached elements are greater than target mass + { + std::cout << "All elements greater than target mass, creating entry" << std::endl; + return addEnrtry(mass); + } + else if ((ele == cachedIsotopeDistributions_.end()) & (previousEle != cachedIsotopeDistributions_.end())) // last element might match + { + std::cout << "Last element might match" << std::endl; + if (std::abs(previousEle->first - mass) < halfStep_) { - return ptrPrevious->second; + std::cout << "Last element matches" << std::endl; + return previousEle->second; } else { - return ele->second; + return addEntry(mass); } } + //else if ((ele == cachedIsotopeDistributions_.begin()) & (previousEle != cachedIsotopeDistributions_.end())) // last element might match + // + // at the end of the array, possible that we still have a match though if this is just slightly over the last element + std::cout << "At the end of the array..." << std::endl; + if (std::abs(previousEle->first - mass) < std::abs(ele->first - mass)) // check if the previous entry is within range + { + double rslt = std::abs(previousEle->first - mass) < std::abs(ele->first - mass); + std::cout << "rslt is: " << rslt << std::endl; + std::cout << "Taking previous element" << std::endl; + return ele->second; + } + else // previous entry not within range + { + double rslt = std::abs(previousEle->first - mass) < std::abs(ele->first - mass); + std::cout << "rslt is: " << rslt << std::endl; + std::cout << "No match creating entry" << std::endl; + return addEntry(mass); + } + + // determine if the current element or the element before it is closer + if (std::abs(previousEle->first - mass) < std::abs(ele->first - mass)) // check if the previous entry is closer than the first entry + { + double rslt = std::abs(previousEle->first - mass) < std::abs(ele->first - mass); + std::cout << "rslt is: " << rslt << std::endl; + std::cout << "Taking previous element" << std::endl; + ele = previousEle; + } + + if (std::abs(ele->first - mass) > halfMassStep_) // element found is more than halfMassStep_ away from target mass so generate a new entry + { + double rslt = std::abs(ele->first - mass) > halfMassStep_; + + std::cout << "Closest element is not in range, creating entry" << " closest element: " << ele->first << " delta: " << rslt << std::endl; + return addEntry(mass); + } + else // there is a current cached distribution that can be used! + { + std::cout << "Using the cache" << std::endl; + + return ele->second; + } + } + */ + - } std::vector> OpenSwathIsotopeGeneratorCacher::get(double product_mz, int charge, const double mannmass) diff --git a/src/tests/class_tests/openms/source/OpenSwathIsotopeGeneratorCacher_test.cpp b/src/tests/class_tests/openms/source/OpenSwathIsotopeGeneratorCacher_test.cpp index 3bb1577c5fa..acfce1351c9 100644 --- a/src/tests/class_tests/openms/source/OpenSwathIsotopeGeneratorCacher_test.cpp +++ b/src/tests/class_tests/openms/source/OpenSwathIsotopeGeneratorCacher_test.cpp @@ -35,6 +35,13 @@ #include #include + +// data access +#include + +// For creating isotopic distributions +#include + using namespace std; using namespace OpenMS; @@ -58,6 +65,148 @@ START_SECTION(OpenSwathIsotopeGeneratorCacher(Size max_isotope, double massStep) } END_SECTION +START_SECTION(void OpenSwathIsotopeGeneratorCacher::initialize(double massStart, double massEnd, double massStep)) +{ + int maxIsotope = 2; + double massStep = 1; + OpenSwathIsotopeGeneratorCacher isotopeCacher(maxIsotope, massStep); + isotopeCacher.initialize(100, 102, massStep); // since non inclusive massEnd should generate cache for 100 and 101 + + std::map cache = isotopeCacher.fetchCache(); + + CoarseIsotopePatternGenerator isotopeGenerator(maxIsotope, massStep); + + // create the test cache + std::map test; + test[100] = isotopeGenerator.estimateFromPeptideWeight(100); + test[101] = isotopeGenerator.estimateFromPeptideWeight(101); + + + TEST_EQUAL(cache.size(), test.size()); // both should be 2 + TEST_EQUAL(cache.size(), 2); + + // test isotopes distributions are equal + for ( const auto &[key, testDist]: test ) + { + TEST_EQUAL(cache[key].getContainer().size(), test[key].getContainer().size()) + for (Size i = 0; i != testDist.size(); ++i) + { + TEST_EQUAL(round(cache[key].getContainer()[i].getMZ()), test[key].getContainer()[i].getMZ()) + TEST_REAL_SIMILAR(cache[key].getContainer()[i].getIntensity(), test[key].getContainer()[i].getIntensity()) + } + } +} +END_SECTION + +START_SECTION(IsotopeDistribution OpenSwathIsotopeGeneratorCacher::get(double mass)) +{ + // Setup + int maxIsotope = 2; + double massStep = 5; + IsotopeDistribution cachedDistribution; + IsotopeDistribution testDistribution; + OpenSwathIsotopeGeneratorCacher isotopeCacher(maxIsotope, massStep); + isotopeCacher.initialize(100, 120, massStep); // since non inclusive massEnd should generate cache for 100 and 101 + CoarseIsotopePatternGenerator isotopeGenerator(maxIsotope, massStep); + + + // ############ Case #1 get(100) Should fetch distribution of mass 100 because it is in the spectrum ############### + std::cout << "Case 1" << std::endl; + cachedDistribution = isotopeCacher.get(100); + testDistribution = isotopeGenerator.estimateFromPeptideWeight(100); + + TEST_EQUAL(testDistribution.size(), cachedDistribution.size()) + for (Size i = 0; i != testDistribution.size(); ++i) + { + TEST_EQUAL(round(cachedDistribution.getContainer()[i].getMZ()), testDistribution.getContainer()[i].getMZ()) + TEST_REAL_SIMILAR(cachedDistribution.getContainer()[i].getIntensity(), testDistribution.getContainer()[i].getIntensity()) + } + + // ########## Case #2 get(107.5) Should fetch distribution of mass 110 because it is the closest to the requested mass and it is less than halfStep away ################# + std::cout << "Case 2" << " get(" << 107.5 << ")" << std::endl; + cachedDistribution = isotopeCacher.get(107.5); + testDistribution = isotopeGenerator.estimateFromPeptideWeight(110); + + TEST_EQUAL(testDistribution.size(), cachedDistribution.size()) + for (Size i = 0; i != testDistribution.size(); ++i) + { + TEST_EQUAL(round(cachedDistribution.getContainer()[i].getMZ()), testDistribution.getContainer()[i].getMZ()) + TEST_REAL_SIMILAR(cachedDistribution.getContainer()[i].getIntensity(), testDistribution.getContainer()[i].getIntensity()) + } + + + // ########## Case #3 get(103) Should fetch distribution of mass 105 because it is the closest to the requested mass (always round up if halfway of step) and it is less than halfStep away ################# + + std::cout << "Case 3" << std::endl; + cachedDistribution = isotopeCacher.get(103); + testDistribution = isotopeGenerator.estimateFromPeptideWeight(105); + + TEST_EQUAL(testDistribution.size(), cachedDistribution.size()) + for (Size i = 0; i != testDistribution.size(); ++i) + { + TEST_EQUAL(round(cachedDistribution.getContainer()[i].getMZ()), testDistribution.getContainer()[i].getMZ()) + TEST_REAL_SIMILAR(cachedDistribution.getContainer()[i].getIntensity(), testDistribution.getContainer()[i].getIntensity()) + } + + // ########## Case #4 get(200.4) Should fetch distribution of mass 200.4 because there is no cached distribution close to requested mass + std::cout << "Case 4" << std::endl; + cachedDistribution = isotopeCacher.get(200.4); + testDistribution = isotopeGenerator.estimateFromPeptideWeight(200.4); // don't need to recreate this + + TEST_EQUAL(testDistribution.size(), cachedDistribution.size()) + for (Size i = 0; i != testDistribution.size(); ++i) + { + TEST_EQUAL(round(cachedDistribution.getContainer()[i].getMZ()), testDistribution.getContainer()[i].getMZ()) + TEST_REAL_SIMILAR(cachedDistribution.getContainer()[i].getIntensity(), testDistribution.getContainer()[i].getIntensity()) + } + + // ########## Case #5 get(200.7) Should fetch distribution of mass 200.4 because this is cached (from example above) and within range + std::cout << "Case 5" << std::endl; + cachedDistribution = isotopeCacher.get(201); + // testDistribution = isotopeGenerator.estimateFromPeptideWeight(200.4); // don't need to recreate this + + TEST_EQUAL(testDistribution.size(), cachedDistribution.size()) + for (Size i = 0; i != testDistribution.size(); ++i) + { + TEST_EQUAL(round(cachedDistribution.getContainer()[i].getMZ()), testDistribution.getContainer()[i].getMZ()) + TEST_REAL_SIMILAR(cachedDistribution.getContainer()[i].getIntensity(), testDistribution.getContainer()[i].getIntensity()) + } + + // ########## Case #6 get(90.5) Should fetch distribution of mass 90.5 because no value is close to this + std::cout << "Case 6" << std::endl; + cachedDistribution = isotopeCacher.get(90.5); + testDistribution = isotopeGenerator.estimateFromPeptideWeight(90.5); // don't need to recreate this + + TEST_EQUAL(testDistribution.size(), cachedDistribution.size()) + for (Size i = 0; i != testDistribution.size(); ++i) + { + TEST_EQUAL(round(cachedDistribution.getContainer()[i].getMZ()), testDistribution.getContainer()[i].getMZ()) + TEST_REAL_SIMILAR(cachedDistribution.getContainer()[i].getIntensity(), testDistribution.getContainer()[i].getIntensity()) + } + + // ########## Case #7 get(89) Should fetch distribution of mass 90.5 because this is close + std::cout << "Case 6" << std::endl; + cachedDistribution = isotopeCacher.get(89); + testDistribution = isotopeGenerator.estimateFromPeptideWeight(90.5); // don't need to recreate this + + TEST_EQUAL(testDistribution.size(), cachedDistribution.size()) + for (Size i = 0; i != testDistribution.size(); ++i) + { + TEST_EQUAL(round(cachedDistribution.getContainer()[i].getMZ()), testDistribution.getContainer()[i].getMZ()) + TEST_REAL_SIMILAR(cachedDistribution.getContainer()[i].getIntensity(), testDistribution.getContainer()[i].getIntensity()) + } + + + + + +} + +END_SECTION + + + + ///////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////// END_TEST From 5d0f3a33f606adf9d63a4335fb005230165ed947 Mon Sep 17 00:00:00 2001 From: jcharkow Date: Thu, 22 Dec 2022 14:47:31 -0500 Subject: [PATCH 04/13] [API] Integrate OpenSwathIsotopeGeneratorCacher with OpenSwathWorkflow --- .../OpenMS/ANALYSIS/OPENSWATH/DIAHelper.h | 3 +- .../OpenMS/ANALYSIS/OPENSWATH/DIAPrescoring.h | 7 +- .../OpenMS/ANALYSIS/OPENSWATH/DIAScoring.h | 15 ++- .../OPENSWATH/MRMFeatureFinderScoring.h | 18 ++- .../OpenSwathIsotopeGeneratorCacher.h | 23 +++- .../ANALYSIS/OPENSWATH/OpenSwathScoring.h | 14 ++- .../ANALYSIS/OPENSWATH/OpenSwathWorkflow.h | 11 +- .../source/ANALYSIS/OPENSWATH/DIAHelper.cpp | 8 +- .../ANALYSIS/OPENSWATH/DIAPrescoring.cpp | 13 ++- .../source/ANALYSIS/OPENSWATH/DIAScoring.cpp | 29 ++--- .../OPENSWATH/MRMFeatureFinderScoring.cpp | 56 +++++---- .../OpenSwathIsotopeGeneratorCacher.cpp | 110 ++++++++++++++++++ .../ANALYSIS/OPENSWATH/OpenSwathScoring.cpp | 22 ++-- .../ANALYSIS/OPENSWATH/OpenSwathWorkflow.cpp | 14 ++- .../FeatureFinderAlgorithmMetaboIdent.cpp | 52 +++++---- .../FeatureFinderIdentificationAlgorithm.cpp | 68 +++++------ 16 files changed, 323 insertions(+), 140 deletions(-) diff --git a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAHelper.h b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAHelper.h index 36fde6ce5ad..6b4991e4acf 100644 --- a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAHelper.h +++ b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAHelper.h @@ -34,6 +34,7 @@ #pragma once +#include #include #include @@ -168,7 +169,7 @@ namespace OpenMS /// Old + new peaks are pushed to @p isotopeMasses OPENMS_DLLAPI void addSinglePeakIsotopes2Spec(double mz, double ity, std::vector >& isotope_masses, //[out] - Size nr_isotopes, int charge); + Size nr_isotopes, int charge, const OpenSwathIsotopeGeneratorCacher isotopeCacher); /// sorts vector of pairs by first OPENMS_DLLAPI void sortByFirst(std::vector >& tmp); diff --git a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAPrescoring.h b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAPrescoring.h index c7ee750ecc4..3918c02dbb4 100644 --- a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAPrescoring.h +++ b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAPrescoring.h @@ -39,6 +39,8 @@ #include #include +#include + namespace OpenMS { @@ -84,7 +86,8 @@ namespace OpenMS double& dotprod, double& manhattan, double drift_start, - double drift_end) const; + double drift_end, + const OpenSwathIsotopeGeneratorCacher& isotopeCacher) const; /** @brief Compute manhattan and dotprod score for all spectra which can be accessed by @@ -92,7 +95,7 @@ namespace OpenMS */ void operator()(const OpenSwath::SpectrumAccessPtr& swath_ptr, OpenSwath::LightTargetedExperiment& transition_exp_used, - OpenSwath::IDataFrameWriter* ivw, double drift_start, double drift_end) const; + OpenSwath::IDataFrameWriter* ivw, double drift_start, double drift_end, const OpenSwathIsotopeGeneratorCacher& isotopeCacher) const; }; diff --git a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAScoring.h b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAScoring.h index b5ed809009d..50d93e351fc 100644 --- a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAScoring.h +++ b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAScoring.h @@ -42,6 +42,9 @@ #include #include + +#include + namespace OpenMS { class TheoreticalSpectrumGenerator; @@ -113,7 +116,8 @@ namespace OpenMS double& isotope_corr, double& isotope_overlap, double drift_lower, - double drift_upper) const; + double drift_upper, + const OpenSwathIsotopeGeneratorCacher isotopeCacher) const; /// Massdiff scores, see class description void dia_massdiff_score(const std::vector& transitions, @@ -138,7 +142,7 @@ namespace OpenMS /// Precursor isotope scores for precursors (peptides and metabolites) void dia_ms1_isotope_scores_averagine(double precursor_mz, const std::vector& spectrum, - double& isotope_corr, double& isotope_overlap, int charge_state, double drift_start, double drift_end) const; + double& isotope_corr, double& isotope_overlap, int charge_state, double drift_start, double drift_end, const OpenSwathIsotopeGeneratorCacher isotopeCacher) const; void dia_ms1_isotope_scores(double precursor_mz, const std::vector& spectrum, double& isotope_corr, double& isotope_overlap, const EmpiricalFormula& sum_formula, double drift_start, double drift_end) const; @@ -153,7 +157,8 @@ namespace OpenMS double& dotprod, double& manhattan, double drift_start, - double drift_end) const; + double drift_end, + const OpenSwathIsotopeGeneratorCacher isotopeCacher) const; //@} private: @@ -172,7 +177,7 @@ namespace OpenMS const std::vector& spectrum, std::map& intensities, double& isotope_corr, - double& isotope_overlap, double drift_start, double drift_end) const; + double& isotope_overlap, double drift_start, double drift_end, const OpenSwathIsotopeGeneratorCacher& isotopeCacher) const; /// retrieves intensities from MRMFeature /// computes a vector of relative intensities for each feature (output to intensities) @@ -210,7 +215,7 @@ namespace OpenMS */ double scoreIsotopePattern_(const std::vector& isotopes_int, double product_mz, - int putative_fragment_charge) const; + int putative_fragment_charge, const OpenSwathIsotopeGeneratorCacher& isotopeCacher) const; /** @brief Compare an experimental isotope pattern to a theoretical one diff --git a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/MRMFeatureFinderScoring.h b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/MRMFeatureFinderScoring.h index a485c8fc2d2..e2ff3c6aef3 100644 --- a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/MRMFeatureFinderScoring.h +++ b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/MRMFeatureFinderScoring.h @@ -37,11 +37,11 @@ #define USE_SP_INTERFACE // Actual scoring -#include - +#include #include +#include +#include #include -#include // Kernel classes #include @@ -132,7 +132,8 @@ namespace OpenMS FeatureMap& output, const TargetedExperiment& transition_exp, const TransformationDescription& trafo, - const PeakMap& swath_map); + const PeakMap& swath_map, + const OpenSwathIsotopeGeneratorCacher& isotopeCacher); /** @brief Pick and score features in a single experiment from chromatograms * @@ -153,7 +154,8 @@ namespace OpenMS const OpenSwath::LightTargetedExperiment& transition_exp, const TransformationDescription& trafo, const std::vector& swath_maps, - TransitionGroupMapType& transition_group_map); + TransitionGroupMapType& transition_group_map, + const OpenSwathIsotopeGeneratorCacher& isotopeCacher); /** @brief Prepares the internal mappings of peptides and proteins. * @@ -186,6 +188,7 @@ namespace OpenMS const TransformationDescription & trafo, const std::vector& swath_maps, FeatureMap& output, + const OpenSwathIsotopeGeneratorCacher& isotopeCacher, bool ms1only = false) const; /** @brief Set the flag for strict mapping @@ -268,7 +271,9 @@ namespace OpenMS * @param swath_maps Optional SWATH-MS (DIA) map corresponding from which * the chromatograms were extracted. Use empty map if no * data is available. + * @param OpenSwathIsotopeGeneratorCacher cache containing precomputed isotope distributions * @return a struct of type OpenSwath_Ind_Scores containing either target or decoy values + * */ OpenSwath_Ind_Scores scoreIdentification_(MRMTransitionGroupType& transition_group_identification, OpenSwathScoring& scorer, @@ -276,7 +281,8 @@ namespace OpenMS const std::vector & native_ids_detection, const double det_intensity_ratio_score, const double det_mi_ratio_score, - const std::vector& swath_maps) const; + const std::vector& swath_maps, + const OpenSwathIsotopeGeneratorCacher& isotopeCacher) const; void prepareFeatureOutput_(OpenMS::MRMFeature& mrmfeature, bool ms1only, int charge) const; diff --git a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.h b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.h index ddd635a386e..360369ab169 100644 --- a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.h +++ b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.h @@ -110,7 +110,7 @@ namespace OpenMS IsotopeDistribution addEntry(double mass); /** @breif gets the cached isotope distribution for the provided mass - * if no isotope distribution exist within the halfMassStep_ than create and cache a new distribution + * if no isotope distribution exist within the halfMassStep_ then create and cache a new distribution * * @param mass - mass to fetch the isotope distribution */ @@ -122,7 +122,26 @@ namespace OpenMS * mannmass - ??? */ //std::vector> get(double mz, int charge, const double mannmass = 1.00048) const; + + /** @brief computes a missed cache but does not add corresponding mass to cache + */ + IsotopeDistribution computeMiss(double mass) const; + + /** @brief gets form the cached isotope distribution for the provided mass + * + * if no isotope distribution exist within the halfMassStep_ then compute on the spot without caching + */ + IsotopeDistribution getImmutable(double mass) const; + + /** @brief gets from cached isotope distribution for provided m/z and charge + * if no isotope distribution exists within the halfMassStep_ then create and cache a new distribution + */ std::vector> get(double mz, int charge, const double mannmass = 1.00048); + + + /** @brief gets from cached isotope distribution for provided m/z and charge + * if no isotope distribution exists within the halfMassStep_ then create (but do not cache) a new distribution + */ + std::vector> getImmutable(double mz, int charge, const double mannmass = 1.00048) const; }; } - diff --git a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathScoring.h b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathScoring.h index b05e88fd76d..ac0014255e6 100644 --- a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathScoring.h +++ b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathScoring.h @@ -42,8 +42,9 @@ #include // scoring -#include #include +#include +#include #include #include @@ -190,7 +191,8 @@ namespace OpenMS std::vector& mzerror_ppm, const double drift_lower, const double drift_upper, - const double drift_target); + const double drift_target, + const OpenSwathIsotopeGeneratorCacher& isotopeCacher); /** @brief Score a single chromatographic feature using the precursor map. * @@ -203,6 +205,7 @@ namespace OpenMS * @param scores The object to store the result * @param drift_lower Drift time lower extraction boundary * @param drift_upper Drift time upper extraction boundary + * @param isotopCacher cache of previously computed isotope distributions * */ void calculatePrecursorDIAScores(const OpenSwath::SpectrumAccessPtr& ms1_map, @@ -212,7 +215,8 @@ namespace OpenMS const CompoundType& compound, OpenSwath_Scores& scores, double drift_lower, - double drift_upper); + double drift_upper, + const OpenSwathIsotopeGeneratorCacher& isotopeCacher); /** @brief Score a single chromatographic feature using DIA / SWATH scores. * @@ -225,6 +229,7 @@ namespace OpenMS * @param scores The object to store the result * @param drift_lower Drift time lower extraction boundary * @param drift_upper Drift time upper extraction boundary + * @param isotopeCacher cached of previous computed isotope distributions * */ void calculateDIAIdScores(OpenSwath::IMRMFeature* imrmfeature, @@ -233,7 +238,8 @@ namespace OpenMS const OpenMS::DIAScoring & diascoring, OpenSwath_Scores & scores, double drift_lower, - double drift_upper); + double drift_upper, + const OpenSwathIsotopeGeneratorCacher& isotopeCacher); /** @brief Computing the normalized library intensities from the transition objects * diff --git a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathWorkflow.h b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathWorkflow.h index 7d288b67b64..e2de7d038c0 100644 --- a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathWorkflow.h +++ b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathWorkflow.h @@ -55,6 +55,7 @@ #include #include #include +#include // Algorithms #include @@ -242,7 +243,7 @@ namespace OpenMS * calibrants) perform RT and m/z correction in SWATH-MS data. Currently * supports (non-)linear correction of RT against library RT as well * as (non-)linear correction of m/z error as a function of m/z. - * + * * @note The relevant algorithms are implemented in MRMRTNormalizer for RT * calibration and SwathMapMassCorrection for m/z calibration. * @@ -383,10 +384,10 @@ namespace OpenMS /** * @brief Execute all steps in an \ref UTILS_OpenSwathWorkflow "OpenSwath" analysis * - * The workflow will perform a complete OpenSWATH analysis. Optionally, - * a calibration of m/z and retention time (mapping peptides to normalized - * space and correcting m/z error) can be performed beforehand using the - * OpenSwathCalibrationWorkflow class. + * The workflow will perform a complete OpenSWATH analysis. Optionally, + * a calibration of m/z and retention time (mapping peptides to normalized + * space and correcting m/z error) can be performed beforehand using the + * OpenSwathCalibrationWorkflow class. * * For diaPASEF workflows where ion mobility windows are overlapping, precursors may be found in multiple SWATHs. * In this case, precursors are only extracted from the SWATH in which they are most centered across ion mobility diff --git a/src/openms/source/ANALYSIS/OPENSWATH/DIAHelper.cpp b/src/openms/source/ANALYSIS/OPENSWATH/DIAHelper.cpp index 2eb89bfb9a0..a1eaa20e8c7 100644 --- a/src/openms/source/ANALYSIS/OPENSWATH/DIAHelper.cpp +++ b/src/openms/source/ANALYSIS/OPENSWATH/DIAHelper.cpp @@ -42,6 +42,9 @@ #include + +#include + namespace OpenMS::DIAHelpers { @@ -444,10 +447,9 @@ namespace OpenMS::DIAHelpers /// given a peak of experimental mz and intensity, add isotope pattern to a "spectrum". void addSinglePeakIsotopes2Spec(double mz, double ity, std::vector >& isotope_masses, //[out] - Size nr_isotopes, int charge) + Size nr_isotopes, int charge, OpenSwathIsotopeGeneratorCacher isotopeCacher) { - std::vector > isotopes; - getAveragineIsotopeDistribution(mz, isotopes, charge, nr_isotopes); + std::vector > isotopes = isotopeCacher.getImmutable(mz, charge); // note nr_isotopes is already set for (Size j = 0; j < isotopes.size(); ++j) { isotopes[j].second *= ity; //multiple isotope intensity by spec intensity diff --git a/src/openms/source/ANALYSIS/OPENSWATH/DIAPrescoring.cpp b/src/openms/source/ANALYSIS/OPENSWATH/DIAPrescoring.cpp index f312a5380da..c1e4d6a6553 100644 --- a/src/openms/source/ANALYSIS/OPENSWATH/DIAPrescoring.cpp +++ b/src/openms/source/ANALYSIS/OPENSWATH/DIAPrescoring.cpp @@ -33,6 +33,7 @@ // -------------------------------------------------------------------------- #include +#include #include #include @@ -46,6 +47,8 @@ #include #include + + namespace OpenMS { @@ -80,7 +83,7 @@ namespace OpenMS void DiaPrescore::operator()(const OpenSwath::SpectrumAccessPtr& swath_ptr, OpenSwath::LightTargetedExperiment& transition_exp_used, - OpenSwath::IDataFrameWriter* ivw, double drift_start, double drift_end) const + OpenSwath::IDataFrameWriter* ivw, double drift_start, double drift_end, const OpenSwathIsotopeGeneratorCacher& isotopeCacher) const { //getParams(); typedef std::map > Mmap; @@ -122,7 +125,7 @@ namespace OpenMS double score1; double score2; //OpenSwath::LightPeptide pep; - score(spec, beg->second, score1, score2, drift_start, drift_end); + score(spec, beg->second, score1, score2, drift_start, drift_end, isotopeCacher); score1v.push_back(score1); score2v.push_back(score2); @@ -140,7 +143,8 @@ namespace OpenMS double& dotprod, double& manhattan, double drift_start, - double drift_end) const + double drift_end, + const OpenSwathIsotopeGeneratorCacher& isotopeCacher) const { std::vector > res; std::vector > spectrumWIso, spectrumWIsoNegPreIso; @@ -155,7 +159,8 @@ namespace OpenMS transition.getLibraryIntensity(), spectrumWIso, nr_isotopes_, - chg); + chg, + isotopeCacher); } // duplicate since we will add differently weighted preIsotope intensities spectrumWIsoNegPreIso.reserve(spectrumWIso.size()); diff --git a/src/openms/source/ANALYSIS/OPENSWATH/DIAScoring.cpp b/src/openms/source/ANALYSIS/OPENSWATH/DIAScoring.cpp index 5c636cf9ea9..c1168441464 100644 --- a/src/openms/source/ANALYSIS/OPENSWATH/DIAScoring.cpp +++ b/src/openms/source/ANALYSIS/OPENSWATH/DIAScoring.cpp @@ -127,14 +127,14 @@ namespace OpenMS // DIA / SWATH scoring void DIAScoring::dia_isotope_scores(const std::vector& transitions, std::vector& spectrum, - OpenSwath::IMRMFeature* mrmfeature, double& isotope_corr, double& isotope_overlap, double drift_start, double drift_end) const + OpenSwath::IMRMFeature* mrmfeature, double& isotope_corr, double& isotope_overlap, double drift_start, double drift_end, const OpenSwathIsotopeGeneratorCacher isotopeCacher) const { isotope_corr = 0; isotope_overlap = 0; // first compute a map of relative intensities from the feature, then compute the score std::map intensities; getFirstIsotopeRelativeIntensities_(transitions, mrmfeature, intensities); - diaIsotopeScoresSub_(transitions, spectrum, intensities, isotope_corr, isotope_overlap, drift_start, drift_end); + diaIsotopeScoresSub_(transitions, spectrum, intensities, isotope_corr, isotope_overlap, drift_start, drift_end, isotopeCacher); } void DIAScoring::dia_massdiff_score(const std::vector& transitions, @@ -213,6 +213,7 @@ namespace OpenMS // although precursor_mz can be received from the empirical formula (if non-empty), the actual precursor could be // slightly different. And also for compounds, usually the neutral sum_formula without adducts is given. // Therefore calculate the isotopes based on the formula but place them at precursor_mz + // For emperical formula estimation we do not use isotope cacher std::vector isotopes_int; getIsotopeIntysFromExpSpec_(precursor_mz, spectrum, isotopes_int, sum_formula.getCharge(), drift_start, drift_end); @@ -245,13 +246,13 @@ namespace OpenMS void DIAScoring::dia_ms1_isotope_scores_averagine(double precursor_mz, const std::vector& spectrum, double& isotope_corr, double& isotope_overlap, - int charge_state, double drift_start, double drift_end) const + int charge_state, double drift_start, double drift_end, const OpenSwathIsotopeGeneratorCacher isotopeCacher) const { std::vector exp_isotopes_int; getIsotopeIntysFromExpSpec_(precursor_mz, spectrum, exp_isotopes_int, charge_state, drift_start, drift_end); - CoarseIsotopePatternGenerator solver(dia_nr_isotopes_ + 1); // NOTE: this is a rough estimate of the neutral mz value since we would not know the charge carrier for negative ions - IsotopeDistribution isotope_dist = solver.estimateFromPeptideWeight(std::fabs(precursor_mz * charge_state)); + + IsotopeDistribution isotope_dist = isotopeCacher.getImmutable(std::fabs(precursor_mz * charge_state)); //TODO this was originally +1 dia_nr_isotopes double max_ratio; int nr_occurrences; @@ -302,10 +303,10 @@ namespace OpenMS } void DIAScoring::score_with_isotopes(std::vector& spectrum, const std::vector& transitions, - double& dotprod, double& manhattan, double drift_start, double drift_end) const + double& dotprod, double& manhattan, double drift_start, double drift_end, const OpenSwathIsotopeGeneratorCacher isotopeCacher) const { - OpenMS::DiaPrescore dp(dia_extract_window_, dia_nr_isotopes_, dia_nr_charges_); - dp.score(spectrum, transitions, dotprod, manhattan, drift_start, drift_end); + OpenMS::DiaPrescore dp(dia_extract_window_, dia_nr_isotopes_, dia_nr_charges_); // note with isotope cacher cannot set dia_nr_isotopes basically just a filler here + dp.score(spectrum, transitions, dotprod, manhattan, drift_start, drift_end, isotopeCacher); } /////////////////////////////////////////////////////////////////////////// @@ -329,7 +330,8 @@ namespace OpenMS double& isotope_corr, double& isotope_overlap, double drift_start, - double drift_end) const + double drift_end, + const OpenSwathIsotopeGeneratorCacher& isotopeCacher) const { std::vector isotopes_int; double max_ratio; @@ -362,7 +364,7 @@ namespace OpenMS // calculate the scores: // isotope correlation (forward) and the isotope overlap (backward) scores - double score = scoreIsotopePattern_(isotopes_int, transitions[k].getProductMZ(), putative_fragment_charge); + double score = scoreIsotopePattern_(isotopes_int, transitions[k].getProductMZ(), putative_fragment_charge, isotopeCacher); isotope_corr += score * rel_intensity; largePeaksBeforeFirstIsotope_(spectrum, transitions[k].getProductMZ(), isotopes_int[0], nr_occurences, max_ratio, drift_start, drift_end); isotope_overlap += nr_occurences * rel_intensity; @@ -420,16 +422,15 @@ namespace OpenMS double DIAScoring::scoreIsotopePattern_(const std::vector& isotopes_int, double product_mz, - int putative_fragment_charge) const + int putative_fragment_charge, + const OpenSwathIsotopeGeneratorCacher& isotopeCacher) const { OPENMS_PRECONDITION(putative_fragment_charge != 0, "Charge needs to be set to != 0"); // charge can be positive and negative IsotopeDistribution isotope_dist; - // create the theoretical distribution from the peptide weight - CoarseIsotopePatternGenerator solver(dia_nr_isotopes_ + 1); // NOTE: this is a rough estimate of the neutral mz value since we would not know the charge carrier for negative ions - isotope_dist = solver.estimateFromPeptideWeight(std::fabs(product_mz * putative_fragment_charge)); + isotope_dist = isotopeCacher.getImmutable(std::fabs(product_mz * putative_fragment_charge)); return scoreIsotopePattern_(isotopes_int, isotope_dist); } //end of dia_isotope_corr_sub diff --git a/src/openms/source/ANALYSIS/OPENSWATH/MRMFeatureFinderScoring.cpp b/src/openms/source/ANALYSIS/OPENSWATH/MRMFeatureFinderScoring.cpp index 78b75680860..d994e8aa8f5 100644 --- a/src/openms/source/ANALYSIS/OPENSWATH/MRMFeatureFinderScoring.cpp +++ b/src/openms/source/ANALYSIS/OPENSWATH/MRMFeatureFinderScoring.cpp @@ -45,6 +45,7 @@ // Helpers #include +#include #include #include @@ -171,7 +172,8 @@ namespace OpenMS FeatureMap& output, const TargetedExperiment& transition_exp_, const TransformationDescription& trafo, - const PeakMap& swath_map) + const PeakMap& swath_map, + const OpenSwathIsotopeGeneratorCacher& isotopeCacher) { OpenSwath::LightTargetedExperiment transition_exp; OpenSwathDataAccessHelper::convertTargetedExp(transition_exp_, transition_exp); @@ -188,15 +190,16 @@ namespace OpenMS std::vector swath_ptrs; swath_ptrs.push_back(m); - pickExperiment(chromatogram_ptr, output, transition_exp, trafo, swath_ptrs, transition_group_map); + pickExperiment(chromatogram_ptr, output, transition_exp, trafo, swath_ptrs, transition_group_map, isotopeCacher); } void MRMFeatureFinderScoring::pickExperiment(const OpenSwath::SpectrumAccessPtr& input, FeatureMap& output, const OpenSwath::LightTargetedExperiment& transition_exp, - const TransformationDescription& trafo, + const TransformationDescription& trafo, const std::vector& swath_maps, - TransitionGroupMapType& transition_group_map) + TransitionGroupMapType& transition_group_map, + const OpenSwathIsotopeGeneratorCacher& isotopeCacher) { // // Step 1 @@ -257,7 +260,7 @@ namespace OpenMS } trgroup_picker.pickTransitionGroup(transition_group); - scorePeakgroups(trgroup_it->second, trafo, swath_maps, output); + scorePeakgroups(trgroup_it->second, trafo, swath_maps, output, isotopeCacher); } endProgress(); @@ -327,11 +330,12 @@ namespace OpenMS const std::vector& native_ids_detection, const double det_intensity_ratio_score, const double det_mi_ratio_score, - const std::vector& swath_maps) const + const std::vector& swath_maps, + const OpenSwathIsotopeGeneratorCacher& isotopeCacher) const { MRMFeature idmrmfeature = trgr_ident.getFeaturesMuteable()[feature_idx]; OpenSwath::IMRMFeature* idimrmfeature; - idimrmfeature = new MRMFeatureOpenMS(idmrmfeature); + idimrmfeature = new MRMFeatureOpenMS(idmrmfeature); // get drift time upper/lower offset (this assumes that all chromatograms // are derived from the same precursor with the same drift time) @@ -357,7 +361,7 @@ namespace OpenMS OpenSwath::ISignalToNoisePtr snptr(new OpenMS::SignalToNoiseOpenMS< MSChromatogram >( trgr_ident.getChromatogram(trgr_ident.getTransitions()[i].getNativeID()), sn_win_len_, sn_bin_count_, write_log_messages_)); - if ( (snptr->getValueAtRT(idmrmfeature.getRT()) > uis_threshold_sn_) + if ( (snptr->getValueAtRT(idmrmfeature.getRT()) > uis_threshold_sn_) && (idmrmfeature.getFeature(trgr_ident.getTransitions()[i].getNativeID()).getIntensity() > uis_threshold_peak_area_)) { signal_noise_estimators_identification.push_back(snptr); @@ -369,7 +373,7 @@ namespace OpenMS if (!native_ids_identification.empty()) { scorer.calculateChromatographicIdScores(idimrmfeature, - native_ids_identification, + native_ids_identification, native_ids_detection, signal_noise_estimators_identification, idscores); @@ -456,9 +460,10 @@ namespace OpenMS { OpenSwath_Scores tmp_scores; - scorer.calculateDIAIdScores(idimrmfeature, + + scorer.calculateDIAIdScores(idimrmfeature, trgr_ident.getTransition(native_ids_identification[i]), - swath_maps, diascoring_, tmp_scores, drift_lower, drift_upper); + swath_maps, diascoring_, tmp_scores, drift_lower, drift_upper, isotopeCacher); ind_isotope_correlation.push_back(tmp_scores.isotope_correlation); ind_isotope_overlap.push_back(tmp_scores.isotope_overlap); @@ -474,9 +479,10 @@ namespace OpenMS } void MRMFeatureFinderScoring::scorePeakgroups(MRMTransitionGroupType& transition_group, - const TransformationDescription& trafo, + const TransformationDescription& trafo, const std::vector& swath_maps, - FeatureMap& output, + FeatureMap& output, + const OpenSwathIsotopeGeneratorCacher& isotopeCacher, bool ms1only) const { if (PeptideRefMap_.empty()) @@ -570,7 +576,7 @@ namespace OpenMS OPENMS_LOG_DEBUG << "Scoring feature " << (mrmfeature) << " == " << mrmfeature.getMetaValue("PeptideRef") << " [ expected RT " << PeptideRefMap_.at(mrmfeature.getMetaValue("PeptideRef"))->rt << " / " << expected_rt << " ]" << - " with " << transition_group_detection.size() << " transitions and " << + " with " << transition_group_detection.size() << " transitions and " << transition_group_detection.getChromatograms().size() << " chromatograms" << std::endl; int group_size = boost::numeric_cast(transition_group_detection.size()); @@ -578,7 +584,7 @@ namespace OpenMS { delete imrmfeature; // free resources before continuing throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, - "Error: Transition group " + transition_group_detection.getTransitionGroupID() + + "Error: Transition group " + transition_group_detection.getTransitionGroupID() + " has no chromatograms."); } bool swath_present = (!swath_maps.empty() && swath_maps[0].sptr->getNrSpectra() > 0); @@ -600,15 +606,15 @@ namespace OpenMS scores.sn_ratio = mrmscore_.calcSNScore(imrmfeature, ms1_signal_noise_estimators); // everything below S/N 1 can be set to zero (and the log safely applied) if (scores.sn_ratio < 1) - { + { scores.log_sn_score = 0; } else - { + { scores.log_sn_score = std::log(scores.sn_ratio); } - if (su_.use_sn_score_) - { + if (su_.use_sn_score_) + { mrmfeature.addScore("sn_ratio", scores.sn_ratio); mrmfeature.addScore("var_log_sn_score", scores.log_sn_score); // compute subfeature log-SN values @@ -640,10 +646,10 @@ namespace OpenMS mrmfeature.addScore("var_norm_rt_score", scores.norm_rt_score); } - // full spectra scores + // full spectra scores if (ms1_map_ && ms1_map_->getNrSpectra() > 0 && mrmfeature.getMZ() > 0) { - scorer.calculatePrecursorDIAScores(ms1_map_, diascoring_, precursor_mz, imrmfeature->getRT(), *pep, scores, drift_lower, drift_upper); + scorer.calculatePrecursorDIAScores(ms1_map_, diascoring_, precursor_mz, imrmfeature->getRT(), *pep, scores, drift_lower, drift_upper, isotopeCacher); } if (su_.use_ms1_fullscan) { @@ -702,7 +708,7 @@ namespace OpenMS scorer.calculateDIAScores(imrmfeature, transition_group_detection.getTransitions(), swath_maps, ms1_map_, diascoring_, *pep, scores, masserror_ppm, - drift_lower, drift_upper, drift_target); + drift_lower, drift_upper, drift_target, isotopeCacher); mrmfeature.setMetaValue("masserror_ppm", masserror_ppm); } if (sonar_present && su_.use_sonar_scores) @@ -733,14 +739,14 @@ namespace OpenMS { OpenSwath_Ind_Scores idscores = scoreIdentification_(transition_group_identification, scorer, feature_idx, native_ids_detection, det_intensity_ratio_score, - det_mi_ratio_score, swath_maps); + det_mi_ratio_score, swath_maps, isotopeCacher); mrmfeature.IDScoresAsMetaValue(false, idscores); } if (su_.use_uis_scores && !transition_group_identification_decoy.getTransitions().empty()) { OpenSwath_Ind_Scores idscores = scoreIdentification_(transition_group_identification_decoy, scorer, feature_idx, native_ids_detection, det_intensity_ratio_score, - det_mi_ratio_score, swath_maps); + det_mi_ratio_score, swath_maps, isotopeCacher); mrmfeature.IDScoresAsMetaValue(true, idscores); } @@ -956,7 +962,7 @@ namespace OpenMS mrmfeature.setMetaValue("missedCleavages", pd.peptideCount(pep_hit_.getSequence()) - 1); } - // set protein accession numbers + // set protein accession numbers for (Size k = 0; k < pep->protein_refs.size(); k++) { PeptideEvidence pe; diff --git a/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.cpp b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.cpp index 40ca45c3cd7..64cdabefc54 100644 --- a/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.cpp +++ b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.cpp @@ -37,6 +37,7 @@ #include #include #include +#include namespace OpenMS { @@ -91,6 +92,100 @@ namespace OpenMS return dist; } + IsotopeDistribution OpenSwathIsotopeGeneratorCacher::computeMiss(double mass) const + { + OPENMS_LOG_DEBUG << "Cache miss :(" << std::endl; + CoarseIsotopePatternGenerator tempSolver = solver_; //copy the isotope generator to enable const signature + return tempSolver.estimateFromPeptideWeight(mass); + } + + IsotopeDistribution OpenSwathIsotopeGeneratorCacher::getImmutable(double mass) const + { + /* + // print the cachedIsotopeDistribution list + for (const auto &[key, value]:cachedIsotopeDistributions_) + { + std::cout << "key is: " << key << std::endl; + } + */ + + if (mass <= (cachedIsotopeDistributions_.begin()->first + halfMassStep_) ) // all cached greater than target (with tolerance) + { + //std::cout << "All cached elements strictly greater than target (with tolerance)" << std::endl; + if ( (cachedIsotopeDistributions_.begin()->first - mass) <= halfMassStep_) + { + //std::cout << "first element matches"; + return cachedIsotopeDistributions_.begin()->second; + } + else + { + //std::cout << "first element does not match"; + return computeMiss(mass); + } + } + else // mass is somewhere in the middle or all elements are less than mass + { + auto upperBound = cachedIsotopeDistributions_.upper_bound(mass); + auto prevEle = upperBound; + prevEle--; + //std::cout << "ptr previous: " << prevEle->first << std::endl; + //std::cout << "upper bound (first element not less than " << mass << ": " << upperBound->first << std::endl; + + if (upperBound == cachedIsotopeDistributions_.end()) + { + //std::cout << "upper bound is at the end, meaning all elements are less than or equal to mass" << std::endl; + if (mass - prevEle->first <= halfMassStep_) + { + //std::cout << "last element is in range" << std::endl; + return prevEle->second; + } + else + { + //std::cout << "last element is not in range" << std::endl; + return computeMiss(mass); + } + } + else if ((mass - prevEle->first) > halfMassStep_) + { + //std::cout << "mass" << mass << std::endl; + //std::cout << "Previous element is too far away (half mass step: " << (mass - prevEle->first) << ")" << std::endl; + if ((upperBound->first - mass) <= halfMassStep_) + { + + //std::cout << "upper bound in range, match!" << std::endl; + return upperBound->second; + } + else + { + //std::cout << "upper bound not in range, new entry" << std::endl; + return computeMiss(mass); + } + } + else //prevEle in range + { + //std::cout << "previous element is in range" << std::endl; + if ((upperBound->first - mass) > halfMassStep_) + { + //std::cout << "upper bound not in range match previous element" << std::endl; + } + else // both elements in range + { + //std::cout << "both elements in range see which is closer" << std::endl; + if ((upperBound->first - mass) <= (mass - prevEle->first)) + { + //std::cout << "upper bound is closer" << std::endl; + return upperBound->second; + } + else + { + //std::cout << "previous element is closer" << std::endl; + return prevEle->second; + } + } + } + } + } + IsotopeDistribution OpenSwathIsotopeGeneratorCacher::get(double mass) { for (const auto &[key, value]:cachedIsotopeDistributions_) @@ -241,6 +336,21 @@ namespace OpenMS } */ + std::vector> OpenSwathIsotopeGeneratorCacher::getImmutable(double product_mz, int charge, const double mannmass) const + { + IsotopeDistribution distribution = getImmutable(product_mz * charge); + + double currentIsotopeMz = product_mz; // mz value of current isotope + + std::vector > isotopes_spec; + for (IsotopeDistribution::Iterator it = distribution.begin(); it != distribution.end(); ++it) + { + isotopes_spec.emplace_back(currentIsotopeMz, it->getIntensity()); + currentIsotopeMz += mannmass / charge; + } + + return isotopes_spec; + } diff --git a/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathScoring.cpp b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathScoring.cpp index b42354ccaf2..c01c1d82437 100644 --- a/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathScoring.cpp +++ b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathScoring.cpp @@ -43,6 +43,7 @@ #include #include #include +#include // auxiliary #include @@ -135,7 +136,8 @@ namespace OpenMS std::vector& masserror_ppm, const double drift_lower, const double drift_upper, - const double drift_target) + const double drift_target, + const OpenSwathIsotopeGeneratorCacher& isotopeCacher) { OPENMS_PRECONDITION(imrmfeature != nullptr, "Feature to be scored cannot be null"); OPENMS_PRECONDITION(transitions.size() > 0, "There needs to be at least one transition."); @@ -187,7 +189,7 @@ namespace OpenMS // DIA dotproduct and manhattan score based on library intensity and sum formula if present if (su_.use_ms2_isotope_scores) { - diascoring.score_with_isotopes(spectra, transitions, scores.dotprod_score_dia, scores.manhatt_score_dia, drift_lower, drift_upper); + diascoring.score_with_isotopes(spectra, transitions, scores.dotprod_score_dia, scores.manhatt_score_dia, drift_lower, drift_upper, isotopeCacher); // Isotope correlation / overlap score: Is this peak part of an // isotopic pattern or is it the monoisotopic peak in an isotopic @@ -196,7 +198,7 @@ namespace OpenMS // not optimal for metabolites - but better than nothing, given that for // most fragments we don't really know their composition diascoring - .dia_isotope_scores(transitions, spectra, imrmfeature, scores.isotope_correlation, scores.isotope_overlap, drift_lower, drift_upper); + .dia_isotope_scores(transitions, spectra, imrmfeature, scores.isotope_correlation, scores.isotope_overlap, drift_lower, drift_upper, isotopeCacher); } // Peptide-specific scores (only useful, when product transitions are REAL fragments, e.g. not in FFID) @@ -215,7 +217,7 @@ namespace OpenMS double precursor_mz = transitions[0].precursor_mz; double rt = imrmfeature->getRT(); - calculatePrecursorDIAScores(ms1_map, diascoring, precursor_mz, rt, compound, scores, drift_lower, drift_upper); + calculatePrecursorDIAScores(ms1_map, diascoring, precursor_mz, rt, compound, scores, drift_lower, drift_upper, isotopeCacher); } @@ -240,7 +242,7 @@ namespace OpenMS double rt, const CompoundType& compound, OpenSwath_Scores & scores, - double drift_lower, double drift_upper) + double drift_lower, double drift_upper, const OpenSwathIsotopeGeneratorCacher& isotopeCacher) { // Compute precursor-level scores: // - compute mass difference in ppm @@ -261,6 +263,7 @@ namespace OpenMS { if (!compound.sequence.empty()) { + // if computing the isotope distribution from an emperical formula than cannot use the caching. diascoring.dia_ms1_isotope_scores(precursor_mz, ms1_spectrum, scores.ms1_isotope_correlation, scores.ms1_isotope_overlap, AASequence::fromString(compound.sequence).getFormula(Residue::Full, precursor_charge), drift_lower, drift_upper); @@ -269,7 +272,7 @@ namespace OpenMS { diascoring.dia_ms1_isotope_scores_averagine(precursor_mz, ms1_spectrum, scores.ms1_isotope_correlation, - scores.ms1_isotope_overlap, precursor_charge, drift_lower, drift_upper); + scores.ms1_isotope_overlap, precursor_charge, drift_lower, drift_upper, isotopeCacher); } } else @@ -290,7 +293,7 @@ namespace OpenMS { diascoring.dia_ms1_isotope_scores_averagine(precursor_mz, ms1_spectrum, scores.ms1_isotope_correlation, - scores.ms1_isotope_overlap, precursor_charge, drift_lower, drift_upper); + scores.ms1_isotope_overlap, precursor_charge, drift_lower, drift_upper, isotopeCacher); } } } @@ -301,7 +304,7 @@ namespace OpenMS const std::vector& swath_maps, const OpenMS::DIAScoring & diascoring, OpenSwath_Scores & scores, - double drift_lower, double drift_upper) + double drift_lower, double drift_upper, const OpenSwathIsotopeGeneratorCacher& isotopeCacher) { OPENMS_PRECONDITION(imrmfeature != nullptr, "Feature to be scored cannot be null"); OPENMS_PRECONDITION(swath_maps.size() > 0, "There needs to be at least one swath map."); @@ -344,7 +347,8 @@ namespace OpenMS scores.isotope_overlap, putative_product_charge, drift_lower, - drift_upper); + drift_upper, + isotopeCacher); // Mass deviation score diascoring.dia_ms1_massdiff_score(transition.getProductMZ(), spectrum, scores.massdev_score, drift_lower, drift_upper); } diff --git a/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathWorkflow.cpp b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathWorkflow.cpp index 5c426efc5f9..d51583bdab7 100644 --- a/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathWorkflow.cpp +++ b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathWorkflow.cpp @@ -180,7 +180,8 @@ namespace OpenMS OpenSwath::SpectrumAccessPtr chromatogram_ptr = OpenSwath::SpectrumAccessPtr(new OpenMS::SpectrumAccessOpenMS(xic_map)); featureFinder.setStrictFlag(false); // TODO remove this, it should be strict (e.g. all transitions need to be present for RT norm) - featureFinder.pickExperiment(chromatogram_ptr, featureFile, transition_exp_used, empty_trafo, empty_swath_maps, transition_group_map); + OpenSwathIsotopeGeneratorCacher isotopeCacher(2,1); //TODO initialize this right + featureFinder.pickExperiment(chromatogram_ptr, featureFile, transition_exp_used, empty_trafo, empty_swath_maps, transition_group_map, isotopeCacher); // 4. Find most likely correct feature for each compound and add it to the // "pairs" vector by computing pairs of iRT and real RT. @@ -929,6 +930,8 @@ namespace OpenMS trafo_inv.invert(); MRMFeatureFinderScoring featureFinder; + + std::cout << "JOSH" << feature_finder_param.getValue("Scoring:DIAScoring:dia_nr_isotopes") << std::endl; MRMTransitionGroupPicker trgroup_picker; // To ensure multi-threading safe access to the individual spectra, we @@ -990,6 +993,13 @@ namespace OpenMS } std::vector to_tsv_output, to_osw_output; + + // initialize the OpenSwathIsotopeGeneratorCacher, for now hardcode it but come back + //OpenSwathIsotopeGeneratorCacher(const Size max_isotope, const double massStep, const bool round_masses = false): + OpenSwathIsotopeGeneratorCacher isotopeCacher = OpenSwathIsotopeGeneratorCacher(2, 0.5); + isotopeCacher.initialize(100.5, 400.5, 1); + + /////////////////////////////////// // Start of main function // Iterating over all the assays @@ -1059,7 +1069,7 @@ namespace OpenMS // 3. / 4. Process the MRMTransitionGroup: find peakgroups and score them trgroup_picker.pickTransitionGroup(transition_group); - featureFinder.scorePeakgroups(transition_group, trafo, swath_maps, output, ms1only); + featureFinder.scorePeakgroups(transition_group, trafo, swath_maps, output, isotopeCacher, ms1only); // Ensure that a detection transition is used to derive features for output if (detection_assay_it == nullptr && !output.empty()) diff --git a/src/openms/source/TRANSFORMATIONS/FEATUREFINDER/FeatureFinderAlgorithmMetaboIdent.cpp b/src/openms/source/TRANSFORMATIONS/FEATUREFINDER/FeatureFinderAlgorithmMetaboIdent.cpp index 67006c3e808..04bc2bab741 100644 --- a/src/openms/source/TRANSFORMATIONS/FEATUREFINDER/FeatureFinderAlgorithmMetaboIdent.cpp +++ b/src/openms/source/TRANSFORMATIONS/FEATUREFINDER/FeatureFinderAlgorithmMetaboIdent.cpp @@ -40,6 +40,7 @@ #include #include +#include #include #include #include @@ -71,8 +72,8 @@ namespace OpenMS defaults_.setMinFloat("extract:mz_window", 0.0); defaults_.setValue( - "extract:rt_window", - 0.0, + "extract:rt_window", + 0.0, "RT window size (in sec.) for chromatogram extraction. If set, this parameter takes precedence over 'extract:rt_quantile'.", vector{"advanced"}); defaults_.setMinFloat("extract:rt_window", 0.0); @@ -81,7 +82,7 @@ namespace OpenMS defaults_.setMinInt("extract:n_isotopes", 2); defaults_.setValue( "extract:isotope_pmin", - 0.0, + 0.0, "Minimum probability for an isotope to be included in the assay for a peptide. If set, this parameter takes precedence over 'extract:n_isotopes'.", vector{"advanced"}); defaults_.setMinFloat("extract:isotope_pmin", 0.0); @@ -92,20 +93,20 @@ namespace OpenMS defaults_.setValue("detect:peak_width", 60.0, "Expected elution peak width in seconds, for smoothing (Gauss filter). Also determines the RT extration window, unless set explicitly via 'extract:rt_window'."); defaults_.setMinFloat("detect:peak_width", 0.0); defaults_.setValue( - "detect:min_peak_width", - 0.2, + "detect:min_peak_width", + 0.2, "Minimum elution peak width. Absolute value in seconds if 1 or greater, else relative to 'peak_width'.", vector{"advanced"}); defaults_.setMinFloat("detect:min_peak_width", 0.0); defaults_.setValue( - "detect:signal_to_noise", - 0.8, + "detect:signal_to_noise", + 0.8, "Signal-to-noise threshold for OpenSWATH feature detection", vector{"advanced"}); defaults_.setMinFloat("detect:signal_to_noise", 0.1); - defaults_.setSectionDescription("detect", "Parameters for detecting features in extracted ion chromatograms"); + defaults_.setSectionDescription("detect", "Parameters for detecting features in extracted ion chromatograms"); // parameters for model fitting (via ElutionModelFitter): defaults_.setValue("model:type", "symmetric", "Type of elution model to fit to features"); @@ -161,16 +162,16 @@ namespace OpenMS candidates_out_ = (string)param_.getValue("candidates_out"); } - void FeatureFinderAlgorithmMetaboIdent::run(const vector& metaboIdentTable, - FeatureMap& features, + void FeatureFinderAlgorithmMetaboIdent::run(const vector& metaboIdentTable, + FeatureMap& features, const String& spectra_file) { // if proper mzML is annotated in MS data use this as reference. Otherwise, overwrite with spectra_file information. - features.setPrimaryMSRunPath({spectra_file}, ms_data_); - + features.setPrimaryMSRunPath({spectra_file}, ms_data_); + if (ms_data_.empty()) { - OPENMS_LOG_WARN << "Warning: No MS1 scans in:"<< spectra_file << endl; + OPENMS_LOG_WARN << "Warning: No MS1 scans in:"<< spectra_file << endl; return; } @@ -196,8 +197,8 @@ namespace OpenMS // totally breaks the OpenSWATH feature detection (no features found)! params.setValue("TransitionGroupPicker:PeakPickerMRM:signal_to_noise", signal_to_noise_); - - params.setValue("TransitionGroupPicker:PeakPickerMRM:write_sn_log_messages", "false"); + + params.setValue("TransitionGroupPicker:PeakPickerMRM:write_sn_log_messages", "false"); params.setValue("TransitionGroupPicker:recalculate_peaks", "true"); params.setValue("TransitionGroupPicker:PeakPickerMRM:peak_width", -1.0); params.setValue("TransitionGroupPicker:PeakPickerMRM:method", @@ -230,8 +231,9 @@ namespace OpenMS OPENMS_LOG_INFO << "Detecting chromatographic peaks..." << endl; OpenMS_Log_info.remove(cout); // suppress status output from OpenSWATH + OpenSwathIsotopeGeneratorCacher isotopeCacher(2,1); //TODO fill in with actual values feat_finder_.pickExperiment(chrom_data_, features, library_, - TransformationDescription(), ms_data_); + TransformationDescription(), ms_data_, isotopeCacher); OpenMS_Log_info.insert(cout); OPENMS_LOG_INFO << "Found " << features.size() << " feature candidates in total." << endl; @@ -243,7 +245,7 @@ namespace OpenMS // write auxiliary output: // features.setProteinIdentifications(proteins); features.ensureUniqueId(); - + // sort features: sort(features.begin(), features.end(), feature_compare_); @@ -287,7 +289,7 @@ namespace OpenMS if (features.empty()) { OPENMS_LOG_INFO << "No features left after filtering." << endl; - } + } } if (features.empty()) return; @@ -468,7 +470,7 @@ namespace OpenMS transition.setCompoundRef(target_id); library_.addTransition(transition); isotope_probs_[transition_name] = iso.getIntensity(); - + ++counter; } } @@ -917,9 +919,9 @@ namespace OpenMS } void FeatureFinderAlgorithmMetaboIdent::setMSData(const PeakMap& m) - { - ms_data_ = m; - + { + ms_data_ = m; + vector& specs = ms_data_.getSpectra(); // keep only MS1 @@ -930,9 +932,9 @@ namespace OpenMS } void FeatureFinderAlgorithmMetaboIdent::setMSData(PeakMap&& m) - { - ms_data_ = std::move(m); - + { + ms_data_ = std::move(m); + vector& specs = ms_data_.getSpectra(); // keep only MS1 diff --git a/src/openms/source/TRANSFORMATIONS/FEATUREFINDER/FeatureFinderIdentificationAlgorithm.cpp b/src/openms/source/TRANSFORMATIONS/FEATUREFINDER/FeatureFinderIdentificationAlgorithm.cpp index 9aad07cc8ab..1ed20f2d48a 100644 --- a/src/openms/source/TRANSFORMATIONS/FEATUREFINDER/FeatureFinderIdentificationAlgorithm.cpp +++ b/src/openms/source/TRANSFORMATIONS/FEATUREFINDER/FeatureFinderIdentificationAlgorithm.cpp @@ -40,6 +40,7 @@ #include #include +#include #include #include #include @@ -83,22 +84,22 @@ namespace OpenMS defaults_.setMinInt("extract:n_isotopes", 2); defaults_.setValue( "extract:isotope_pmin", - 0.0, + 0.0, "Minimum probability for an isotope to be included in the assay for a peptide. If set, this parameter takes precedence over 'extract:n_isotopes'.", {"advanced"}); defaults_.setMinFloat("extract:isotope_pmin", 0.0); defaults_.setMaxFloat("extract:isotope_pmin", 1.0); defaults_.setValue( - "extract:rt_quantile", - 0.95, + "extract:rt_quantile", + 0.95, "Quantile of the RT deviations between aligned internal and external IDs to use for scaling the RT extraction window", {"advanced"}); defaults_.setMinFloat("extract:rt_quantile", 0.0); defaults_.setMaxFloat("extract:rt_quantile", 1.0); defaults_.setValue( - "extract:rt_window", - 0.0, + "extract:rt_window", + 0.0, "RT window size (in sec.) for chromatogram extraction. If set, this parameter takes precedence over 'extract:rt_quantile'.", {"advanced"}); defaults_.setMinFloat("extract:rt_window", 0.0); @@ -108,15 +109,15 @@ namespace OpenMS defaults_.setValue("detect:peak_width", 60.0, "Expected elution peak width in seconds, for smoothing (Gauss filter). Also determines the RT extration window, unless set explicitly via 'extract:rt_window'."); defaults_.setMinFloat("detect:peak_width", 0.0); defaults_.setValue( - "detect:min_peak_width", - 0.2, + "detect:min_peak_width", + 0.2, "Minimum elution peak width. Absolute value in seconds if 1 or greater, else relative to 'peak_width'.", {"advanced"}); defaults_.setMinFloat("detect:min_peak_width", 0.0); defaults_.setValue( - "detect:signal_to_noise", - 0.8, + "detect:signal_to_noise", + 0.8, "Signal-to-noise threshold for OpenSWATH feature detection", {"advanced"}); defaults_.setMinFloat("detect:signal_to_noise", 0.1); @@ -145,14 +146,14 @@ namespace OpenMS String score_metavalues = "peak_apices_sum,var_xcorr_coelution,var_xcorr_shape,var_library_sangle,var_intensity_score,sn_ratio,var_log_sn_score,var_elution_model_fit_score,xx_lda_prelim_score,var_ms1_isotope_correlation_score,var_ms1_isotope_overlap_score,var_massdev_score,main_var_xx_swath_prelim_score"; defaults_.setValue( - "svm:predictors", - score_metavalues, + "svm:predictors", + score_metavalues, "Names of OpenSWATH scores to use as predictors for the SVM (comma-separated list)", {"advanced"}); defaults_.setValue( - "svm:min_prob", - 0.0, + "svm:min_prob", + 0.0, "Minimum probability of correctness, as predicted by the SVM, required to retain a feature candidate", {"advanced"}); defaults_.setMinFloat("svm:min_prob", 0.0); @@ -191,28 +192,28 @@ namespace OpenMS void FeatureFinderIdentificationAlgorithm::setMSData(const PeakMap& ms_data) { - ms_data_ = ms_data; - + ms_data_ = ms_data; + vector& specs = ms_data_.getSpectra(); // keep only MS1 specs.erase( std::remove_if(specs.begin(), specs.end(), [](const MSSpectrum & s) { return s.getMSLevel() != 1; }), - specs.end()); + specs.end()); } void FeatureFinderIdentificationAlgorithm::setMSData(PeakMap&& ms_data) { - ms_data_ = std::move(ms_data); - + ms_data_ = std::move(ms_data); + vector& specs = ms_data_.getSpectra(); // keep only MS1 specs.erase( std::remove_if(specs.begin(), specs.end(), [](const MSSpectrum & s) { return s.getMSLevel() != 1; }), - specs.end()); + specs.end()); } PeakMap& FeatureFinderIdentificationAlgorithm::getChromatograms() @@ -299,9 +300,9 @@ namespace OpenMS params.setValue("TransitionGroupPicker:recalculate_peaks", "true"); params.setValue("TransitionGroupPicker:PeakPickerMRM:peak_width", -1.0); params.setValue("TransitionGroupPicker:PeakPickerMRM:method", - "corrected"); + "corrected"); params.setValue("TransitionGroupPicker:PeakPickerMRM:write_sn_log_messages", "false"); // disabled in OpenSWATH - + feat_finder_.setParameters(params); feat_finder_.setLogType(ProgressLogger::NONE); feat_finder_.setStrictFlag(false); @@ -506,8 +507,9 @@ namespace OpenMS { OpenMS_Log_info.remove(cout); } + OpenSwathIsotopeGeneratorCacher isotopeCacher(2, 1); // TODO fix this is just initialized randomly for testing feat_finder_.pickExperiment(chrom_data_, features, library_, - TransformationDescription(), ms_data_); + TransformationDescription(), ms_data_, isotopeCacher); if (debug_level_ < 1) { OpenMS_Log_info.insert(cout); // revert logging change @@ -1001,8 +1003,8 @@ namespace OpenMS /// generate transitions (isotopic traces) for a peptide ion and add them to the library: void FeatureFinderIdentificationAlgorithm::generateTransitions_( - const String& peptide_id, - double mz, + const String& peptide_id, + double mz, Int charge, const IsotopeDistribution& iso_dist) { @@ -1032,16 +1034,16 @@ namespace OpenMS { if (n_pos < svm_n_parts_) { - String msg = "Not enough positive observations for " + + String msg = "Not enough positive observations for " + String(svm_n_parts_) + "-fold cross-validation" + note + "."; - throw Exception::MissingInformation(__FILE__, __LINE__, + throw Exception::MissingInformation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, msg); } if (n_neg < svm_n_parts_) { - String msg = "Not enough negative observations for " + + String msg = "Not enough negative observations for " + String(svm_n_parts_) + "-fold cross-validation" + note + "."; - throw Exception::MissingInformation(__FILE__, __LINE__, + throw Exception::MissingInformation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, msg); } } @@ -1320,7 +1322,7 @@ namespace OpenMS if (!quantify_decoys_) { if (hit.metaValueExists("target_decoy") && hit.getMetaValue("target_decoy") == "decoy") - { + { unassignedIDs_.push_back(peptide); return; } @@ -1413,7 +1415,7 @@ namespace OpenMS if (valid_obs.size() < half_win_size + 1) { String msg = "Not enough observations for intensity-bias filtering."; - throw Exception::MissingInformation(__FILE__, __LINE__, + throw Exception::MissingInformation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, msg); } srand(time(nullptr)); // seed random number generator @@ -1627,7 +1629,7 @@ namespace OpenMS std::vector predictions; svm.predict(predictions); - OPENMS_POSTCONDITION(predictions.size() == features.size(), + OPENMS_POSTCONDITION(predictions.size() == features.size(), "SVM predictions for all features expected"); for (Size i = 0; i < features.size(); ++i) { @@ -1656,7 +1658,7 @@ namespace OpenMS else if (feature_class == "unknown") { svm_probs_external_.insert(best_quality); - if (best_quality >= quality_cutoff) + if (best_quality >= quality_cutoff) { best_feature.setOverallQuality(best_quality); ++n_external_features_; @@ -1759,7 +1761,7 @@ namespace OpenMS prob_it->second.second); OPENMS_LOG_INFO << "Estimated FDR of features detected based on 'external' IDs: " << fdr * 100.0 << "%" << endl; - fdr = (fdr * n_external_features_) / (n_external_features_ + + fdr = (fdr * n_external_features_) / (n_external_features_ + n_internal_features_); OPENMS_LOG_INFO << "Estimated FDR of all detected features: " << fdr * 100.0 << "%" << endl; From 1d8a9363847508839c9d6f77260f6c8b0d4181d9 Mon Sep 17 00:00:00 2001 From: jcharkow Date: Fri, 23 Dec 2022 10:20:10 -0500 Subject: [PATCH 05/13] further integration with openswathworkflow now reads out the number of isotopes from command line dia_nr_isotopes when using this full size cache, the performance suffers. Takes significantly longer with the cache --- .../OpenSwathIsotopeGeneratorCacher.cpp | 69 ++++++------ .../ANALYSIS/OPENSWATH/OpenSwathWorkflow.cpp | 13 +-- .../OpenSwathIsotopeGeneratorCacher_test.cpp | 103 +++++++++++++++++- 3 files changed, 142 insertions(+), 43 deletions(-) diff --git a/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.cpp b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.cpp index 64cdabefc54..e22eeecf5ac 100644 --- a/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.cpp +++ b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.cpp @@ -105,10 +105,11 @@ namespace OpenMS // print the cachedIsotopeDistribution list for (const auto &[key, value]:cachedIsotopeDistributions_) { - std::cout << "key is: " << key << std::endl; + //std::cout << "key is: " << key << std::endl; } */ + //std::cout << "JOSH start get immutable" << std::endl; if (mass <= (cachedIsotopeDistributions_.begin()->first + halfMassStep_) ) // all cached greater than target (with tolerance) { //std::cout << "All cached elements strictly greater than target (with tolerance)" << std::endl; @@ -167,6 +168,7 @@ namespace OpenMS if ((upperBound->first - mass) > halfMassStep_) { //std::cout << "upper bound not in range match previous element" << std::endl; + return prevEle->second; } else // both elements in range { @@ -190,20 +192,20 @@ namespace OpenMS { for (const auto &[key, value]:cachedIsotopeDistributions_) { - std::cout << "key is: " << key << std::endl; + //std::cout << "key is: " << key << std::endl; } if (mass <= (cachedIsotopeDistributions_.begin()->first + halfMassStep_) ) // all cached greater than target (with tolerance) { - std::cout << "All cached elements strictly greater than target (with tolerance)" << std::endl; + //std::cout << "All cached elements strictly greater than target (with tolerance)" << std::endl; if ( (cachedIsotopeDistributions_.begin()->first - mass) <= halfMassStep_) { - std::cout << "first element matches"; + //std::cout << "first element matches"; return cachedIsotopeDistributions_.begin()->second; } else { - std::cout << "first element does not match"; + //std::cout << "first element does not match"; return addEntry(mass); } } @@ -212,57 +214,57 @@ namespace OpenMS auto upperBound = cachedIsotopeDistributions_.upper_bound(mass); auto prevEle = upperBound; prevEle--; - std::cout << "ptr previous: " << prevEle->first << std::endl; - std::cout << "upper bound (first element not less than " << mass << ": " << upperBound->first << std::endl; + //std::cout << "ptr previous: " << prevEle->first << std::endl; + //std::cout << "upper bound (first element not less than " << mass << ": " << upperBound->first << std::endl; if (upperBound == cachedIsotopeDistributions_.end()) { - std::cout << "upper bound is at the end, meaning all elements are less than or equal to mass" << std::endl; + //std::cout << "upper bound is at the end, meaning all elements are less than or equal to mass" << std::endl; if (mass - prevEle->first <= halfMassStep_) { - std::cout << "last element is in range" << std::endl; + //std::cout << "last element is in range" << std::endl; return prevEle->second; } else { - std::cout << "last element is not in range" << std::endl; + //std::cout << "last element is not in range" << std::endl; return addEntry(mass); } } else if ((mass - prevEle->first) > halfMassStep_) { - std::cout << "mass" << mass << std::endl; - std::cout << "Previous element is too far away (half mass step: " << (mass - prevEle->first) << ")" << std::endl; + //std::cout << "mass" << mass << std::endl; + //std::cout << "Previous element is too far away (half mass step: " << (mass - prevEle->first) << ")" << std::endl; if ((upperBound->first - mass) <= halfMassStep_) { - std::cout << "upper bound in range, match!" << std::endl; + //std::cout << "upper bound in range, match!" << std::endl; return upperBound->second; } else { - std::cout << "upper bound not in range, new entry" << std::endl; + //std::cout << "upper bound not in range, new entry" << std::endl; return addEntry(mass); } } else //prevEle in range { - std::cout << "previous element is in range" << std::endl; + //std::cout << "previous element is in range" << std::endl; if ((upperBound->first - mass) > halfMassStep_) { - std::cout << "upper bound not in range match previous element" << std::endl; + //std::cout << "upper bound not in range match previous element" << std::endl; } else // both elements in range { - std::cout << "both elements in range see which is closer" << std::endl; + //std::cout << "both elements in range see which is closer" << std::endl; if ((upperBound->first - mass) <= (mass - prevEle->first)) { - std::cout << "upper bound is closer" << std::endl; + //std::cout << "upper bound is closer" << std::endl; return upperBound->second; } else { - std::cout << "previous element is closer" << std::endl; + //std::cout << "previous element is closer" << std::endl; return prevEle->second; } } @@ -276,15 +278,15 @@ namespace OpenMS /* if ((ele == cachedIsotopeDistributions_.end()) & (previousEle == cachedIsotopeDistributions_.end()) | (ele == cachedIsotopeDistributions_.begin()) // all cached elements are greater than target mass { - std::cout << "All elements greater than target mass, creating entry" << std::endl; + //std::cout << "All elements greater than target mass, creating entry" << std::endl; return addEnrtry(mass); } else if ((ele == cachedIsotopeDistributions_.end()) & (previousEle != cachedIsotopeDistributions_.end())) // last element might match { - std::cout << "Last element might match" << std::endl; + //std::cout << "Last element might match" << std::endl; if (std::abs(previousEle->first - mass) < halfStep_) { - std::cout << "Last element matches" << std::endl; + //std::cout << "Last element matches" << std::endl; return previousEle->second; } else @@ -295,19 +297,19 @@ namespace OpenMS //else if ((ele == cachedIsotopeDistributions_.begin()) & (previousEle != cachedIsotopeDistributions_.end())) // last element might match // // at the end of the array, possible that we still have a match though if this is just slightly over the last element - std::cout << "At the end of the array..." << std::endl; + //std::cout << "At the end of the array..." << std::endl; if (std::abs(previousEle->first - mass) < std::abs(ele->first - mass)) // check if the previous entry is within range { double rslt = std::abs(previousEle->first - mass) < std::abs(ele->first - mass); - std::cout << "rslt is: " << rslt << std::endl; - std::cout << "Taking previous element" << std::endl; + //std::cout << "rslt is: " << rslt << std::endl; + //std::cout << "Taking previous element" << std::endl; return ele->second; } else // previous entry not within range { double rslt = std::abs(previousEle->first - mass) < std::abs(ele->first - mass); - std::cout << "rslt is: " << rslt << std::endl; - std::cout << "No match creating entry" << std::endl; + //std::cout << "rslt is: " << rslt << std::endl; + //std::cout << "No match creating entry" << std::endl; return addEntry(mass); } @@ -315,8 +317,8 @@ namespace OpenMS if (std::abs(previousEle->first - mass) < std::abs(ele->first - mass)) // check if the previous entry is closer than the first entry { double rslt = std::abs(previousEle->first - mass) < std::abs(ele->first - mass); - std::cout << "rslt is: " << rslt << std::endl; - std::cout << "Taking previous element" << std::endl; + //std::cout << "rslt is: " << rslt << std::endl; + //std::cout << "Taking previous element" << std::endl; ele = previousEle; } @@ -324,12 +326,12 @@ namespace OpenMS { double rslt = std::abs(ele->first - mass) > halfMassStep_; - std::cout << "Closest element is not in range, creating entry" << " closest element: " << ele->first << " delta: " << rslt << std::endl; + //std::cout << "Closest element is not in range, creating entry" << " closest element: " << ele->first << " delta: " << rslt << std::endl; return addEntry(mass); } else // there is a current cached distribution that can be used! { - std::cout << "Using the cache" << std::endl; + //std::cout << "Using the cache" << std::endl; return ele->second; } @@ -338,8 +340,11 @@ namespace OpenMS std::vector> OpenSwathIsotopeGeneratorCacher::getImmutable(double product_mz, int charge, const double mannmass) const { + //std::cout << "Calling sub get immutable" << std::endl; IsotopeDistribution distribution = getImmutable(product_mz * charge); - + //std::cout << "done sub get immutable" << std::endl; + //std::cout << "fetching distribution size" << std::endl; + //std::cout << "distribition size is: " << distribution.size() << std::endl; double currentIsotopeMz = product_mz; // mz value of current isotope std::vector > isotopes_spec; diff --git a/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathWorkflow.cpp b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathWorkflow.cpp index d51583bdab7..a5d5013308a 100644 --- a/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathWorkflow.cpp +++ b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathWorkflow.cpp @@ -180,7 +180,9 @@ namespace OpenMS OpenSwath::SpectrumAccessPtr chromatogram_ptr = OpenSwath::SpectrumAccessPtr(new OpenMS::SpectrumAccessOpenMS(xic_map)); featureFinder.setStrictFlag(false); // TODO remove this, it should be strict (e.g. all transitions need to be present for RT norm) - OpenSwathIsotopeGeneratorCacher isotopeCacher(2,1); //TODO initialize this right + + OpenSwathIsotopeGeneratorCacher isotopeCacher(feature_finder_param.getValue("DIAScoring:dia_nr_isotopes"), 1); + isotopeCacher.initialize(200.5, 2001.5, 1); featureFinder.pickExperiment(chromatogram_ptr, featureFile, transition_exp_used, empty_trafo, empty_swath_maps, transition_group_map, isotopeCacher); // 4. Find most likely correct feature for each compound and add it to the @@ -930,8 +932,6 @@ namespace OpenMS trafo_inv.invert(); MRMFeatureFinderScoring featureFinder; - - std::cout << "JOSH" << feature_finder_param.getValue("Scoring:DIAScoring:dia_nr_isotopes") << std::endl; MRMTransitionGroupPicker trgroup_picker; // To ensure multi-threading safe access to the individual spectra, we @@ -994,11 +994,10 @@ namespace OpenMS std::vector to_tsv_output, to_osw_output; - // initialize the OpenSwathIsotopeGeneratorCacher, for now hardcode it but come back - //OpenSwathIsotopeGeneratorCacher(const Size max_isotope, const double massStep, const bool round_masses = false): - OpenSwathIsotopeGeneratorCacher isotopeCacher = OpenSwathIsotopeGeneratorCacher(2, 0.5); - isotopeCacher.initialize(100.5, 400.5, 1); + // initialize the OpenSwathIsotopeGeneratorCacher + OpenSwathIsotopeGeneratorCacher isotopeCacher = OpenSwathIsotopeGeneratorCacher(feature_finder_param.getValue("DIAScoring:dia_nr_isotopes"), 1); + isotopeCacher.initialize(200.5, 2001.5, 1); /////////////////////////////////// // Start of main function diff --git a/src/tests/class_tests/openms/source/OpenSwathIsotopeGeneratorCacher_test.cpp b/src/tests/class_tests/openms/source/OpenSwathIsotopeGeneratorCacher_test.cpp index acfce1351c9..083a2dc23ff 100644 --- a/src/tests/class_tests/openms/source/OpenSwathIsotopeGeneratorCacher_test.cpp +++ b/src/tests/class_tests/openms/source/OpenSwathIsotopeGeneratorCacher_test.cpp @@ -196,17 +196,112 @@ START_SECTION(IsotopeDistribution OpenSwathIsotopeGeneratorCacher::get(double ma TEST_REAL_SIMILAR(cachedDistribution.getContainer()[i].getIntensity(), testDistribution.getContainer()[i].getIntensity()) } - - - - } END_SECTION +START_SECTION(std::vector> OpenSwathIsotopeGeneratorCacher::get(double product_mz, int charge, const double mannmass) ) +{ + // Setup + int maxIsotope = 4; + double massStep = 5; + IsotopeDistribution cachedDistribution; + IsotopeDistribution testDistribution; + OpenSwathIsotopeGeneratorCacher isotopeCacher(maxIsotope, massStep); + isotopeCacher.initialize(100, 120, massStep); // since non inclusive massEnd should generate cache for 100 and 101 + // Case #1: + auto dist = isotopeCacher.get(100., 1); + TEST_EQUAL(dist.size(), 4); + + std::vector> test; + test.push_back(std::make_pair(100., 0.9496341)); + test.push_back(std::make_pair(101., 0.0473560)); + test.push_back(std::make_pair(102., 0.0029034)); + test.push_back(std::make_pair(103., 0.0001064)); + for (Size i =0; i > tmp; + OpenMS::DIAHelpers::getAveragineIsotopeDistribution(100., tmp); + TEST_EQUAL(tmp.size() == 4, true); + + double mass1[] = { 100, 101.00048, 102.00096, 103.00144 }; + double int1[] = + { 0.9496341, 0.0473560, 0.0029034, 0.0001064 }; + + double * mm = &mass1[0]; + double * ii = &int1[0]; + for (unsigned int i = 0; i < tmp.size(); ++i, ++mm, ++ii) { + + std::cout << "mass :" << std::setprecision(10) << tmp[i].first + << "intensity :" << tmp[i].second << std::endl; + TEST_REAL_SIMILAR(tmp[i].first, *mm); + TEST_REAL_SIMILAR(tmp[i].second, *ii); + } + + tmp.clear(); + OpenMS::DIAHelpers::getAveragineIsotopeDistribution(30., tmp); + double mass2[] = { 30, 31.0005, 32.001, 33.0014 }; + double int2[] = { 0.987254, 0.012721, 2.41038e-05, 2.28364e-08 }; + mm = &mass2[0]; + ii = &int2[0]; + for (unsigned int i = 0; i < tmp.size(); ++i, ++mm, ++ii) { + std::cout << "mass :" << tmp[i].first << "intensity :" << tmp[i].second + << std::endl; + std::cout << "mass :" << std::setprecision(10) << tmp[i].first + << "intensity :" << tmp[i].second << std::endl; + std::cout << i << "dm" << *mm - tmp[i].first << " di " << *ii - tmp[i].second << std::endl; + TEST_REAL_SIMILAR(tmp[i].first, *mm) + TEST_REAL_SIMILAR(tmp[i].second, *ii) + } + + tmp.clear(); + OpenMS::DIAHelpers::getAveragineIsotopeDistribution(110., tmp); + for (unsigned int i = 0; i < tmp.size(); ++i) { + std::cout << "mass :" << tmp[i].first << "intensity :" << tmp[i].second + << std::endl; + } + + tmp.clear(); + OpenMS::DIAHelpers::getAveragineIsotopeDistribution(120., tmp); + for (unsigned int i = 0; i < tmp.size(); ++i) { + std::cout << "mass :" << tmp[i].first << "intensity :" << tmp[i].second + << std::endl; + } + + tmp.clear(); + OpenMS::DIAHelpers::getAveragineIsotopeDistribution(300., tmp); + for (unsigned int i = 0; i < tmp.size(); ++i) { + std::cout << "mass :" << tmp[i].first << "intensity :" << tmp[i].second + << std::endl; + } + + tmp.clear(); + OpenMS::DIAHelpers::getAveragineIsotopeDistribution(500., tmp); + for (unsigned int i = 0; i < tmp.size(); ++i) { + std::cout << "mass :" << tmp[i].first << "intensity :" << tmp[i].second + << std::endl; + } + +} +END_SECTION +*/ From 858f2a17819d97fd1276b7bbe01c66fac682f29e Mon Sep 17 00:00:00 2001 From: jcharkow Date: Wed, 28 Dec 2022 12:24:54 -0500 Subject: [PATCH 06/13] [INTERNAL] prevent copying of cache Cache was actually taking longer because it was getting copied with function called unintentionally. Pass by address in all cases in order to prevent copying. --- .../OpenMS/ANALYSIS/OPENSWATH/DIAHelper.h | 2 +- .../OpenMS/ANALYSIS/OPENSWATH/DIAScoring.h | 6 ++--- .../OpenSwathIsotopeGeneratorCacher.h | 6 +++++ .../ANALYSIS/OPENSWATH/OpenSwathWorkflow.h | 1 + .../source/ANALYSIS/OPENSWATH/DIAHelper.cpp | 2 +- .../source/ANALYSIS/OPENSWATH/DIAScoring.cpp | 6 ++--- .../ANALYSIS/OPENSWATH/OpenSwathWorkflow.cpp | 25 +++++++++++++++---- 7 files changed, 35 insertions(+), 13 deletions(-) diff --git a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAHelper.h b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAHelper.h index 6b4991e4acf..662774701c7 100644 --- a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAHelper.h +++ b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAHelper.h @@ -169,7 +169,7 @@ namespace OpenMS /// Old + new peaks are pushed to @p isotopeMasses OPENMS_DLLAPI void addSinglePeakIsotopes2Spec(double mz, double ity, std::vector >& isotope_masses, //[out] - Size nr_isotopes, int charge, const OpenSwathIsotopeGeneratorCacher isotopeCacher); + Size nr_isotopes, int charge, const OpenSwathIsotopeGeneratorCacher& isotopeCacher); /// sorts vector of pairs by first OPENMS_DLLAPI void sortByFirst(std::vector >& tmp); diff --git a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAScoring.h b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAScoring.h index 50d93e351fc..dc82c7c37af 100644 --- a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAScoring.h +++ b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAScoring.h @@ -117,7 +117,7 @@ namespace OpenMS double& isotope_overlap, double drift_lower, double drift_upper, - const OpenSwathIsotopeGeneratorCacher isotopeCacher) const; + const OpenSwathIsotopeGeneratorCacher& isotopeCacher) const; /// Massdiff scores, see class description void dia_massdiff_score(const std::vector& transitions, @@ -142,7 +142,7 @@ namespace OpenMS /// Precursor isotope scores for precursors (peptides and metabolites) void dia_ms1_isotope_scores_averagine(double precursor_mz, const std::vector& spectrum, - double& isotope_corr, double& isotope_overlap, int charge_state, double drift_start, double drift_end, const OpenSwathIsotopeGeneratorCacher isotopeCacher) const; + double& isotope_corr, double& isotope_overlap, int charge_state, double drift_start, double drift_end, const OpenSwathIsotopeGeneratorCacher& isotopeCacher) const; void dia_ms1_isotope_scores(double precursor_mz, const std::vector& spectrum, double& isotope_corr, double& isotope_overlap, const EmpiricalFormula& sum_formula, double drift_start, double drift_end) const; @@ -158,7 +158,7 @@ namespace OpenMS double& manhattan, double drift_start, double drift_end, - const OpenSwathIsotopeGeneratorCacher isotopeCacher) const; + const OpenSwathIsotopeGeneratorCacher& isotopeCacher) const; //@} private: diff --git a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.h b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.h index 360369ab169..2708a36204f 100644 --- a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.h +++ b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.h @@ -143,5 +143,11 @@ namespace OpenMS * if no isotope distribution exists within the halfMassStep_ then create (but do not cache) a new distribution */ std::vector> getImmutable(double mz, int charge, const double mannmass = 1.00048) const; + + +private: + OpenSwathIsotopeGeneratorCacher(OpenSwathIsotopeGeneratorCacher&); + OpenSwathIsotopeGeneratorCacher& operator=(const OpenSwathIsotopeGeneratorCacher&); + }; } diff --git a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathWorkflow.h b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathWorkflow.h index e2de7d038c0..17f01eb5c9c 100644 --- a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathWorkflow.h +++ b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathWorkflow.h @@ -546,6 +546,7 @@ namespace OpenMS FeatureMap& output, OpenSwathTSVWriter & tsv_writer, OpenSwathOSWWriter & osw_writer, + const OpenSwathIsotopeGeneratorCacher & isotopeCacher, int nr_ms1_isotopes = 0, bool ms1only = false) const; diff --git a/src/openms/source/ANALYSIS/OPENSWATH/DIAHelper.cpp b/src/openms/source/ANALYSIS/OPENSWATH/DIAHelper.cpp index a1eaa20e8c7..2bba17250ec 100644 --- a/src/openms/source/ANALYSIS/OPENSWATH/DIAHelper.cpp +++ b/src/openms/source/ANALYSIS/OPENSWATH/DIAHelper.cpp @@ -447,7 +447,7 @@ namespace OpenMS::DIAHelpers /// given a peak of experimental mz and intensity, add isotope pattern to a "spectrum". void addSinglePeakIsotopes2Spec(double mz, double ity, std::vector >& isotope_masses, //[out] - Size nr_isotopes, int charge, OpenSwathIsotopeGeneratorCacher isotopeCacher) + Size nr_isotopes, int charge, const OpenSwathIsotopeGeneratorCacher& isotopeCacher) { std::vector > isotopes = isotopeCacher.getImmutable(mz, charge); // note nr_isotopes is already set for (Size j = 0; j < isotopes.size(); ++j) diff --git a/src/openms/source/ANALYSIS/OPENSWATH/DIAScoring.cpp b/src/openms/source/ANALYSIS/OPENSWATH/DIAScoring.cpp index c1168441464..3b1e6a2c1ed 100644 --- a/src/openms/source/ANALYSIS/OPENSWATH/DIAScoring.cpp +++ b/src/openms/source/ANALYSIS/OPENSWATH/DIAScoring.cpp @@ -127,7 +127,7 @@ namespace OpenMS // DIA / SWATH scoring void DIAScoring::dia_isotope_scores(const std::vector& transitions, std::vector& spectrum, - OpenSwath::IMRMFeature* mrmfeature, double& isotope_corr, double& isotope_overlap, double drift_start, double drift_end, const OpenSwathIsotopeGeneratorCacher isotopeCacher) const + OpenSwath::IMRMFeature* mrmfeature, double& isotope_corr, double& isotope_overlap, double drift_start, double drift_end, const OpenSwathIsotopeGeneratorCacher& isotopeCacher) const { isotope_corr = 0; isotope_overlap = 0; @@ -246,7 +246,7 @@ namespace OpenMS void DIAScoring::dia_ms1_isotope_scores_averagine(double precursor_mz, const std::vector& spectrum, double& isotope_corr, double& isotope_overlap, - int charge_state, double drift_start, double drift_end, const OpenSwathIsotopeGeneratorCacher isotopeCacher) const + int charge_state, double drift_start, double drift_end, const OpenSwathIsotopeGeneratorCacher& isotopeCacher) const { std::vector exp_isotopes_int; getIsotopeIntysFromExpSpec_(precursor_mz, spectrum, exp_isotopes_int, charge_state, drift_start, drift_end); @@ -303,7 +303,7 @@ namespace OpenMS } void DIAScoring::score_with_isotopes(std::vector& spectrum, const std::vector& transitions, - double& dotprod, double& manhattan, double drift_start, double drift_end, const OpenSwathIsotopeGeneratorCacher isotopeCacher) const + double& dotprod, double& manhattan, double drift_start, double drift_end, const OpenSwathIsotopeGeneratorCacher& isotopeCacher) const { OpenMS::DiaPrescore dp(dia_extract_window_, dia_nr_isotopes_, dia_nr_charges_); // note with isotope cacher cannot set dia_nr_isotopes basically just a filler here dp.score(spectrum, transitions, dotprod, manhattan, drift_start, drift_end, isotopeCacher); diff --git a/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathWorkflow.cpp b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathWorkflow.cpp index a5d5013308a..0a6f8fea5b1 100644 --- a/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathWorkflow.cpp +++ b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathWorkflow.cpp @@ -547,9 +547,14 @@ namespace OpenMS boost::shared_ptr empty_exp = boost::shared_ptr(new MSExperiment); const OpenSwath::LightTargetedExperiment& transition_exp_used = transition_exp; + + OpenSwathIsotopeGeneratorCacher isotopeCacher = OpenSwathIsotopeGeneratorCacher(feature_finder_param.getValue("DIAScoring:dia_nr_isotopes"), 1); + isotopeCacher.initialize(200.5, 2001.5, 1); + + scoreAllChromatograms_(std::vector(), ms1_chromatograms, swath_maps, transition_exp_used, feature_finder_param, trafo, - cp.rt_extraction_window, featureFile, tsv_writer, osw_writer, ms1_isotopes, true); + cp.rt_extraction_window, featureFile, tsv_writer, osw_writer, isotopeCacher, ms1_isotopes, true); // write features to output if so desired std::vector< OpenMS::MSChromatogram > chromatograms; @@ -639,6 +644,10 @@ namespace OpenMS else { }; + // set up isotope cacher, to be used by all threads + OpenSwathIsotopeGeneratorCacher isotopeCacher = OpenSwathIsotopeGeneratorCacher(feature_finder_param.getValue("DIAScoring:dia_nr_isotopes"), 1); + isotopeCacher.initialize(200.5, 2001.5, 1); + // (iv) Perform extraction and scoring of fragment ion chromatograms (MS2) // We set dynamic scheduling such that the maps are worked on in the order // in which they were given to the program / acquired. This gives much @@ -731,6 +740,7 @@ namespace OpenMS SignedSize nr_batches = (transition_exp_used_all.getCompounds().size() / batch_size); + #ifdef _OPENMP #ifdef MT_ENABLE_NESTED_OPENMP // If we have a multiple of threads_outer_loop_ here, then use nested @@ -813,7 +823,7 @@ namespace OpenMS std::vector< OpenSwath::SwathMap > tmp = {swath_maps[i]}; tmp.back().sptr = current_swath_map_inner; scoreAllChromatograms_(chrom_exp.getChromatograms(), ms1_chromatograms, tmp, transition_exp_used, - feature_finder_param, trafo, cp.rt_extraction_window, featureFile, tsv_writer, osw_writer, ms1_isotopes); + feature_finder_param, trafo, cp.rt_extraction_window, featureFile, tsv_writer, osw_writer, isotopeCacher, ms1_isotopes); // Step 4: write all chromatograms and features out into an output object / file // (this needs to be done in a critical section since we only have one @@ -925,6 +935,7 @@ namespace OpenMS FeatureMap& output, OpenSwathTSVWriter & tsv_writer, OpenSwathOSWWriter & osw_writer, + const OpenSwathIsotopeGeneratorCacher& isotopeCacher, int nr_ms1_isotopes, bool ms1only) const { @@ -995,9 +1006,9 @@ namespace OpenMS std::vector to_tsv_output, to_osw_output; // initialize the OpenSwathIsotopeGeneratorCacher - OpenSwathIsotopeGeneratorCacher isotopeCacher = OpenSwathIsotopeGeneratorCacher(feature_finder_param.getValue("DIAScoring:dia_nr_isotopes"), 1); + //OpenSwathIsotopeGeneratorCacher isotopeCacher = OpenSwathIsotopeGeneratorCacher(feature_finder_param.getValue("DIAScoring:dia_nr_isotopes"), 1); - isotopeCacher.initialize(200.5, 2001.5, 1); + //isotopeCacher.initialize(200.5, 2001.5, 1); /////////////////////////////////// // Start of main function @@ -1345,8 +1356,12 @@ namespace OpenMS // Step 3: score these extracted transitions FeatureMap featureFile; + + OpenSwathIsotopeGeneratorCacher isotopeCacher = OpenSwathIsotopeGeneratorCacher(feature_finder_param.getValue("DIAScoring:dia_nr_isotopes"), 1); + isotopeCacher.initialize(200.5, 2001.5, 1); + scoreAllChromatograms_(chrom_exp.getChromatograms(), ms1_chromatograms, used_maps, transition_exp_used, - feature_finder_param, trafo, cp.rt_extraction_window, featureFile, tsv_writer, osw_writer); + feature_finder_param, trafo, cp.rt_extraction_window, featureFile, tsv_writer, osw_writer, isotopeCacher); // Step 4: write all chromatograms and features out into an output object / file // (this needs to be done in a critical section since we only have one From 2a51c549b59f30a757cc4dcde94fb7a77c3239fc Mon Sep 17 00:00:00 2001 From: jcharkow Date: Wed, 28 Dec 2022 12:29:25 -0500 Subject: [PATCH 07/13] [TEST] add tests for getImmutable This gets without mutating the cache (such that it is compatible with multthreading) --- .../OpenSwathIsotopeGeneratorCacher_test.cpp | 102 +++++++++++++++++- 1 file changed, 100 insertions(+), 2 deletions(-) diff --git a/src/tests/class_tests/openms/source/OpenSwathIsotopeGeneratorCacher_test.cpp b/src/tests/class_tests/openms/source/OpenSwathIsotopeGeneratorCacher_test.cpp index 083a2dc23ff..df41cb21929 100644 --- a/src/tests/class_tests/openms/source/OpenSwathIsotopeGeneratorCacher_test.cpp +++ b/src/tests/class_tests/openms/source/OpenSwathIsotopeGeneratorCacher_test.cpp @@ -122,7 +122,7 @@ START_SECTION(IsotopeDistribution OpenSwathIsotopeGeneratorCacher::get(double ma TEST_REAL_SIMILAR(cachedDistribution.getContainer()[i].getIntensity(), testDistribution.getContainer()[i].getIntensity()) } - // ########## Case #2 get(107.5) Should fetch distribution of mass 110 because it is the closest to the requested mass and it is less than halfStep away ################# + // ########## Case #2 get(107.5) Should fetch distribution of mass 110 because it is the closest to the requested mass and it is less than halfStep away (rounding up) ################# std::cout << "Case 2" << " get(" << 107.5 << ")" << std::endl; cachedDistribution = isotopeCacher.get(107.5); testDistribution = isotopeGenerator.estimateFromPeptideWeight(110); @@ -185,7 +185,7 @@ START_SECTION(IsotopeDistribution OpenSwathIsotopeGeneratorCacher::get(double ma } // ########## Case #7 get(89) Should fetch distribution of mass 90.5 because this is close - std::cout << "Case 6" << std::endl; + std::cout << "Case 7" << std::endl; cachedDistribution = isotopeCacher.get(89); testDistribution = isotopeGenerator.estimateFromPeptideWeight(90.5); // don't need to recreate this @@ -200,6 +200,104 @@ START_SECTION(IsotopeDistribution OpenSwathIsotopeGeneratorCacher::get(double ma END_SECTION +START_SECTION(IsotopeDistribution OpenSwathIsotopeGeneratorCacher::getImmutable(double mass)) +{ + // Setup + int maxIsotope = 2; + double massStep = 5; + IsotopeDistribution cachedDistribution; + IsotopeDistribution testDistribution; + OpenSwathIsotopeGeneratorCacher isotopeCacher(maxIsotope, massStep); + isotopeCacher.initialize(100, 120, massStep); // since non inclusive massEnd should generate cache for 100 and 101 + CoarseIsotopePatternGenerator isotopeGenerator(maxIsotope, massStep); + + + // ############ Case #1 getImmutable(100) Should fetch distribution of mass 100 because it is in the spectrum ############### + std::cout << "Case 1" << std::endl; + cachedDistribution = isotopeCacher.getImmutable(100); + testDistribution = isotopeGenerator.estimateFromPeptideWeight(100); + + TEST_EQUAL(testDistribution.size(), cachedDistribution.size()) + for (Size i = 0; i != testDistribution.size(); ++i) + { + TEST_EQUAL(round(cachedDistribution.getContainer()[i].getMZ()), testDistribution.getContainer()[i].getMZ()) + TEST_REAL_SIMILAR(cachedDistribution.getContainer()[i].getIntensity(), testDistribution.getContainer()[i].getIntensity()) + } + + // ########## Case #2 getImmutable(107.5) Should fetch distribution of mass 110 because it is the closest to the requested mass and it is less than halfStep away ################# + std::cout << "Case 2" << " getImmutable(" << 107.5 << ")" << std::endl; + cachedDistribution = isotopeCacher.getImmutable(107.5); + testDistribution = isotopeGenerator.estimateFromPeptideWeight(110); + + TEST_EQUAL(testDistribution.size(), cachedDistribution.size()) + for (Size i = 0; i != testDistribution.size(); ++i) + { + TEST_EQUAL(round(cachedDistribution.getContainer()[i].getMZ()), testDistribution.getContainer()[i].getMZ()) + TEST_REAL_SIMILAR(cachedDistribution.getContainer()[i].getIntensity(), testDistribution.getContainer()[i].getIntensity()) + } + + + // ########## Case #3 getImmutable(103) Should fetch distribution of mass 105 because it is the closest to the requested mass (always round up if halfway of step) and it is less than halfStep away ################# + + std::cout << "Case 3" << std::endl; + cachedDistribution = isotopeCacher.getImmutable(103); + testDistribution = isotopeGenerator.estimateFromPeptideWeight(105); + + TEST_EQUAL(testDistribution.size(), cachedDistribution.size()) + for (Size i = 0; i != testDistribution.size(); ++i) + { + TEST_EQUAL(round(cachedDistribution.getContainer()[i].getMZ()), testDistribution.getContainer()[i].getMZ()) + TEST_REAL_SIMILAR(cachedDistribution.getContainer()[i].getIntensity(), testDistribution.getContainer()[i].getIntensity()) + } + + // ########## Case #4 getImmutable(200.4) Should fetch distribution of mass 200.4 because there is no cached distribution close to requested mass + std::cout << "Case 4" << std::endl; + cachedDistribution = isotopeCacher.getImmutable(200.4); + testDistribution = isotopeGenerator.estimateFromPeptideWeight(200.4); // don't need to recreate this + + TEST_EQUAL(testDistribution.size(), cachedDistribution.size()) + for (Size i = 0; i != testDistribution.size(); ++i) + { + TEST_EQUAL(round(cachedDistribution.getContainer()[i].getMZ()), testDistribution.getContainer()[i].getMZ()) + TEST_REAL_SIMILAR(cachedDistribution.getContainer()[i].getIntensity(), testDistribution.getContainer()[i].getIntensity()) + } + + // ########## Case #6 getImmutable(90.5) Should fetch distribution of mass 90.5 because no value is close to this + std::cout << "Case 6" << std::endl; + cachedDistribution = isotopeCacher.getImmutable(90.5); + testDistribution = isotopeGenerator.estimateFromPeptideWeight(90.5); // don't need to recreate this + + TEST_EQUAL(testDistribution.size(), cachedDistribution.size()) + for (Size i = 0; i != testDistribution.size(); ++i) + { + TEST_EQUAL(round(cachedDistribution.getContainer()[i].getMZ()), testDistribution.getContainer()[i].getMZ()) + TEST_REAL_SIMILAR(cachedDistribution.getContainer()[i].getIntensity(), testDistribution.getContainer()[i].getIntensity()) + } +} + +END_SECTION + +START_SECTION(IsotopeDistribution OpenSwathIsotopeGeneratorCacher::getImmutable(double mass)) +{ + // Setup + int maxIsotope = 2; + double massStep = 1; + IsotopeDistribution cachedDistribution; + OpenSwathIsotopeGeneratorCacher isotopeCacher(maxIsotope, massStep); + isotopeCacher.initialize(200.5, 2001.5, massStep); // since non inclusive massEnd should generate cache for 100 and 101 + CoarseIsotopePatternGenerator isotopeGenerator(maxIsotope, massStep); + + // ############ Case #1 get(100) Should fetch distribution of mass 100 because it is in the spectrum ############### + std::cout << "testing 580.309" << std::endl; + cachedDistribution = isotopeCacher.getImmutable(580.309); + + std::cout << "testing 709.351" << std::endl; + cachedDistribution = isotopeCacher.getImmutable(709.351); + +} +END_SECTION + + START_SECTION(std::vector> OpenSwathIsotopeGeneratorCacher::get(double product_mz, int charge, const double mannmass) ) { From 1a28de62a11d98c307fae8a7eeb200d8f75e684c Mon Sep 17 00:00:00 2001 From: jcharkow Date: Wed, 28 Dec 2022 12:30:19 -0500 Subject: [PATCH 08/13] [NOP][FIX] bug fix and code cleanup clean up code, and ensure that all tests are passing --- .../OpenSwathIsotopeGeneratorCacher.cpp | 155 ++---------------- 1 file changed, 16 insertions(+), 139 deletions(-) diff --git a/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.cpp b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.cpp index e22eeecf5ac..15d18294d66 100644 --- a/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.cpp +++ b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.cpp @@ -41,8 +41,6 @@ namespace OpenMS { - - // OpenSwathIsotopeGeneratorCacher::~OpenSwathIsotopeGeneratorCacher() = default; // note these should be mass values rather than m/z values void OpenSwathIsotopeGeneratorCacher::initialize(double massStart, double massEnd, double massStep) @@ -83,7 +81,6 @@ namespace OpenMS return cachedIsotopeDistributions_; } - IsotopeDistribution OpenSwathIsotopeGeneratorCacher::addEntry(double mass) { IsotopeDistribution dist = solver_.estimateFromPeptideWeight(mass); @@ -94,22 +91,14 @@ namespace OpenMS IsotopeDistribution OpenSwathIsotopeGeneratorCacher::computeMiss(double mass) const { - OPENMS_LOG_DEBUG << "Cache miss :(" << std::endl; CoarseIsotopePatternGenerator tempSolver = solver_; //copy the isotope generator to enable const signature return tempSolver.estimateFromPeptideWeight(mass); } + // note in this implementation we are guarenteed that all elements are equally spaced IsotopeDistribution OpenSwathIsotopeGeneratorCacher::getImmutable(double mass) const { - /* - // print the cachedIsotopeDistribution list - for (const auto &[key, value]:cachedIsotopeDistributions_) - { - //std::cout << "key is: " << key << std::endl; - } - */ - //std::cout << "JOSH start get immutable" << std::endl; if (mass <= (cachedIsotopeDistributions_.begin()->first + halfMassStep_) ) // all cached greater than target (with tolerance) { //std::cout << "All cached elements strictly greater than target (with tolerance)" << std::endl; @@ -126,75 +115,29 @@ namespace OpenMS } else // mass is somewhere in the middle or all elements are less than mass { - auto upperBound = cachedIsotopeDistributions_.upper_bound(mass); - auto prevEle = upperBound; - prevEle--; - //std::cout << "ptr previous: " << prevEle->first << std::endl; - //std::cout << "upper bound (first element not less than " << mass << ": " << upperBound->first << std::endl; + // map the mass to a function that only the bins are whole numbers, therefore we will be rounding to the nearest bin + double roundErr = 1.0 / massStep_; // map + double massRounded = std::round(mass * roundErr) / roundErr; - if (upperBound == cachedIsotopeDistributions_.end()) - { - //std::cout << "upper bound is at the end, meaning all elements are less than or equal to mass" << std::endl; - if (mass - prevEle->first <= halfMassStep_) - { - //std::cout << "last element is in range" << std::endl; - return prevEle->second; - } - else - { - //std::cout << "last element is not in range" << std::endl; - return computeMiss(mass); - } - } - else if ((mass - prevEle->first) > halfMassStep_) - { - //std::cout << "mass" << mass << std::endl; - //std::cout << "Previous element is too far away (half mass step: " << (mass - prevEle->first) << ")" << std::endl; - if ((upperBound->first - mass) <= halfMassStep_) - { + auto foundEle = cachedIsotopeDistributions_.find(massRounded); - //std::cout << "upper bound in range, match!" << std::endl; - return upperBound->second; - } - else - { - //std::cout << "upper bound not in range, new entry" << std::endl; - return computeMiss(mass); - } + if (foundEle != cachedIsotopeDistributions_.end()){ + //std::cout << "ele found" << std::endl; + return foundEle->second; } - else //prevEle in range + else { - //std::cout << "previous element is in range" << std::endl; - if ((upperBound->first - mass) > halfMassStep_) - { - //std::cout << "upper bound not in range match previous element" << std::endl; - return prevEle->second; - } - else // both elements in range - { - //std::cout << "both elements in range see which is closer" << std::endl; - if ((upperBound->first - mass) <= (mass - prevEle->first)) - { - //std::cout << "upper bound is closer" << std::endl; - return upperBound->second; - } - else - { - //std::cout << "previous element is closer" << std::endl; - return prevEle->second; - } - } + OPENMS_LOG_WARN << "Cache Miss" << std::endl; + //std::cout << "ele not found" << std::endl; + return computeMiss(mass); } } + throw Exception::Postcondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Function should never end up here, all possible conditions exhausted"); + return cachedIsotopeDistributions_.begin()->second; } IsotopeDistribution OpenSwathIsotopeGeneratorCacher::get(double mass) { - for (const auto &[key, value]:cachedIsotopeDistributions_) - { - //std::cout << "key is: " << key << std::endl; - } - if (mass <= (cachedIsotopeDistributions_.begin()->first + halfMassStep_) ) // all cached greater than target (with tolerance) { //std::cout << "All cached elements strictly greater than target (with tolerance)" << std::endl; @@ -270,74 +213,10 @@ namespace OpenMS } } } + throw Exception::Postcondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Function should never end up here, all possible conditions exhausted"); + return cachedIsotopeDistributions_.begin()->second; } - - - -/* - if ((ele == cachedIsotopeDistributions_.end()) & (previousEle == cachedIsotopeDistributions_.end()) | (ele == cachedIsotopeDistributions_.begin()) // all cached elements are greater than target mass - { - //std::cout << "All elements greater than target mass, creating entry" << std::endl; - return addEnrtry(mass); - } - else if ((ele == cachedIsotopeDistributions_.end()) & (previousEle != cachedIsotopeDistributions_.end())) // last element might match - { - //std::cout << "Last element might match" << std::endl; - if (std::abs(previousEle->first - mass) < halfStep_) - { - //std::cout << "Last element matches" << std::endl; - return previousEle->second; - } - else - { - return addEntry(mass); - } - } - //else if ((ele == cachedIsotopeDistributions_.begin()) & (previousEle != cachedIsotopeDistributions_.end())) // last element might match - // - // at the end of the array, possible that we still have a match though if this is just slightly over the last element - //std::cout << "At the end of the array..." << std::endl; - if (std::abs(previousEle->first - mass) < std::abs(ele->first - mass)) // check if the previous entry is within range - { - double rslt = std::abs(previousEle->first - mass) < std::abs(ele->first - mass); - //std::cout << "rslt is: " << rslt << std::endl; - //std::cout << "Taking previous element" << std::endl; - return ele->second; - } - else // previous entry not within range - { - double rslt = std::abs(previousEle->first - mass) < std::abs(ele->first - mass); - //std::cout << "rslt is: " << rslt << std::endl; - //std::cout << "No match creating entry" << std::endl; - return addEntry(mass); - } - - // determine if the current element or the element before it is closer - if (std::abs(previousEle->first - mass) < std::abs(ele->first - mass)) // check if the previous entry is closer than the first entry - { - double rslt = std::abs(previousEle->first - mass) < std::abs(ele->first - mass); - //std::cout << "rslt is: " << rslt << std::endl; - //std::cout << "Taking previous element" << std::endl; - ele = previousEle; - } - - if (std::abs(ele->first - mass) > halfMassStep_) // element found is more than halfMassStep_ away from target mass so generate a new entry - { - double rslt = std::abs(ele->first - mass) > halfMassStep_; - - //std::cout << "Closest element is not in range, creating entry" << " closest element: " << ele->first << " delta: " << rslt << std::endl; - return addEntry(mass); - } - else // there is a current cached distribution that can be used! - { - //std::cout << "Using the cache" << std::endl; - - return ele->second; - } - } - */ - std::vector> OpenSwathIsotopeGeneratorCacher::getImmutable(double product_mz, int charge, const double mannmass) const { //std::cout << "Calling sub get immutable" << std::endl; @@ -357,8 +236,6 @@ namespace OpenMS return isotopes_spec; } - - std::vector> OpenSwathIsotopeGeneratorCacher::get(double product_mz, int charge, const double mannmass) { IsotopeDistribution distribution = get(product_mz * charge); From 46d3889048467794adcb5012bac813896557d752 Mon Sep 17 00:00:00 2001 From: jcharkow Date: Wed, 28 Dec 2022 16:47:22 -0500 Subject: [PATCH 09/13] [TEST] fix unit tests Fix unit tests so that they pass including DIAPrescoring_test, DIAScoring_test, MRMFeatureFinderScoring_test and MRMFeatureScoring_test --- .../openms/source/DIAPrescoring_test.cpp | 12 ++-- .../openms/source/DIAScoring_test.cpp | 50 +++++++++------ .../source/MRMFeatureFinderScoring_test.cpp | 64 ++++++++++--------- .../openms/source/MRMFeatureScoring_test.cpp | 9 ++- 4 files changed, 78 insertions(+), 57 deletions(-) diff --git a/src/tests/class_tests/openms/source/DIAPrescoring_test.cpp b/src/tests/class_tests/openms/source/DIAPrescoring_test.cpp index 002a7b49f41..f3a041ff910 100644 --- a/src/tests/class_tests/openms/source/DIAPrescoring_test.cpp +++ b/src/tests/class_tests/openms/source/DIAPrescoring_test.cpp @@ -51,6 +51,10 @@ START_TEST(DiaPrescore2, "$Id$") DiaPrescore* ptr = nullptr; DiaPrescore* nullPointer = nullptr; + +OpenSwathIsotopeGeneratorCacher isotopeCacher(4, 1); // here have 4 isotopes +isotopeCacher.initialize(1, 1, 1); // make empty so cache is not used + START_SECTION(DiaPrescore()) { ptr = new DiaPrescore(); @@ -123,7 +127,7 @@ START_SECTION ( test score function with perfect first transition and ion mobili std::vector sptrArr; sptrArr.push_back(sptr); - diaprescore.score(sptrArr, transitions , dotprod, manhattan, -1, -1); + diaprescore.score(sptrArr, transitions , dotprod, manhattan, -1, -1, isotopeCacher); //std::cout << "dotprod : " << dotprod << std::endl; //std::cout << "manhattan : " << manhattan << std::endl; // >> exp = [240, 74, 39, 15, 0] @@ -198,7 +202,7 @@ START_SECTION ( test score function missing first transition ) std::vector sptrArr; sptrArr.push_back(sptr); - diaprescore.score(sptrArr, transitions , dotprod, manhattan, -1, -1); + diaprescore.score(sptrArr, transitions , dotprod, manhattan, -1, -1, isotopeCacher); //std::cout << "dotprod : " << dotprod << std::endl; //std::cout << "manhattan : " << manhattan << std::endl; // >> exp = [240, 74, 39, 15, 0] @@ -270,7 +274,7 @@ START_SECTION ( test score function with shifted first transition ) std::vector sptrArr; sptrArr.push_back(sptr); - diaprescore.score(sptrArr, transitions , dotprod, manhattan, -1, -1); + diaprescore.score(sptrArr, transitions , dotprod, manhattan, -1, -1, isotopeCacher); //std::cout << "dotprod : " << dotprod << std::endl; //std::cout << "manhattan : " << manhattan << std::endl; // >> exp = [240, 74, 39, 15, 0] @@ -367,7 +371,7 @@ START_SECTION ( test score function missing first transition due to different io std::vector sptrArr; sptrArr.push_back(sptr); - diaprescore.score(sptrArr, transitions , dotprod, manhattan, PRECURSOR_ION_MOBILITY - (ION_MOBILITY_WIDTH / 2.0), PRECURSOR_ION_MOBILITY + (ION_MOBILITY_WIDTH / 2.0)); + diaprescore.score(sptrArr, transitions , dotprod, manhattan, PRECURSOR_ION_MOBILITY - (ION_MOBILITY_WIDTH / 2.0), PRECURSOR_ION_MOBILITY + (ION_MOBILITY_WIDTH / 2.0), isotopeCacher); //std::cout << "dotprod : " << dotprod << std::endl; //std::cout << "manhattan : " << manhattan << std::endl; // >> exp = [240, 74, 39, 15, 0] diff --git a/src/tests/class_tests/openms/source/DIAScoring_test.cpp b/src/tests/class_tests/openms/source/DIAScoring_test.cpp index 3a842f70fa1..92ae670434e 100644 --- a/src/tests/class_tests/openms/source/DIAScoring_test.cpp +++ b/src/tests/class_tests/openms/source/DIAScoring_test.cpp @@ -87,7 +87,7 @@ OpenSwath::SpectrumPtr prepareSpectrum() 599.97, 599.98, 599.99, 600.0, 600.01, 600.02, 600.03, 600.97, 600.98, 600.99, 601.0, 601.01, 601.02, 601.03, - // note that this peak at 602 is special since it is integrated from + // note that this peak at 602 is special since it is integrated from // [(600+2*1.0033548) - 0.025, (600+2*1.0033548) + 0.025] = [601.9817096 to 602.0317096] 601.97, 601.98, 601.99, 602.0, 602.01, 602.02, 602.03, 602.99, 603.0, 603.01 @@ -151,6 +151,9 @@ p_dia_large.setValue("dia_extraction_window", 0.5); DIAScoring* ptr = nullptr; DIAScoring* nullPointer = nullptr; +OpenSwathIsotopeGeneratorCacher isotopeCacher((int) p_dia.getValue("dia_nr_isotopes") + 1, 1); +isotopeCacher.initialize(1, 1, 1); // make empty so cache is not used + START_SECTION(DIAScoring()) { ptr = new DIAScoring(); @@ -235,7 +238,7 @@ mock_tr2.product_mz = 600; mock_tr2.fragment_charge = 1; mock_tr2.transition_name = "group2"; -START_SECTION([EXTRA] forward void dia_isotope_scores(const std::vector & transitions, std::vector spectrum, OpenSwath::IMRMFeature * mrmfeature, int putative_fragment_charge, double & isotope_corr, double & isotope_overlap, double drift_start, double drift_end)) +START_SECTION([EXTRA] forward void dia_isotope_scores(const std::vector & transitions, std::vector spectrum, OpenSwath::IMRMFeature * mrmfeature, int putative_fragment_charge, double & isotope_corr, double & isotope_overlap, double drift_start, double drift_end, OpenSwathIsotopeGeneratorCacher& isotopeCacher)) { std::vector sptr = prepareSpectrumArr(); MockMRMFeature * imrmfeature_test = new MockMRMFeature(); @@ -243,13 +246,14 @@ START_SECTION([EXTRA] forward void dia_isotope_scores(const std::vectorm_intensity = 0.7f; std::vector transitions; - // Try with transition at 600 m/z + // Try with transition at 600 m/z transitions.push_back(mock_tr2); DIAScoring diascoring; diascoring.setParameters(p_dia); double isotope_corr = 0, isotope_overlap = 0; - diascoring.dia_isotope_scores(transitions, sptr, imrmfeature_test, isotope_corr, isotope_overlap, -1, -1); + + diascoring.dia_isotope_scores(transitions, sptr, imrmfeature_test, isotope_corr, isotope_overlap, -1, -1, isotopeCacher); // >> exp = [240, 74, 37, 15, 0] // >> theo = [1, 0.325757771553019, 0.0678711748364005, 0.0105918703087134, 0.00134955223787482] @@ -263,7 +267,7 @@ START_SECTION([EXTRA] forward void dia_isotope_scores(const std::vector & transitions, std::vector spectrum, OpenSwath::IMRMFeature * mrmfeature, int putative_fragment_charge, double & isotope_corr, double & isotope_overlap, double drift_start, double drift_end)) +START_SECTION([EXTRA] forward negative charge: void dia_isotope_scores(const std::vector & transitions, std::vector spectrum, OpenSwath::IMRMFeature * mrmfeature, int putative_fragment_charge, double & isotope_corr, double & isotope_overlap, double drift_start, double drift_end, OpenSwathIsotopeGeneratorCacher& isotopeCacher)) { std::vector sptr = prepareSpectrumArr(); MockMRMFeature * imrmfeature_test = new MockMRMFeature(); @@ -278,7 +282,7 @@ START_SECTION([EXTRA] forward negative charge: void dia_isotope_scores(const std DIAScoring diascoring; diascoring.setParameters(p_dia); double isotope_corr = 0, isotope_overlap = 0; - diascoring.dia_isotope_scores(transitions, sptr, imrmfeature_test, isotope_corr, isotope_overlap, -1, -1); + diascoring.dia_isotope_scores(transitions, sptr, imrmfeature_test, isotope_corr, isotope_overlap, -1, -1, isotopeCacher); // >> exp = [240, 74, 37, 15, 0] // >> theo = [1, 0.325757771553019, 0.0678711748364005, 0.0105918703087134, 0.00134955223787482] @@ -292,7 +296,7 @@ START_SECTION([EXTRA] forward negative charge: void dia_isotope_scores(const std } END_SECTION -START_SECTION([EXTRA] forward negative charge: void dia_isotope_scores(const std::vector & transitions, std::vector spectrum, OpenSwath::IMRMFeature * mrmfeature, int putative_fragment_charge, double & isotope_corr, double & isotope_overlap, double drift_start, double drift_end)) +START_SECTION([EXTRA] forward negative charge: void dia_isotope_scores(const std::vector & transitions, std::vector spectrum, OpenSwath::IMRMFeature * mrmfeature, int putative_fragment_charge, double & isotope_corr, double & isotope_overlap, double drift_start, double drift_end, OpenSwathIsotopeGeneratorCacher& isotopeCacher)) { std::vector sptr = prepareSpectrumArr(); MockMRMFeature * imrmfeature_test = new MockMRMFeature(); @@ -307,7 +311,7 @@ START_SECTION([EXTRA] forward negative charge: void dia_isotope_scores(const std DIAScoring diascoring; diascoring.setParameters(p_dia); double isotope_corr = 0, isotope_overlap = 0; - diascoring.dia_isotope_scores(transitions, sptr, imrmfeature_test, isotope_corr, isotope_overlap, -1, -1); + diascoring.dia_isotope_scores(transitions, sptr, imrmfeature_test, isotope_corr, isotope_overlap, -1, -1, isotopeCacher); // >> exp = [240, 74, 37, 15, 0] // >> theo = [1, 0.325757771553019, 0.0678711748364005, 0.0105918703087134, 0.00134955223787482] @@ -321,7 +325,7 @@ START_SECTION([EXTRA] forward negative charge: void dia_isotope_scores(const std } END_SECTION -START_SECTION([EXTRA] backward void dia_isotope_scores(const std::vector & transitions, std::vector spectrum, OpenSwath::IMRMFeature * mrmfeature, int putative_fragment_charge, double & isotope_corr, double & isotope_overlap, double drift_start, double drift_end)) +START_SECTION([EXTRA] backward void dia_isotope_scores(const std::vector & transitions, std::vector spectrum, OpenSwath::IMRMFeature * mrmfeature, int putative_fragment_charge, double & isotope_corr, double & isotope_overlap, double drift_start, double drift_end, OpenSwathIsotopeGeneratorCacher& isotopeCacher)) { std::vector sptr = prepareSpectrumArr(); MockMRMFeature * imrmfeature_test = new MockMRMFeature(); @@ -329,14 +333,14 @@ START_SECTION([EXTRA] backward void dia_isotope_scores(const std::vectorm_intensity = 0.3f; std::vector transitions; - // Try with transition at 500 m/z + // Try with transition at 500 m/z // This peak is not monoisotopic (e.g. at 499 there is another, more intense, peak) transitions.push_back(mock_tr1); DIAScoring diascoring; diascoring.setParameters(p_dia); double isotope_corr = 0, isotope_overlap = 0; - diascoring.dia_isotope_scores(transitions, sptr, imrmfeature_test, isotope_corr, isotope_overlap, -1, -1); + diascoring.dia_isotope_scores(transitions, sptr, imrmfeature_test, isotope_corr, isotope_overlap, -1, -1, isotopeCacher); // >> exp = [74, 39, 15, 0, 0] // >> theo = [1, 0.266799519434277, 0.0486475002325161, 0.0066525896497495, 0.000747236543377621] @@ -349,7 +353,7 @@ START_SECTION([EXTRA] backward void dia_isotope_scores(const std::vector &transitions, std::vector spectrum, OpenSwath::IMRMFeature *mrmfeature, double &isotope_corr, double &isotope_overlap, double drift_start, double drift_end) ) +START_SECTION ( void dia_isotope_scores(const std::vector< TransitionType > &transitions, std::vector spectrum, OpenSwath::IMRMFeature *mrmfeature, double &isotope_corr, double &isotope_overlap, double drift_start, double drift_end, OpenSwathIsotopeGeneratorCacher& isotopeCacher) ) { std::vector sptr = prepareSpectrumArr(); @@ -364,7 +368,7 @@ START_SECTION ( void dia_isotope_scores(const std::vector< TransitionType > &tra DIAScoring diascoring; diascoring.setParameters(p_dia); double isotope_corr = 0, isotope_overlap = 0; - diascoring.dia_isotope_scores(transitions, sptr, imrmfeature_test, isotope_corr, isotope_overlap, -1, -1); + diascoring.dia_isotope_scores(transitions, sptr, imrmfeature_test, isotope_corr, isotope_overlap, -1, -1, isotopeCacher); // see above for the two individual numbers (forward and backward) TEST_REAL_SIMILAR(isotope_corr, 0.995335798317618 * 0.7 + 0.959692139694113 * 0.3) @@ -373,8 +377,8 @@ START_SECTION ( void dia_isotope_scores(const std::vector< TransitionType > &tra } END_SECTION -START_SECTION(void dia_ms1_isotope_scores(double precursor_mz, std::vector spectrum, size_t charge_state, - double& isotope_corr, double& isotope_overlap, double drift_start, double drift_end)) +START_SECTION(void dia_ms1_isotope_scores(double precursor_mz, std::vector spectrum, size_t charge_state, + double& isotope_corr, double& isotope_overlap, double drift_start, double drift_end, OpenSwathIsotopeGeneratorCacher& isotopeCacher)) { std::vector sptr = prepareSpectrumArr(); @@ -388,7 +392,7 @@ START_SECTION(void dia_ms1_isotope_scores(double precursor_mz, std::vector>> theo = [0.57277789564886, 0.305415548811564, 0.0952064968352544, 0.0218253361702587, 0.00404081869309618] // >>> exp = [74, 0, 39, 0, 15] @@ -419,7 +423,7 @@ START_SECTION(void dia_ms1_isotope_scores(double precursor_mz, std::vector> exp = [240, 74, 39, 15, 0] // >> theo = [0.755900817146293, 0.201673974754608, 0.0367726851778834, 0.00502869795238462, 0.000564836713740715] @@ -460,7 +464,7 @@ START_SECTION (void dia_massdiff_score(const std::vector< TransitionType > &tran END_SECTION START_SECTION ( bool DIAScoring::dia_ms1_massdiff_score(double precursor_mz, transitions, std::vector spectrum, double& ppm_score, double drift_start, double drift_end) ) -{ +{ std::vector sptr = prepareShiftedSpectrum(); DIAScoring diascoring; diascoring.setParameters(p_dia_large); @@ -526,7 +530,11 @@ START_SECTION ( void dia_by_ion_score(std::vector spectrum, AASequ } END_SECTION -START_SECTION( void score_with_isotopes(std::vector spectrum, const std::vector< TransitionType > &transitions, double &dotprod, double &manhattan)) + +OpenSwathIsotopeGeneratorCacher isotopeCacher2(p_dia.getValue("dia_nr_isotopes"), 1); +isotopeCacher2.initialize(1, 1, 1); // make empty so cache is not used + +START_SECTION( void score_with_isotopes(std::vector spectrum, const std::vector< TransitionType > &transitions, double &dotprod, double &manhattan, OpenSwathIsotopeGeneratorCacher& isotopeCacher)) { OpenSwath::LightTransition mock_tr1; mock_tr1.product_mz = 500.; @@ -550,7 +558,7 @@ START_SECTION( void score_with_isotopes(std::vector spectrum, cons diascoring.setParameters(p_dia); double dotprod, manhattan; - diascoring.score_with_isotopes(sptr,transitions,dotprod,manhattan, -1, -1); + diascoring.score_with_isotopes(sptr,transitions,dotprod,manhattan, -1, -1, isotopeCacher2); TEST_REAL_SIMILAR (dotprod, 0.43738312458795); TEST_REAL_SIMILAR (manhattan, 0.55743322213368); diff --git a/src/tests/class_tests/openms/source/MRMFeatureFinderScoring_test.cpp b/src/tests/class_tests/openms/source/MRMFeatureFinderScoring_test.cpp index 64d6abff6f7..12931a69c73 100644 --- a/src/tests/class_tests/openms/source/MRMFeatureFinderScoring_test.cpp +++ b/src/tests/class_tests/openms/source/MRMFeatureFinderScoring_test.cpp @@ -1,32 +1,32 @@ // -------------------------------------------------------------------------- -// OpenMS -- Open-Source Mass Spectrometry +// OpenMS -- Open-Source Mass Spectrometry // -------------------------------------------------------------------------- // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen, // ETH Zurich, and Freie Universitaet Berlin 2002-2022. -// +// // This software is released under a three-clause BSD license: // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. -// * Neither the name of any author or any participating institution -// may be used to endorse or promote products derived from this software +// * Neither the name of any author or any participating institution +// may be used to endorse or promote products derived from this software // without specific prior written permission. -// For a full list of authors, refer to the file AUTHORS. +// For a full list of authors, refer to the file AUTHORS. // -------------------------------------------------------------------------- // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING -// INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; -// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, -// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR -// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING +// INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// // -------------------------------------------------------------------------- // $Maintainer: Hannes Roest $ // $Authors: Hannes Roest $ @@ -58,6 +58,9 @@ START_TEST(MRMFeatureFinderScoring, "$Id$") MRMFeatureFinderScoring* ptr = nullptr; MRMFeatureFinderScoring* nullPointer = nullptr; +OpenSwathIsotopeGeneratorCacher isotopeCacher(1, 1); +isotopeCacher.initialize(1, 1, 1); // not important for test, just leave empty + START_SECTION(MRMFeatureFinderScoring()) { ptr = new MRMFeatureFinderScoring(); @@ -78,7 +81,7 @@ START_SECTION(void pickExperiment(OpenSwath::SpectrumAccessPtr input, FeatureMap // picker_param.setValue("TransitionGroupPicker:PeakPickerMRM:method", "legacy"); // old parameters // picker_param.setValue("TransitionGroupPicker:PeakPickerMRM:peak_width", 40.0); // old parameters // ff.setParameters(picker_param); - + MRMFeature feature; FeatureMap featureFile; TransformationDescription trafo; @@ -95,19 +98,18 @@ START_SECTION(void pickExperiment(OpenSwath::SpectrumAccessPtr input, FeatureMap TraMLFile().load(OPENMS_GET_TEST_DATA_PATH("OpenSwath_generic_input.TraML"), transition_exp_); OpenSwathDataAccessHelper::convertTargetedExp(transition_exp_, transitions); } - // Pick features in the experiment #ifdef USE_SP_INTERFACE OpenSwath::SpectrumAccessPtr swath_ptr = SimpleOpenMSSpectraFactory::getSpectrumAccessOpenMSPtr(swath_map); OpenSwath::SpectrumAccessPtr chromatogram_ptr = SimpleOpenMSSpectraFactory::getSpectrumAccessOpenMSPtr(exp); std::vector< OpenSwath::SwathMap > swath_maps(1); swath_maps[0].sptr = swath_ptr; - ff.pickExperiment(chromatogram_ptr, featureFile, transitions, trafo, swath_maps, transition_group_map); + ff.pickExperiment(chromatogram_ptr, featureFile, transitions, trafo, swath_maps, transition_group_map, isotopeCacher); #else - ff.pickExperiment(exp, featureFile, transitions, trafo, *swath_map, transition_group_map); + ff.pickExperiment(exp, featureFile, transitions, trafo, *swath_map, transition_group_map, isotopeCacher); #endif - // Test the number of features found + // Test the number of features found TEST_EQUAL(transition_group_map.size(), 2) /////////////////////////////////////////////////////////////////////////// @@ -115,7 +117,7 @@ START_SECTION(void pickExperiment(OpenSwath::SpectrumAccessPtr input, FeatureMap transition_group = transition_group_map["tr_gr1"]; TEST_EQUAL(transition_group.size(), 2) TEST_EQUAL(transition_group.getFeatures().size(), 1) - + // Look closely at the feature we found in the first group feature = transition_group.getFeatures()[0]; TOLERANCE_ABSOLUTE(0.1); @@ -171,7 +173,7 @@ START_SECTION(void pickExperiment(OpenSwath::SpectrumAccessPtr input, FeatureMap // test legacy parameters { - + Param picker_param = ff.getDefaults(); picker_param.setValue("TransitionGroupPicker:PeakPickerMRM:method", "legacy"); // old parameters picker_param.setValue("TransitionGroupPicker:PeakPickerMRM:peak_width", 40.0); // old parameters @@ -179,16 +181,16 @@ START_SECTION(void pickExperiment(OpenSwath::SpectrumAccessPtr input, FeatureMap transition_group_map.clear(); featureFile.clear(); - + // Pick features in the experiment #ifdef USE_SP_INTERFACE OpenSwath::SpectrumAccessPtr swath_ptr = SimpleOpenMSSpectraFactory::getSpectrumAccessOpenMSPtr(swath_map); OpenSwath::SpectrumAccessPtr chromatogram_ptr = SimpleOpenMSSpectraFactory::getSpectrumAccessOpenMSPtr(exp); std::vector< OpenSwath::SwathMap > swath_maps(1); swath_maps[0].sptr = swath_ptr; - ff.pickExperiment(chromatogram_ptr, featureFile, transitions, trafo, swath_maps, transition_group_map); + ff.pickExperiment(chromatogram_ptr, featureFile, transitions, trafo, swath_maps, transition_group_map, isotopeCacher); #else - ff.pickExperiment(exp, featureFile, transitions, trafo, *swath_map, transition_group_map); + ff.pickExperiment(exp, featureFile, transitions, trafo, *swath_map, transition_group_map, isotopeCacher); #endif /////////////////////////////////////////////////////////////////////////// @@ -230,7 +232,7 @@ START_SECTION(void pickExperiment(OpenSwath::SpectrumAccessPtr input, FeatureMap END_SECTION START_SECTION(void pickExperiment(OpenSwath::SpectrumAccessPtr input, FeatureMap< Feature > &output, OpenSwath::LightTargetedExperiment &transition_exp, TransformationDescription trafo, OpenSwath::SpectrumAccessPtr swath_map, TransitionGroupMapType &transition_group_map)) -{ +{ MRMFeatureFinderScoring ff; Param ff_param = MRMFeatureFinderScoring().getDefaults(); Param scores_to_use; @@ -251,7 +253,7 @@ START_SECTION(void pickExperiment(OpenSwath::SpectrumAccessPtr input, FeatureMap boost::shared_ptr exp (new PeakMap); OpenSwath::LightTargetedExperiment transitions; MzMLFile().load(OPENMS_GET_TEST_DATA_PATH("OpenSwath_generic_input.mzML"), *exp); - { + { TargetedExperiment transition_exp_; TraMLFile().load(OPENMS_GET_TEST_DATA_PATH("OpenSwath_identification_input.TraML"), transition_exp_); OpenSwathDataAccessHelper::convertTargetedExp(transition_exp_, transitions); @@ -263,12 +265,12 @@ START_SECTION(void pickExperiment(OpenSwath::SpectrumAccessPtr input, FeatureMap OpenSwath::SpectrumAccessPtr chromatogram_ptr = SimpleOpenMSSpectraFactory::getSpectrumAccessOpenMSPtr(exp); std::vector< OpenSwath::SwathMap > swath_maps(1); swath_maps[0].sptr = swath_ptr; - ff.pickExperiment(chromatogram_ptr, featureFile, transitions, trafo, swath_maps, transition_group_map); + ff.pickExperiment(chromatogram_ptr, featureFile, transitions, trafo, swath_maps, transition_group_map, isotopeCacher); #else - ff.pickExperiment(exp, featureFile, transitions, trafo, *swath_map, transition_group_map); + ff.pickExperiment(exp, featureFile, transitions, trafo, *swath_map, transition_group_map, isotopeCacher); #endif - // Test the number of features found + // Test the number of features found TEST_EQUAL(transition_group_map.size(), 2) /////////////////////////////////////////////////////////////////////////// @@ -348,7 +350,7 @@ START_SECTION(void mapExperimentToTransitionList(OpenSwath::SpectrumAccessPtr in ff.mapExperimentToTransitionList(exp, transitions, transition_group_map, trafo, -1); #endif - // Test the number of features found + // Test the number of features found TEST_EQUAL(transition_group_map.size(), 2) /////////////////////////////////////////////////////////////////////////// @@ -374,14 +376,14 @@ START_SECTION(void mapExperimentToTransitionList(OpenSwath::SpectrumAccessPtr in TEST_EQUAL(transition_group.hasChromatogram("tr3"), true) TEST_EQUAL(transition_group.hasChromatogram("tr4"), true) TEST_EQUAL(transition_group.hasChromatogram("tr5"), true) - + TEST_EQUAL(transition_group.getChromatogram("tr5").getNativeID(), "tr5") TEST_EQUAL(transition_group.getTransition("tr5").getNativeID(), "tr5") } END_SECTION -START_SECTION( void scorePeakgroups(MRMTransitionGroupType& transition_group, TransformationDescription & trafo, OpenSwath::SpectrumAccessPtr swath_map, FeatureMap& output) ) +START_SECTION( void scorePeakgroups(MRMTransitionGroupType& transition_group, TransformationDescription & trafo, OpenSwath::SpectrumAccessPtr swath_map, FeatureMap& output) ) { NOT_TESTABLE // tested above } diff --git a/src/tests/class_tests/openms/source/MRMFeatureScoring_test.cpp b/src/tests/class_tests/openms/source/MRMFeatureScoring_test.cpp index b299ed3de14..ddef1156f14 100644 --- a/src/tests/class_tests/openms/source/MRMFeatureScoring_test.cpp +++ b/src/tests/class_tests/openms/source/MRMFeatureScoring_test.cpp @@ -69,6 +69,9 @@ START_TEST(MRMScoring, "$Id$") OpenSwath::MRMScoring* ptr = nullptr; OpenSwath::MRMScoring* nullPointer = nullptr; +OpenSwathIsotopeGeneratorCacher isotopeCacher(4, 1); // here have 4 isotopes +isotopeCacher.initialize(1, 1, 1); // make empty so cache is not used + START_SECTION(MRMScoring()) { ptr = new OpenSwath::MRMScoring(); @@ -177,6 +180,10 @@ START_SECTION((virtual void test_dia_scores())) p_dia.setValue("dia_nr_charges", 4); diascoring.setParameters(p_dia); + + OpenSwathIsotopeGeneratorCacher isotopeCacher2((int) p_dia.getValue("dia_nr_isotopes") + 1, 1); + isotopeCacher2.initialize(1, 1, 1); // make empty so cache is not used + // calculate the normalized library intensity (expected value of the intensities) // Numpy // arr1 = [ 0,1,3,5,2,0 ]; @@ -201,7 +208,7 @@ START_SECTION((virtual void test_dia_scores())) std::vector sptrArr; sptrArr.push_back(sptr); - diascoring.dia_isotope_scores(transitions, sptrArr, imrmfeature, isotope_corr, isotope_overlap, -1, -1); + diascoring.dia_isotope_scores(transitions, sptrArr, imrmfeature, isotope_corr, isotope_overlap, -1, -1, isotopeCacher2); delete imrmfeature; From 463acdca5e6fb8f6dcbbd913f819a84df0c63aca Mon Sep 17 00:00:00 2001 From: jcharkow Date: Wed, 28 Dec 2022 16:48:26 -0500 Subject: [PATCH 10/13] Update function signatures update function signatures of other tools to allow building --- src/topp/OpenSwathAnalyzer.cpp | 11 +++++-- src/topp/OpenSwathRTNormalizer.cpp | 45 +++++++++++++++------------- src/utils/OpenSwathDIAPreScoring.cpp | 5 +++- 3 files changed, 37 insertions(+), 24 deletions(-) diff --git a/src/topp/OpenSwathAnalyzer.cpp b/src/topp/OpenSwathAnalyzer.cpp index b5b7e70f8eb..eb5ed6ede98 100644 --- a/src/topp/OpenSwathAnalyzer.cpp +++ b/src/topp/OpenSwathAnalyzer.cpp @@ -227,8 +227,11 @@ class TOPPOpenSwathAnalyzer : public TOPPBase OpenMS::MRMFeatureFinderScoring::TransitionGroupMapType transition_group_map; OpenSwath::SpectrumAccessPtr chromatogram_ptr = SimpleOpenMSSpectraFactory::getSpectrumAccessOpenMSPtr(exp); std::vector< OpenSwath::SwathMap > empty_maps; + + OpenSwathIsotopeGeneratorCacher isotopeCacher(feature_finder_param.getValue("DIAScoring:dia_nr_isotopes"), 1); + isotopeCacher.initialize(200.5, 2001.5, 1); featureFinder.pickExperiment(chromatogram_ptr, out_featureFile, - transition_exp, trafo, empty_maps, transition_group_map); + transition_exp, trafo, empty_maps, transition_group_map, isotopeCacher); out_featureFile.ensureUniqueId(); addDataProcessing_(out_featureFile, getProcessingInfo_(DataProcessing::QUANTITATION)); FeatureXMLFile().store(out, out_featureFile); @@ -281,8 +284,12 @@ class TOPPOpenSwathAnalyzer : public TOPPBase OpenSwath::SpectrumAccessPtr chromatogram_ptr = SimpleOpenMSSpectraFactory::getSpectrumAccessOpenMSPtr(exp); std::vector< OpenSwath::SwathMap > swath_maps(1); swath_maps[0].sptr = swath_ptr; + + + OpenSwathIsotopeGeneratorCacher isotopeCacher(feature_finder_param.getValue("DIAScoring:dia_nr_isotopes"), 1); + isotopeCacher.initialize(200.5, 2001.5, 1); featureFinder.pickExperiment(chromatogram_ptr, featureFile, - transition_exp_used, trafo, swath_maps, transition_group_map); + transition_exp_used, trafo, swath_maps, transition_group_map, isotopeCacher); // write all features and the protein identifications from tmp_featureFile into featureFile #ifdef _OPENMP diff --git a/src/topp/OpenSwathRTNormalizer.cpp b/src/topp/OpenSwathRTNormalizer.cpp index 095a3cd36a6..c701adc57ad 100644 --- a/src/topp/OpenSwathRTNormalizer.cpp +++ b/src/topp/OpenSwathRTNormalizer.cpp @@ -1,32 +1,32 @@ // -------------------------------------------------------------------------- -// OpenMS -- Open-Source Mass Spectrometry +// OpenMS -- Open-Source Mass Spectrometry // -------------------------------------------------------------------------- // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen, // ETH Zurich, and Freie Universitaet Berlin 2002-2022. -// +// // This software is released under a three-clause BSD license: // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. -// * Neither the name of any author or any participating institution -// may be used to endorse or promote products derived from this software +// * Neither the name of any author or any participating institution +// may be used to endorse or promote products derived from this software // without specific prior written permission. -// For a full list of authors, refer to the file AUTHORS. +// For a full list of authors, refer to the file AUTHORS. // -------------------------------------------------------------------------- // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING -// INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; -// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, -// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR -// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING +// INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// // -------------------------------------------------------------------------- // $Maintainer: Hannes Roest, George Rosenberger $ // $Authors: Hannes Roest, George Rosenberger $ @@ -110,7 +110,7 @@ class TOPPOpenSwathRTNormalizer : public TOPPBase registerInputFile_("tr", "", "", "transition file with the RT peptides ('TraML' or 'csv')"); setValidFormats_("tr", ListUtils::create("csv,traML")); - + registerOutputFile_("out", "", "", "output file"); setValidFormats_("out", ListUtils::create("trafoXML")); @@ -199,7 +199,7 @@ class TOPPOpenSwathRTNormalizer : public TOPPBase std::map PeptideRTMap; for (Size i = 0; i < targeted_exp.getCompounds().size(); i++) { - PeptideRTMap[targeted_exp.getCompounds()[i].id] = targeted_exp.getCompounds()[i].rt; + PeptideRTMap[targeted_exp.getCompounds()[i].id] = targeted_exp.getCompounds()[i].rt; } MzMLFile f; @@ -232,7 +232,7 @@ class TOPPOpenSwathRTNormalizer : public TOPPBase f.load(file_list[i], *xic_map.get()); // Initialize the featureFile and set its parameters (disable for example - // the RT score since here do not know the RT transformation) + // the RT score since here do not know the RT transformation) MRMFeatureFinderScoring featureFinder; Param scoring_params = getParam_().copy("algorithm:", true); scoring_params.setValue("Scores:use_rt_score", "false"); @@ -244,12 +244,15 @@ class TOPPOpenSwathRTNormalizer : public TOPPBase } featureFinder.setParameters(scoring_params); featureFinder.setStrictFlag(false); - + std::vector< OpenSwath::SwathMap > swath_maps(1); swath_maps[0].sptr = SimpleOpenMSSpectraFactory::getSpectrumAccessOpenMSPtr(swath_map); OpenSwath::SpectrumAccessPtr chromatogram_ptr = SimpleOpenMSSpectraFactory::getSpectrumAccessOpenMSPtr(xic_map); OpenMS::MRMFeatureFinderScoring::TransitionGroupMapType transition_group_map; - featureFinder.pickExperiment(chromatogram_ptr, featureFile, targeted_exp, trafo, swath_maps, transition_group_map); + + OpenSwathIsotopeGeneratorCacher isotopeCacher(scoring_params.getValue("DIAScoring:dia_nr_isotopes"), 1); + isotopeCacher.initialize(200.5, 2001.5, 1); + featureFinder.pickExperiment(chromatogram_ptr, featureFile, targeted_exp, trafo, swath_maps, transition_group_map, isotopeCacher); // add all the chromatograms to the output for (Size k = 0; k < xic_map->getChromatograms().size(); k++) @@ -259,7 +262,7 @@ class TOPPOpenSwathRTNormalizer : public TOPPBase // find most likely correct feature for each group and add it to the // "pairs" vector by computing pairs of iRT and real RT - std::map res = OpenSwathHelper::simpleFindBestFeature(transition_group_map, + std::map res = OpenSwathHelper::simpleFindBestFeature(transition_group_map, estimateBestPeptides, pepEstimationParams.getValue("OverallQualityCutoff")); for (std::map::iterator it = res.begin(); it != res.end(); ++it) { @@ -286,11 +289,11 @@ class TOPPOpenSwathRTNormalizer : public TOPPBase RTNormParams.getValue("RANSACMaxIterations"), max_rt_threshold, RTNormParams.getValue("RANSACSamplingSize")); } - else if (outlier_method == "none") + else if (outlier_method == "none") { pairs_corrected = pairs; } - else + else { throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("Illegal argument '") + outlier_method + "' used for outlierMethod (valid: 'iter_residual', 'iter_jackknife', 'ransac', 'none')."); diff --git a/src/utils/OpenSwathDIAPreScoring.cpp b/src/utils/OpenSwathDIAPreScoring.cpp index 9ed443e3c85..9dd247bebcf 100644 --- a/src/utils/OpenSwathDIAPreScoring.cpp +++ b/src/utils/OpenSwathDIAPreScoring.cpp @@ -200,7 +200,10 @@ class DIAPreScoring : //std::cout << "using data frame writer for storing data. Outfile :" << out << std::endl; OpenSwath::IDataFrameWriter* dfw = new OpenSwath::CSVWriter(fname); OpenMS::DiaPrescore dp; - dp.operator()(spectrumAccess, transition_exp_used, dfw, -1, -1); //note IM not supported here yet + + OpenSwathIsotopeGeneratorCacher isotopeCacher(feature_finder_param.getValue("DIAScoring:dia_nr_isotopes"), 1); + isotopeCacher.initialize(200.5, 2001.5, 1); + dp.operator()(spectrumAccess, transition_exp_used, dfw, -1, -1, isotopeCacher); //note IM not supported here yet delete dfw; //featureFinder.pickExperiment(chromatogram_ptr, out_featureFile, //transition_exp_used, trafo, swath_ptr, transition_group_map); From 558b4be3d71fd17d910e392a2f419489ee13ece2 Mon Sep 17 00:00:00 2001 From: jcharkow Date: Tue, 3 Jan 2023 15:52:59 -0500 Subject: [PATCH 11/13] [FIX] discrepancy in nr isotopes taken score behaviour was undefined because taking X isotopes for theoretical spectrum and X+1 isotopes for actual spectrum leading to non reproducible scoring. Edit cache to produce an extra isotope. Also revert getImmutable back to using upper bound because the rounding was a bit buggy. --- .../OpenSwathIsotopeGeneratorCacher.cpp | 118 +++++++++++++++++- .../ANALYSIS/OPENSWATH/OpenSwathWorkflow.cpp | 11 +- 2 files changed, 118 insertions(+), 11 deletions(-) diff --git a/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.cpp b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.cpp index 15d18294d66..c19cb8e7bbe 100644 --- a/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.cpp +++ b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathIsotopeGeneratorCacher.cpp @@ -46,6 +46,7 @@ namespace OpenMS void OpenSwathIsotopeGeneratorCacher::initialize(double massStart, double massEnd, double massStep) { massStep_ = massStep; + //std::cout << "initializing from " << massStart << " to " << massEnd << " with step " << massStep_ << std::endl; for (double mass=massStart; mass < massEnd; mass += massStep_) { // create the theoretical distribution @@ -99,6 +100,7 @@ namespace OpenMS IsotopeDistribution OpenSwathIsotopeGeneratorCacher::getImmutable(double mass) const { + //std::cout << "Begin search with mass " << mass << std::endl; if (mass <= (cachedIsotopeDistributions_.begin()->first + halfMassStep_) ) // all cached greater than target (with tolerance) { //std::cout << "All cached elements strictly greater than target (with tolerance)" << std::endl; @@ -116,22 +118,132 @@ namespace OpenMS else // mass is somewhere in the middle or all elements are less than mass { // map the mass to a function that only the bins are whole numbers, therefore we will be rounding to the nearest bin - double roundErr = 1.0 / massStep_; // map + //double roundErr = 1.0 / massStep_; // map + //double roundErr = 2.0; + auto upperBound = cachedIsotopeDistributions_.upper_bound(mass); + auto prevEle = upperBound; + prevEle--; + //std::cout << "ptr previous: " << prevEle->first << std::endl; + //std::cout << "upper bound (first element greater or equal to) " << mass << ": " << upperBound->first << std::endl; + + if (upperBound == cachedIsotopeDistributions_.end()) + { + //std::cout << "upper bound is at the end, meaning all elements are less than or equal to mass" << std::endl; + if (mass - prevEle->first <= halfMassStep_) + { + //std::cout << "last element is in range" << std::endl; + return prevEle->second; + } + else + { + //std::cout << "last element is not in range" << std::endl; + return computeMiss(mass); + } + } + else if ((mass - prevEle->first) > halfMassStep_) + { + //std::cout << "mass" << mass << std::endl; + //std::cout << "Previous element is too far away (half mass step: " << (mass - prevEle->first) << ")" << std::endl; + if ((upperBound->first - mass) <= halfMassStep_) + { + + //std::cout << "upper bound in range, match!" << std::endl; + /* + std::cout << "cache match is : " << upperBound->first << std::endl; + std::cout << "cache match is : " << upperBound->first << std::endl; + std::cout << "values are: "; + for (auto i:upperBound->second) + { + std::cout << i << " " << std::endl; + } + std::cout << std::endl; + */ + return upperBound->second; + } + else + { + //std::cout << "upper bound not in range, new entry" << std::endl; + return computeMiss(mass); + } + } + else //prevEle in range + { + //std::cout << "previous element is in range" << std::endl; + if ((upperBound->first - mass) > halfMassStep_) + { + //std::cout << "upper bound not in range match previous element" << std::endl; + /* + std::cout << "cache match is: " << prevEle->first << std::endl; + + std::cout << "cache match is: " << prevEle->first << std::endl; + std::cout << "values are: "; + + for (auto i:prevEle->second) + { + std::cout << i << " " << std::endl; + } + std::cout << std::endl; + */ + return prevEle->second; + } + else // both elements in range + { +// std::cout << "both elements in range see which is closer" << std::endl; + if ((upperBound->first - mass) <= (mass - prevEle->first)) + { +// std::cout << "upper bound is closer" << std::endl; + /* + std::cout << "cache match is : " << upperBound->first << std::endl; + + std::cout << "values are: "; + for (auto i:upperBound->second) + { + std::cout << i << " " << std::endl; + } + std::cout << std::endl; + */ + + return upperBound->second; + } + else + { + //std::cout << "previous element is closer" << std::endl; + + std::cout << "cache match is: " << prevEle->first << std::endl; + std::cout << "values are: "; + /* + for (auto i:prevEle->second) + { + std::cout << i << " " << std::endl; + } + */ + std::cout << std::endl; + return prevEle->second; + } + } + } + } + + /* + std::cout << "roundErr is: " << roundErr << std::endl; double massRounded = std::round(mass * roundErr) / roundErr; + std::cout << "mass rounded: " << massRounded << std::endl; auto foundEle = cachedIsotopeDistributions_.find(massRounded); + std::cout << "found element is " << foundEle->first << std::endl; if (foundEle != cachedIsotopeDistributions_.end()){ - //std::cout << "ele found" << std::endl; + std::cout << "ele found" << std::endl; return foundEle->second; } else { OPENMS_LOG_WARN << "Cache Miss" << std::endl; - //std::cout << "ele not found" << std::endl; + std::cout << "ele not found" << std::endl; return computeMiss(mass); } } + */ throw Exception::Postcondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Function should never end up here, all possible conditions exhausted"); return cachedIsotopeDistributions_.begin()->second; } diff --git a/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathWorkflow.cpp b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathWorkflow.cpp index 0a6f8fea5b1..b699a0bd84c 100644 --- a/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathWorkflow.cpp +++ b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathWorkflow.cpp @@ -181,7 +181,7 @@ namespace OpenMS featureFinder.setStrictFlag(false); // TODO remove this, it should be strict (e.g. all transitions need to be present for RT norm) - OpenSwathIsotopeGeneratorCacher isotopeCacher(feature_finder_param.getValue("DIAScoring:dia_nr_isotopes"), 1); + OpenSwathIsotopeGeneratorCacher isotopeCacher((int) feature_finder_param.getValue("DIAScoring:dia_nr_isotopes") + 1, 1); isotopeCacher.initialize(200.5, 2001.5, 1); featureFinder.pickExperiment(chromatogram_ptr, featureFile, transition_exp_used, empty_trafo, empty_swath_maps, transition_group_map, isotopeCacher); @@ -548,7 +548,7 @@ namespace OpenMS const OpenSwath::LightTargetedExperiment& transition_exp_used = transition_exp; - OpenSwathIsotopeGeneratorCacher isotopeCacher = OpenSwathIsotopeGeneratorCacher(feature_finder_param.getValue("DIAScoring:dia_nr_isotopes"), 1); + OpenSwathIsotopeGeneratorCacher isotopeCacher = OpenSwathIsotopeGeneratorCacher(feature_finder_param.getValue("DIAScoring:dia_nr_isotopes") + 1, 1); isotopeCacher.initialize(200.5, 2001.5, 1); @@ -645,7 +645,7 @@ namespace OpenMS }; // set up isotope cacher, to be used by all threads - OpenSwathIsotopeGeneratorCacher isotopeCacher = OpenSwathIsotopeGeneratorCacher(feature_finder_param.getValue("DIAScoring:dia_nr_isotopes"), 1); + OpenSwathIsotopeGeneratorCacher isotopeCacher = OpenSwathIsotopeGeneratorCacher((int) feature_finder_param.getValue("DIAScoring:dia_nr_isotopes") + 1, 1); isotopeCacher.initialize(200.5, 2001.5, 1); // (iv) Perform extraction and scoring of fragment ion chromatograms (MS2) @@ -1005,11 +1005,6 @@ namespace OpenMS std::vector to_tsv_output, to_osw_output; - // initialize the OpenSwathIsotopeGeneratorCacher - //OpenSwathIsotopeGeneratorCacher isotopeCacher = OpenSwathIsotopeGeneratorCacher(feature_finder_param.getValue("DIAScoring:dia_nr_isotopes"), 1); - - //isotopeCacher.initialize(200.5, 2001.5, 1); - /////////////////////////////////// // Start of main function // Iterating over all the assays From b62b1e171d0f2a20d329f0489702d287b4b4562e Mon Sep 17 00:00:00 2001 From: jcharkow Date: Tue, 3 Jan 2023 15:55:05 -0500 Subject: [PATCH 12/13] linear resampling speedup don't create new linear resampler each time in order to speed up workflow --- .../OPENSWATH/MRMTransitionGroupPicker.h | 54 +++++++++---------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/MRMTransitionGroupPicker.h b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/MRMTransitionGroupPicker.h index b743a7746c5..860d9f38828 100644 --- a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/MRMTransitionGroupPicker.h +++ b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/MRMTransitionGroupPicker.h @@ -125,8 +125,8 @@ namespace OpenMS String native_id = chromatogram.getNativeID(); // only pick detecting transitions (skip all others) - if (transition_group.getTransitions().size() > 0 && - transition_group.hasTransition(native_id) && + if (transition_group.getTransitions().size() > 0 && + transition_group.hasTransition(native_id) && !transition_group.getTransition(native_id).isDetectingTransition() ) { continue; @@ -209,7 +209,7 @@ namespace OpenMS bool skip = false; for (Size j = 0; j < i; j++) { - if ((double)mrm_feature.getMetaValue("leftWidth") >= (double)features[j].getMetaValue("leftWidth") && + if ((double)mrm_feature.getMetaValue("leftWidth") >= (double)features[j].getMetaValue("leftWidth") && (double)mrm_feature.getMetaValue("rightWidth") <= (double)features[j].getMetaValue("rightWidth") ) { skip = true; } } @@ -274,7 +274,7 @@ namespace OpenMS // Check for minimal peak width -> return empty feature (Intensity zero) if (use_consensus_) { - if (min_peak_width_ > 0.0 && std::fabs(best_right - best_left) < min_peak_width_) + if (min_peak_width_ > 0.0 && std::fabs(best_right - best_left) < min_peak_width_) { return mrmFeature; } @@ -283,7 +283,7 @@ namespace OpenMS { String outlier = "none"; double qual = computeQuality_(transition_group, picked_chroms, chr_idx, best_left, best_right, outlier); - if (qual < min_qual_) + if (qual < min_qual_) { return mrmFeature; } @@ -335,13 +335,13 @@ namespace OpenMS return mrmFeature; } - /** - + /** + @brief Apex-based peak picking Pick the peak with the closest apex to the consensus apex for each - chromatogram. Use the closest peak for the current peak. - + chromatogram. Use the closest peak for the current peak. + Note that we will only set the closest peak per chromatogram to zero, so if there are two peaks for some transitions, we will have to get to them later. If there is no peak, then we transfer transition boundaries from @@ -350,7 +350,7 @@ namespace OpenMS template void pickApex(std::vector& picked_chroms, const double best_left, const double best_right, const double peak_apex, - double &min_left, double &max_right, + double &min_left, double &max_right, std::vector< double > & left_edges, std::vector< double > & right_edges) { for (Size k = 0; k < picked_chroms.size(); k++) @@ -361,8 +361,8 @@ namespace OpenMS { PeakIntegrator::PeakArea pa_tmp = pi_.integratePeak( // get the peak apex picked_chroms[k], - picked_chroms[k].getFloatDataArrays()[PeakPickerMRM::IDX_LEFTBORDER][i], - picked_chroms[k].getFloatDataArrays()[PeakPickerMRM::IDX_RIGHTBORDER][i]); + picked_chroms[k].getFloatDataArrays()[PeakPickerMRM::IDX_LEFTBORDER][i], + picked_chroms[k].getFloatDataArrays()[PeakPickerMRM::IDX_RIGHTBORDER][i]); if (pa_tmp.apex_pos > 0.0 && std::fabs(pa_tmp.apex_pos - peak_apex) < peak_apex_dist_min) { // update best candidate peak_apex_dist_min = std::fabs(pa_tmp.apex_pos - peak_apex); @@ -370,7 +370,7 @@ namespace OpenMS } } - // Select master peak boundaries, or in the case we found at least one peak, the local peak boundaries + // Select master peak boundaries, or in the case we found at least one peak, the local peak boundaries double l = best_left; double r = best_right; if (min_dist >= 0) @@ -424,7 +424,7 @@ namespace OpenMS local_right = right_edges[k]; } - const SpectrumT& chromatogram = selectChromHelper_(transition_group, transition_group.getTransitions()[k].getNativeID()); + const SpectrumT& chromatogram = selectChromHelper_(transition_group, transition_group.getTransitions()[k].getNativeID()); if (transition_group.getTransitions()[k].isDetectingTransition()) { for (typename SpectrumT::const_iterator it = chromatogram.begin(); it != chromatogram.end(); it++) @@ -434,7 +434,7 @@ namespace OpenMS } // Compute total intensity on transition-level - double transition_total_xic = 0; + double transition_total_xic = 0; for (typename SpectrumT::const_iterator it = chromatogram.begin(); it != chromatogram.end(); it++) { @@ -486,7 +486,7 @@ namespace OpenMS } else if (peak_integration_ == "smoothed") { - if (smoothed_chroms.size() <= k) + if (smoothed_chroms.size() <= k) { throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Tried to calculate peak area and height without any smoothed chromatograms"); @@ -497,7 +497,7 @@ namespace OpenMS { throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("Peak integration chromatogram ") + peak_integration_ + " is not a valid method for MRMTransitionGroupPicker"); - } + } Feature f; double quality = 0; @@ -838,7 +838,7 @@ namespace OpenMS for (Size k = 0; k < picked_chroms.size(); k++) { const SpectrumT& chromatogram = selectChromHelper_(transition_group, picked_chroms[k].getNativeID()); - const SpectrumT used_chromatogram = resampleChromatogram_(chromatogram, + const SpectrumT used_chromatogram = resampleChromatogram_(chromatogram, master_peak_container, best_left - resample_boundary, best_right + resample_boundary); std::vector int_here; @@ -955,7 +955,7 @@ namespace OpenMS double score = shape_score - coel_score - 1.0 * missing_peaks / picked_chroms.size(); - OPENMS_LOG_DEBUG << " Computed score " << score << " (from " << shape_score << + OPENMS_LOG_DEBUG << " Computed score " << score << " (from " << shape_score << " - " << coel_score << " - " << 1.0 * missing_peaks / picked_chroms.size() << ")" << std::endl; return score; @@ -973,7 +973,7 @@ namespace OpenMS template void recalculatePeakBorders_(const std::vector& picked_chroms, double& best_left, double& best_right, double max_z) { - // 1. Collect all seeds that lie within the current seed + // 1. Collect all seeds that lie within the current seed // - Per chromatogram only the most intense one counts, otherwise very // - low intense peaks can contribute disproportionally to the voting // - procedure. @@ -1013,7 +1013,7 @@ namespace OpenMS return; } - // FEATURE IDEA: instead of Z-score use modified Z-score for small data sets + // FEATURE IDEA: instead of Z-score use modified Z-score for small data sets // http://d-scholarship.pitt.edu/7948/1/Seo.pdf // http://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm // 1. calculate median @@ -1031,8 +1031,8 @@ namespace OpenMS / right_borders.size() - mean * mean); std::sort(right_borders.begin(), right_borders.end()); - OPENMS_LOG_DEBUG << " - Recalculating right peak boundaries " << mean << " mean / best " - << best_right << " std " << stdev << " : " << std::fabs(best_right - mean) / stdev + OPENMS_LOG_DEBUG << " - Recalculating right peak boundaries " << mean << " mean / best " + << best_right << " std " << stdev << " : " << std::fabs(best_right - mean) / stdev << " coefficient of variation" << std::endl; // Compare right borders of best transition with the mean @@ -1048,8 +1048,8 @@ namespace OpenMS / left_borders.size() - mean * mean); std::sort(left_borders.begin(), left_borders.end()); - OPENMS_LOG_DEBUG << " - Recalculating left peak boundaries " << mean << " mean / best " - << best_left << " std " << stdev << " : " << std::fabs(best_left - mean) / stdev + OPENMS_LOG_DEBUG << " - Recalculating left peak boundaries " << mean << " mean / best " + << best_left << " std " << stdev << " : " << std::fabs(best_left - mean) / stdev << " coefficient of variation" << std::endl; // Compare left borders of best transition with the mean @@ -1130,8 +1130,7 @@ namespace OpenMS if (end != chromatogram.end()) {end++;} SpectrumT resampled_peak_container = master_peak_container; // copy the master container, which contains the RT values - LinearResamplerAlign lresampler; - lresampler.raster(begin, end, resampled_peak_container.begin(), resampled_peak_container.end()); + lresampler_.raster(begin, end, resampled_peak_container.begin(), resampled_peak_container.end()); return resampled_peak_container; } @@ -1164,6 +1163,7 @@ namespace OpenMS PeakPickerMRM picker_; PeakIntegrator pi_; + LinearResamplerAlign lresampler_; }; } From 96f745f4de954a1eb603035d438c39fb98db5d20 Mon Sep 17 00:00:00 2001 From: jcharkow Date: Sun, 8 Jan 2023 20:08:26 -0500 Subject: [PATCH 13/13] minor fix to build --- src/openms/source/ANALYSIS/OPENSWATH/OpenSwathWorkflow.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathWorkflow.cpp b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathWorkflow.cpp index b699a0bd84c..a0c597cd597 100644 --- a/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathWorkflow.cpp +++ b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathWorkflow.cpp @@ -548,7 +548,7 @@ namespace OpenMS const OpenSwath::LightTargetedExperiment& transition_exp_used = transition_exp; - OpenSwathIsotopeGeneratorCacher isotopeCacher = OpenSwathIsotopeGeneratorCacher(feature_finder_param.getValue("DIAScoring:dia_nr_isotopes") + 1, 1); + OpenSwathIsotopeGeneratorCacher isotopeCacher = OpenSwathIsotopeGeneratorCacher((int) feature_finder_param.getValue("DIAScoring:dia_nr_isotopes") + 1, 1); isotopeCacher.initialize(200.5, 2001.5, 1);