Skip to content

Commit

Permalink
Merge pull request EMSL-Computing#11 from deweycw/lcms
Browse files Browse the repository at this point in the history
Lcms
  • Loading branch information
corilo authored Nov 1, 2021
2 parents 6b1f368 + 5588fa9 commit 9750c6d
Show file tree
Hide file tree
Showing 10 changed files with 14,096 additions and 0 deletions.
450 changes: 450 additions & 0 deletions 15T_Neg_ESI_SRFA.csv

Large diffs are not rendered by default.

494 changes: 494 additions & 0 deletions 15T_Neg_ESI_SRFA.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions corems/molecular_id/factory/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,7 @@ def plot_ms_assigned_unassigned(self, assigned_color= 'b', unassigned_color = 'r
ax.get_yaxis().set_visible(False)
ax.spines['left'].set_visible(False)
plt.legend()

return ax

def plot_mz_error(self, color= 'g'):
Expand Down
689 changes: 689 additions & 0 deletions examples/notebooks/CoreMS_devel_lcms_isotopes.ipynb

Large diffs are not rendered by default.

94 changes: 94 additions & 0 deletions examples/scripts/CoreMS_tutorial.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
from pathlib import Path

from matplotlib import pyplot

from corems.encapsulation.factory.parameters import MSParameters
from corems.transient.input.brukerSolarix import ReadBrukerSolarix
from corems.mass_spectrum.calc.Calibration import MzDomainCalibration

file_location = "tests/tests_data/ESI_NEG_SRFA.d"

MSParameters.transient.apodization_method = "Hanning"
MSParameters.transient.number_of_truncations = 0
MSParameters.transient.number_of_zero_fills = 1

MSParameters.mass_spectrum.threshold_method = 'relative_abundance'
MSParameters.mass_spectrum.relative_abundance_threshold = 1

#MSParameters.mass_spectrum.threshold_method = 'signal_noise'
#MSParameters.mass_spectrum.s2n_threshold = 50

#MSParameters.mass_spectrum.threshold_method = 'auto'
#MSParameters.mass_spectrum.noise_threshold_std = 32

MSParameters.ms_peak.peak_min_prominence_percent = 1

def import_transient():

with ReadBrukerSolarix(file_location) as bruker_transient:

mass_spectrum = bruker_transient.get_mass_spectrum(plot_result=True, auto_process=True)

mass_spectrum.plot_profile_and_noise_threshold()

print("m/z count", len(mass_spectrum))

print('first m/z', mass_spectrum.mspeaks[0].mz_exp, 'final m/z', mass_spectrum.mspeaks[-1].mz_exp)

return mass_spectrum

mass_spectrum = import_transient()


MSParameters.mass_spectrum.min_calib_ppm_error = -5
MSParameters.mass_spectrum.max_calib_ppm_error = 5

mass_spectrum = import_transient()

ref_file_location = 'tests/tests_data/SRFA.ref'

MzDomainCalibration(mass_spectrum, ref_file_location).run()

from corems.molecular_id.search.molecularFormulaSearch import SearchMolecularFormulas
from corems.molecular_id.factory.classification import HeteroatomsClassification

mass_spectrum.molecular_search_settings.url_database = "postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp"

mass_spectrum.molecular_search_settings.error_method = 'None'
mass_spectrum.molecular_search_settings.min_ppm_error = -1
mass_spectrum.molecular_search_settings.max_ppm_error = 1

mass_spectrum.molecular_search_settings.min_dbe = 0
mass_spectrum.molecular_search_settings.max_dbe = 50

mass_spectrum.molecular_search_settings.isProtonated = True
mass_spectrum.molecular_search_settings.isRadical= False
mass_spectrum.molecular_search_settings.isadduct = True

mass_spectrum.molecular_search_settings.usedAtoms['C'] = (1,90)
mass_spectrum.molecular_search_settings.usedAtoms['H'] = (4,200)
mass_spectrum.molecular_search_settings.usedAtoms['O'] = (1,20)
mass_spectrum.molecular_search_settings.usedAtoms['N'] = (0,0)
mass_spectrum.molecular_search_settings.usedAtoms['S'] = (0,0)

SearchMolecularFormulas(mass_spectrum, first_hit=True).run_worker_mass_spectrum()
mass_spectrum.percentile_assigned(report_error=True)

mass_spectrum_by_classes = HeteroatomsClassification(mass_spectrum, choose_molecular_formula=True)
mass_spectrum_by_classes.plot_ms_assigned_unassigned()

for mspeaks in mass_spectrum.sort_by_abundance():
if mspeaks: #or just if mspeak:
for mf in mspeaks:
print(mf.mz_calc, mf.dbe, mf.class_label, mf.string_formated)

#exporting data
mass_spectrum.to_csv("test")

# save pandas Datarame to pickle
mass_spectrum.to_pandas("test")

# get pandas Dataframe
df = mass_spectrum.to_dataframe()

df.head()
66 changes: 66 additions & 0 deletions examples/scripts/LC-ICPMS_metal_peaks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
##### CWD 2021-10-27
#### (1) import .csv with LC-ICPMS data; (2) for each metal, pick peaks; (3) return dictionary with RT as key, metal as value

import warnings
warnings.filterwarnings("ignore")

import sys
sys.path.append("./")

from tqdm import tqdm

import os
import pandas as pd
import numpy as np

from pathlib import Path

import matplotlib.pyplot as plt
# from PySide2.QtWidgets import QFileDialog, QApplication
# from PySide2.QtCore import Qt

from corems.mass_spectra.input import rawFileReader
from corems.molecular_id.factory.classification import HeteroatomsClassification, Labels
from corems.molecular_id.search.priorityAssignment import OxygenPriorityAssignment
from corems.molecular_id.search.molecularFormulaSearch import SearchMolecularFormulas
from corems.encapsulation.factory.parameters import MSParameters

from corems.mass_spectra.calc import SignalProcessing as sp

#Import LC-ICPMS data and plot it:

import csv


def find_metal_peaks(metal, icp_data, sn, min_peak_datapoints):
icpt="Time_" + metal
rt = icp_data[icpt]
icp_signal = icp_data[metal]
max_icp_signal = np.max(icp_signal)
max_height = max_icp_signal
max_prominence = 100
peak_indices = sp.peak_picking_first_derivative(rt, icp_signal, max_height, max_prominence, max_icp_signal, min_peak_datapoints,
signal_threshold=sn, correct_baseline=True)
rt_aps = []
for peak_index in peak_indices:
rt_aps.append(rt[peak_index[1]])

rt_dict = dict.fromkeys(rt_aps,metal)
return rt_dict

icpfile = "tests/tests_data/cwd_211018_day7_8_c18_1uMcobalamin_10uL.csv"
icpdata = np.genfromtxt(icpfile, dtype=float, delimiter=',', names=True)

metal = '59Co'

icp_signal = icpdata[metal]
min_peak_datapoints = 20

rts = find_metal_peaks(metal,icpdata, 1, min_peak_datapoints)
print(rts)

fig, host = plt.subplots()
host.plot(icpdata['Time_'+metal],icpdata[metal])
host.set_xlabel('Time (s)')
host.set_ylabel(metal +' intensity (counts)')
plt.show()
Loading

0 comments on commit 9750c6d

Please sign in to comment.