...
 
Commits (7)
import mfql_Parser
from MS_reader import SpectraUtil
from targets import Targets_util
import parse_adapter
import logging, os
log = logging.getLogger(os.path.basename(__file__))
import time
import pickle
if __name__ == "__main__":
    # End-to-end smoke run of the MFQL pipeline: parse a query file, match
    # its precursor (PR) and fragment (FR) targets against MS1/MS2 spectra,
    # apply the query's SUCHTHAT filter, and evaluate its REPORT section.
    # NOTE(review): the body of this guard had lost its indentation in the
    # original and was not valid Python; restored here.
    mzml_file = 'test_resources\\small_test\\190321_Serum_Lipidextract_368723_01.mzML'
    mfql_file = 'test_resources\\small_test\\170213_CE_pos_MSMS.mfql'

    mfql = mfql_Parser.fromFile(mfql_file)
    targets = Targets_util.vars2targets(mfql['variables'])
    # Spectra can alternatively be loaded from a pickle cache of a previous
    # run (mzml_file + '.pkl') to skip the slow mzML parse.
    spectra = SpectraUtil.fromFile(mzml_file)

    # Two views over the same raw data: MS1 for precursors, MS2 for fragments.
    MS1 = spectra
    MS2 = spectra.get_reset_copy()
    MS1.set_mode()
    MS1.set_ms_level()
    MS2.set_mode()
    MS2.set_ms_level(2)

    # By convention the first parsed variable is the precursor target and the
    # second the fragment target -- TODO confirm this holds for all queries.
    PR = targets[0]
    FR = targets[1]
    MS1_match = MS1.get_nearest(PR._df)
    MS2_match = MS2.get_nearest(FR._df)
    MS1_match = Targets_util.set_max_daltons(MS1_match)
    MS2_match = Targets_util.set_max_daltons(MS2_match)

    # Cross-join PR and FR matches, then keep only the combinations that
    # satisfy the query's SUCHTHAT expression (evaluated via DataFrame.query).
    all_match = Targets_util.makeAllCombo(MS1_match, MS2_match)
    st = parse_adapter.suchthat2txt(mfql['suchthat'])
    ST = all_match.query(st)

    pr_df, fr_df = Targets_util.devideAllCombo(ST)
    # Summary is computed but not used downstream yet; kept for inspection.
    sum_df = Targets_util.summaryDF(pr_df, quantile=1)
    averaged_df = Targets_util.lx1_DF(ST)

    # Build the report columns from the query's REPORT section and evaluate
    # them against the averaged results.
    reportCols = parse_adapter.report2exec_txt(mfql['report'])
    res = parse_adapter.reportCols2DF(reportCols, averaged_df)
    print(res)
\ No newline at end of file
# lextab.py. This file automatically created by PLY (version 3.11). Don't edit!
# Cached lexer tables for the MFQL tokenizer.  PLY regenerates this module
# whenever the lexer rules change, so any manual edits here will be lost.
_tabversion = '3.10'
# Every token type the MFQL lexer can emit.
_lextokens = set(('AND', 'ARROW', 'ARROWR', 'AS', 'CHG', 'COMMA', 'DA', 'DBR', 'DEFINE', 'DIVIDE', 'DOT', 'EQUALS', 'FLOAT', 'GE', 'GT', 'ID', 'IDENTIFY', 'IFA', 'IFF', 'IN', 'INTEGER', 'IS', 'LBRACE', 'LBRACKET', 'LE', 'LPAREN', 'LT', 'LTUPLE', 'MASSRANGE', 'MAXOCC', 'MINOCC', 'MINUS', 'MS1', 'MS2', 'NE', 'NEUTRALLOSS', 'NOT', 'OR', 'PERCENT', 'PLUS', 'PPM', 'QUERYNAME', 'RBRACE', 'RBRACKET', 'REPORT', 'RES', 'RPAREN', 'RTUPLE', 'SEMICOLON', 'SFSTRING', 'STRING', 'SUCHTHAT', 'TIMES', 'TOLERANCE', 'WITH'))
# Flags passed to re.compile (64 == re.VERBOSE).
_lexreflags = 64
# No single-character literal tokens are defined.
_lexliterals = ''
# Only the default INITIAL lexer state exists, and it is inclusive.
_lexstateinfo = {'INITIAL': 'inclusive'}
# Master pattern per state: one alternation of named groups, paired with a
# list mapping each group to either a rule function name ('t_...') or a
# plain token type.  Longer operators (e.g. '<=>', '==') are listed before
# their prefixes so the alternation matches greedily.
_lexstatere = {'INITIAL': [('(?P<t_LTUPLE>\\"\\()|(?P<t_RTUPLE>\\)\\")|(?P<t_ID>[a-zA-Z$][a-zA-Z$0-9]*)|(?P<t_comment>[ ]*\\043[^\\n]*)|(?P<t_WS>[ \\t]+)|(?P<t_WS_NL>(([ \\t]*)\\n))|(?P<t_UNDERSCORE>_)|(?P<t_FLOAT>(\\+|-)?((\\d*\\.\\d+)(E[\\+-]?\\d+)?|([1-9]\\d*E[\\+-]?\\d+)))|(?P<t_INTEGER>(\\+|-)?\\d+)|(?P<t_STRING>\\".*?\\")|(?P<t_SFSTRING>\\\'.*?\\\')|(?P<t_IFF><=>)|(?P<t_EQUALS>==)|(?P<t_PLUS>\\+)|(?P<t_TIMES>\\*)|(?P<t_LPAREN>\\()|(?P<t_RPAREN>\\))|(?P<t_LBRACE>\\[)|(?P<t_RBRACE>\\])|(?P<t_LBRACKET>\\{)|(?P<t_RBRACKET>\\})|(?P<t_LE><=)|(?P<t_NE><>)|(?P<t_GE>>=)|(?P<t_ARROW>->)|(?P<t_ARROWR><~)|(?P<t_IFA>=>)|(?P<t_COMMA>\\,)|(?P<t_DOT>\\.)|(?P<t_IS>=)|(?P<t_MINUS>-)|(?P<t_DIVIDE>/)|(?P<t_LT><)|(?P<t_GT>>)|(?P<t_SEMICOLON>;)|(?P<t_PERCENT>%)', [None, ('t_LTUPLE', 'LTUPLE'), ('t_RTUPLE', 'RTUPLE'), ('t_ID', 'ID'), ('t_comment', 'comment'), ('t_WS', 'WS'), ('t_WS_NL', 'WS_NL'), None, None, ('t_UNDERSCORE', 'UNDERSCORE'), (None, 'FLOAT'), None, None, None, None, None, (None, 'INTEGER'), None, (None, 'STRING'), (None, 'SFSTRING'), (None, 'IFF'), (None, 'EQUALS'), (None, 'PLUS'), (None, 'TIMES'), (None, 'LPAREN'), (None, 'RPAREN'), (None, 'LBRACE'), (None, 'RBRACE'), (None, 'LBRACKET'), (None, 'RBRACKET'), (None, 'LE'), (None, 'NE'), (None, 'GE'), (None, 'ARROW'), (None, 'ARROWR'), (None, 'IFA'), (None, 'COMMA'), (None, 'DOT'), (None, 'IS'), (None, 'MINUS'), (None, 'DIVIDE'), (None, 'LT'), (None, 'GT'), (None, 'SEMICOLON'), (None, 'PERCENT')])]}
# Characters ignored outright in each state (none here; whitespace is
# handled by the t_WS / t_WS_NL rules instead).
_lexstateignore = {'INITIAL': ''}
# Name of the error-handler rule per state.
_lexstateerrorf = {'INITIAL': 't_error'}
# End-of-file handlers per state (none defined).
_lexstateeoff = {}
......@@ -446,7 +446,7 @@ def p_error(p):
detail = "Syntax error at '%s' in file at position %s %s" % (p .value,p.lineno , p.lexpos)
raise SyntaxError(detail)
parser = yacc.yacc()#(debug=0, optimize=0)
parser = yacc.yacc(debug=False, optimize=True)
def fromFile(filename):
with open(filename, 'r') as f:
......
......@@ -92,4 +92,4 @@ def t_error(t):
# build lexer
lexer = lex.lex() #lex.lex(eflags = re.I, debug = 1, optimize = 0)
lexer = lex.lex( debug = False, optimize = True)
......@@ -16,7 +16,7 @@ def ElementSeq2m(elementSeq):
return target._df['m'][0]
def txt(evaluable):
def txt(evaluable, forReport = False):
res = None
if type(evaluable) in [int,float, str]:
res = str(evaluable)
......@@ -38,7 +38,7 @@ def txt(evaluable):
res = f'{evaluable.p_values[0]}_{item}'
elif evaluable.p_rule == 'p_withAttr_id':
if evaluable.p_values[2] == 'chemsc':
res = f'{evaluable.p_values[0]}_target'
res = f'{evaluable.p_values[0]}_target' if not forReport else f'{evaluable.p_values[0]}_chem'
elif evaluable.p_values[2] == 'intensity':
res = f'{evaluable.p_values[0]}_i'
elif evaluable.p_values[2] == 'mass':
......@@ -81,22 +81,21 @@ def report2exec_txt(report):
elif type(reportItem.p_values) in [int,float, str]: # just a string
col = ReportCol(name, None , reportItem.p_values)
else:
col = ReportCol(name, '%s', txt(reportItem.p_values))
col = ReportCol(name, '', txt(reportItem.p_values, forReport=True))
res.append(col)
return res
def reportCols2DF(reportCols, df):
    """Evaluate each ReportCol against *df* and assemble the report table.

    Arguments:
        reportCols -- ReportCol entries (name, format string, evaluable
            expression text), as produced by report2exec_txt
        df {dataframe} -- data the column expressions are evaluated against

    NOTE(review): 'col_fortmat' (sic) is the attribute name as spelled on
    ReportCol; renaming it would require touching that class too.
    """
    rep_df = pd.DataFrame(index = df.index)
    for reportCol in reportCols:
        print(reportCol)  # debug output -- presumably leftover; consider removing
        if reportCol.col_fortmat is None:
            # No format at all: the literal text is broadcast to every row.
            rep_df[reportCol.col_name] = reportCol.col_eval_txt
            print(f'1: {reportCol.col_eval_txt}')  # debug leftover
        elif reportCol.col_fortmat == '':  # empty format given: evaluate but don't format
            rep_df[reportCol.col_name] = df.eval(reportCol.col_eval_txt)
        else:
            # Evaluate the expression, then render each row through the
            # %-style format string.
            eval_res = df.eval(reportCol.col_eval_txt)
            col_format = reportCol.col_fortmat
            print(f'2: {type(eval_res)} {col_format}')  # debug leftover
            if type(eval_res) == list: # more than one result
                # Multiple sub-expressions: transpose into per-row tuples.
                eval_res = zip(*eval_res)
            rep_df[reportCol.col_name] = [col_format % tup for tup in eval_res]
......
This diff is collapsed.
This diff is collapsed.
......@@ -105,7 +105,8 @@ class Targets_util():
@staticmethod
def summaryDF(df, prefix='PR_', quantile=0.25):
groups = df.groupby([prefix+'C', prefix+'dbr'])
#TODO try https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.filter.html
groups = df.groupby([prefix+'chem'])
columns = ['C_dbr', 'ppm_mean', 'i_mean', 'i_rsd', 'count']
columns = [prefix+col for col in columns]
tups = []
......@@ -122,6 +123,32 @@ class Targets_util():
df_summary.sort_values(sort_col, ascending =True, inplace = True)
sort_smallest = df_summary[sort_col] <= df_summary[sort_col].quantile(quantile)
return df_summary.loc[sort_smallest]
@staticmethod
def lx1_DF(df):
"""makes the result as similar as lx1 as possible, ie averaged
Arguments:
df {dataframe} -- that contins all the data as from 'suchthat'
Returns:
dataframe -- with the averaged values
"""
columns = df.columns
tups = []
for pr_chem, pr_df in df.groupby('PR_chem'):
for fr_chem, fr_df in pr_df.groupby('FR_chem'):
tup = ()
fv_idx = fr_df.first_valid_index()
for col in columns:
if col[3:] in ['m', 'i']:
tup += (fr_df[col].mean(),)
else:
tup += (fr_df[col][fv_idx],)
tups.append(tup)
df_summary = pd.DataFrame(tups, columns=columns)
return df_summary
@staticmethod
def lollipop_plot(m, i):
......@@ -149,6 +176,10 @@ class Targets_util():
target.set_dbr(*dbrs)
return target
@staticmethod
def vars2targets(vars):
    """Convert every parsed MFQL variable into a Target.

    Arguments:
        vars {iterable} -- variable entries from the parsed MFQL query

    Returns:
        list -- var2Target result for each entry, in the same order
    """
    return list(map(Targets_util.var2Target, vars))