@@ -2,3 +2,7 @@
*.pyc
lx/mfql/parsetab.py
test_resources/small_test/small_test-report.html
lx2/parser.out
lx2/parsetab.py
lx2/parser.out
lx2/parsetab.py
@@ -6,5 +6,7 @@ Obj = namedtuple('Obj', 'p_rule p_values')
Func = namedtuple('Func', 'func on')
ElementSeq = namedtuple('ElementSeq', 'txt')
Evaluable = namedtuple('Evaluable', 'operation term_1 term_2')
ReportItem = namedtuple('ReportItem', 'id p_values')
ReportItem = namedtuple('ReportItem', 'id p_values is_PCT_format')
ReportCol = namedtuple('ReportCol', 'col_name col_format col_eval_txt')
@@ -383,7 +383,7 @@ def p_expression_struct1(p):
def p_expression_attribute(p):
    '''expression : LPAREN expression RPAREN LBRACE ID RBRACE'''
    p[0] = Evaluable('p_expression_attribute', p[2], p[5])
    p[0] = Obj('p_expression_attribute', (p[2], p[5]))
def p_expression_paren(p):
@@ -427,11 +427,16 @@ def p_reportContent_single(p):
def p_rContent(p):
    '''reportItem : ID IS STRING PERCENT STRING SEMICOLON
    '''reportItem : ID IS STRING PERCENT LTUPLE arguments RTUPLE SEMICOLON
                  | ID IS STRING PERCENT LPAREN arguments RPAREN SEMICOLON
                  | ID IS expression SEMICOLON'''
    p[0] = ReportItem(p[1], p[3:-1])
    if len(p) > 5:
        is_PCT_format = True
        res = ReportItem(p[1], p[3:-1], is_PCT_format)
    else:
        is_PCT_format = False
        res = ReportItem(p[1], p[3], is_PCT_format)
    p[0] = res
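For reference, a sketch of the two result shapes this rule now produces; the MFQL report lines and the exact p_values layout are illustrative assumptions, not taken from the test query:

# RAWMASS = "%4.4f" % "(PR.mass)";   -> one of the PERCENT alternatives, len(p) > 5:
#     ReportItem(id='RAWMASS', p_values=p[3:-1], is_PCT_format=True)
# SPECIES = PR.chemsc;               -> the plain-expression alternative:
#     ReportItem(id='SPECIES', p_values=p[3], is_PCT_format=False)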
def p_error(p):
@@ -444,8 +449,24 @@ def p_error(p):
parser = yacc.yacc()#(debug=0, optimize=0)
def fromFile(filename):
    with open(filename, 'rU') as f:
    with open(filename, 'r') as f:
        mfql_str = f.read()
    res = parser.parse(mfql_str, lexer=lexer)
    return res
# if __name__ == "__main__":
#     filename = 'test_resources\\small_test\\170213_CE_pos_MSMS.mfql'
#     with open(filename, 'r') as f:
#         mfql_str = f.read()
#     lexer.input(mfql_str)
#     while True:
#         tok = lexer.token()
#         if not tok:
#             break  # No more input
#         print(tok)
#     mfql_dict = parser.parse(mfql_str, lexer=lexer)
#     print(mfql_dict['report'])
#     print('done')
@@ -14,10 +14,19 @@ tokens = keywords + (
    'EQUALS', 'IS', 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'LPAREN',
    'RPAREN', 'LT', 'LE', 'GT', 'GE', 'IFA', 'IFF', 'NE', 'COMMA', 'SEMICOLON',
    'FLOAT', 'STRING', 'ID', 'INTEGER', 'DOT', 'PERCENT', 'LBRACE',
    'RBRACE', 'LBRACKET', 'RBRACKET', 'SFSTRING', 'ARROW', 'ARROWR'
    'RBRACE', 'LBRACKET', 'RBRACKET', 'SFSTRING', 'ARROW', 'ARROWR',
    'LTUPLE', 'RTUPLE'
)
#
# https://stackoverflow.com/questions/2910338/python-yacc-lexer-token-priority
def t_LTUPLE(t):
    r'\"\('
    return t
def t_RTUPLE(t):
    r'\)\"'
    return t
def t_ID(t):
    r'[a-zA-Z$][a-zA-Z$0-9]*'
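A standalone sketch of why the definition order matters here (PLY tries function-defined rules in the order they appear, per the link above): with LTUPLE/RTUPLE defined before a quoted-string rule, '"(' and ')"' are split off instead of being swallowed by a string match. The t_STRING rule below is a stand-in, not the project's actual one:

import ply.lex as lex

tokens = ('LTUPLE', 'RTUPLE', 'STRING', 'ID', 'DOT')

def t_LTUPLE(t):
    r'\"\('
    return t

def t_RTUPLE(t):
    r'\)\"'
    return t

def t_STRING(t):          # stand-in string rule; defined after the tuple delimiters
    r'\"[^"]*\"'
    return t

def t_ID(t):
    r'[a-zA-Z$][a-zA-Z$0-9]*'
    return t

t_DOT = r'\.'
t_ignore = ' \t'

def t_error(t):
    t.lexer.skip(1)

demo = lex.lex()
demo.input('"(PR.mass)"')
print([tok.type for tok in demo])
# -> ['LTUPLE', 'ID', 'DOT', 'ID', 'RTUPLE']; with t_STRING defined first the same
#    input would come back as a single STRING token instead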
@@ -83,4 +92,4 @@ def t_error(t):
# build lexer
lexer = lex.lex()#= lex.lex(reflags = re.I, debug = 0, optimize = 0)
lexer = lex.lex()  # lex.lex(reflags = re.I, debug = 1, optimize = 0)
import warnings
from collections import namedtuple
from data_structs import Obj, ElementSeq, Evaluable, Func, ReportItem, ReportCol
from chemParser import txt2dict
from targets import Targets_util
import pandas as pd
def ElementSeq2m(elementSeq):
    seq_txt = elementSeq.txt
    tmp = txt2dict(seq_txt)
    target = Targets_util(tmp)
    target._makeRanges()
    target._makeDF()
    target._cal_M()
    return target._df['m'][0]
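A throwaway usage sketch; the element-sequence text is a placeholder, since the exact syntax txt2dict accepts is not shown in this diff:

from data_structs import ElementSeq

# monoisotopic mass of a hypothetical sum formula (run with this module in scope)
print(ElementSeq2m(ElementSeq('C45 H86 N O8 P')))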
def txt(evaluable):
    res = None
    if type(evaluable) in [int, float, str]:
        res = str(evaluable)
    elif isinstance(evaluable, list):
        if len(evaluable) == 1:
            res = txt(evaluable[0])
    elif isinstance(evaluable, Evaluable):
        res = f'{txt(evaluable.term_1)} {evaluable.operation.lower()} {txt(evaluable.term_2)}'
    elif isinstance(evaluable, Func):
        if evaluable.func == 'isOdd':
            res = f'{txt(evaluable.on)} % 2 != 0'  # odd check as a pandas-eval expression
        elif evaluable.func == 'avg':
            res = f'{txt(evaluable.on)}'  # do nothing
    elif isinstance(evaluable, Obj):
        if evaluable.p_rule == 'p_withAttr_accessItem_':
            if evaluable.p_values[2] == 'chemsc':
                item = evaluable.p_values[-2]
                if item == 'db': item = 'dbr'  # refa: rename db to dbr
                res = f'{evaluable.p_values[0]}_{item}'
        elif evaluable.p_rule == 'p_withAttr_id':
            if evaluable.p_values[2] == 'chemsc':
                res = f'{evaluable.p_values[0]}_target'
            elif evaluable.p_values[2] == 'intensity':
                res = f'{evaluable.p_values[0]}_i'
            elif evaluable.p_values[2] == 'mass':
                res = f'{evaluable.p_values[0]}_m'
            elif evaluable.p_values[2] == 'isobaric':
                res = f'{evaluable.p_values[0]}_target'
                warnings.warn(' *** how to deal with isobaric ***')
            elif evaluable.p_values[2] == 'errppm':
                res = f'{evaluable.p_values[0]}_ppm'
        elif evaluable.p_rule == 'p_expression_attribute':
            if type(evaluable.p_values[0]) == Obj and \
                    evaluable.p_values[0].p_rule == 'p_withAttr_id' and \
                    evaluable.p_values[0].p_values[2] == 'chemsc':
                item = evaluable.p_values[1]
                if item == 'db': item = 'dbr'  # refa: rename db to dbr
                res = f'{evaluable.p_values[0].p_values[0]}_{item}'
    elif isinstance(evaluable, ElementSeq):
        res = f'{ElementSeq2m(evaluable)}'
    else:
        warnings.warn(f'could not evaluate {evaluable}')
        res = str(evaluable)
    if res is None:
        warnings.warn(f'did not evaluate {evaluable}')
    return res
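To make the recursion concrete, a hand-built example (run with this module in scope); the operator spelling and the p_values layout for p_withAttr_id are assumptions about the parser's output:

from data_structs import Obj, Evaluable

lhs = Obj('p_withAttr_id', ('PR', '.', 'intensity'))   # PR.intensity
expr = Evaluable('>', lhs, 100000)                     # PR.intensity > 100000
print(txt(expr))
# -> 'PR_i > 100000', i.e. a pandas-eval expression over the flattened column names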
def suchthat2txt(suchthat):
    return txt(suchthat)
def report2exec_txt(report):
    res = []
    for reportItem in report:
        name = reportItem.id
        if reportItem.is_PCT_format:
            pct_format = reportItem.p_values[0]
            col_tuple = reportItem.p_values[3]
            tuple_txt = [txt(t) for t in col_tuple]
            exec_txt = ', '.join(tuple_txt)
            col = ReportCol(name, pct_format, exec_txt)
        elif type(reportItem.p_values) in [int, float, str]:  # just a string
            col = ReportCol(name, None, reportItem.p_values)
        else:
            col = ReportCol(name, '%s', txt(reportItem.p_values))
        res.append(col)
    return res
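A sketch of the mapping this performs, with hand-built ReportItems (the p_values layout of the %-formatted item is an assumption; run with this module in scope):

from data_structs import ReportItem, Obj

items = [
    ReportItem('MASS', ['%4.4f', '%', '"(', [Obj('p_withAttr_id', ('PR', '.', 'mass'))], ')"'], True),
    ReportItem('INTENS', Obj('p_withAttr_id', ('PR', '.', 'intensity')), False),
]
for col in report2exec_txt(items):
    print(col)
# -> ReportCol(col_name='MASS', col_format='%4.4f', col_eval_txt='PR_m')
#    ReportCol(col_name='INTENS', col_format='%s', col_eval_txt='PR_i')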
def reportCols2DF(reportCols, df):
    rep_df = pd.DataFrame(index=df.index)
    for reportCol in reportCols:
        print(reportCol)
        if reportCol.col_format is None:
            rep_df[reportCol.col_name] = reportCol.col_eval_txt
            print(f'1: {reportCol.col_eval_txt}')
        else:
            eval_res = df.eval(reportCol.col_eval_txt)
            col_format = reportCol.col_format
            print(f'2: {type(eval_res)} {col_format}')
            if type(eval_res) == list:
                eval_res = zip(*eval_res)
            rep_df[reportCol.col_name] = [col_format % tup for tup in eval_res]
    return rep_df
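A minimal end-to-end sketch with a toy DataFrame (column names and values are illustrative; run with this module in scope):

import pandas as pd
from data_structs import ReportCol

df = pd.DataFrame({'PR_m': [674.4859, 702.5172], 'PR_i': [1.2e5, 3.4e5]})
cols = [
    ReportCol('SPECIES', None, 'CE'),     # literal text column (col_format is None)
    ReportCol('MASS', '%4.4f', 'PR_m'),   # formatted from a pandas-eval expression
]
print(reportCols2DF(cols, df))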
if __name__ == '__main__':
    filename = 'test_resources\\small_test\\170213_CE_pos_MSMS.mfql'
    from mfql_Parser import fromFile
    mfql_dict = fromFile(filename)
    res = suchthat2txt(mfql_dict['suchthat'])
    print('\n'.join([str(r) for r in res]))
    print('*******************************')
    res = report2exec_txt(mfql_dict['report'])
    print('\n'.join([str(r) for r in res]))
@@ -2,6 +2,7 @@ import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import namedtuple
from chemParser import txt2dict
class Targets_util():
@@ -83,6 +84,8 @@ class Targets_util():
    @staticmethod
    def devideAllCombo(all_df):
        #TODO try https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.filter.html
        #df filter
        all_df.sort_values(['PR_ppm', 'FR_ppm'], inplace=True)
        cols = all_df.columns
        pr_cols = [col for col in cols if col.startswith('PR_')]
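Regarding the TODO above, a hedged sketch of how DataFrame.filter could replace the hand-built column lists (column names are illustrative):

import pandas as pd

all_df = pd.DataFrame(columns=['PR_m', 'PR_ppm', 'FR_m', 'FR_ppm'])
pr_df = all_df.filter(regex=r'^PR_')   # same selection as the startswith() comprehension
fr_df = all_df.filter(regex=r'^FR_')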
@@ -124,7 +127,6 @@ class Targets_util():
    def lollipop_plot(m, i):
        # https://python-graph-gallery.com/180-basic-lollipop-plot/
        # https://stackoverflow.com/questions/1358977/how-to-make-several-plots-on-a-single-page-using-matplotlib
        (markerline, stemlines, baseline) = plt.stem(m, i)  # markerfmt=' '
        plt.setp(baseline, visible=False)
        return plt
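A throwaway usage sketch (assumes lollipop_plot is reachable through the class like the other static helpers here; the m/z and intensity values are made up):

from targets import Targets_util

Targets_util.lollipop_plot([674.4859, 702.5172], [1.2e5, 3.4e5]).show()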
@@ -138,6 +140,14 @@ class Targets_util():
            plt.xlim([g_df[prefix+'m'].min(), g_df[prefix+'m'].max()])
            plt.show()
            if sample: break  # only show one
    @staticmethod
    def var2Target(var):
        elements = txt2dict(var.object.txt)
        target = Targets_util(elements)
        dbrs = var.Options.get('dbr', (None, None))
        target.set_dbr(*dbrs)
        return target
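A usage sketch with a hypothetical stand-in for the parser's variable record; the real object only needs an .object carrying a .txt element sequence and an .Options dict, and the element-sequence text here is a placeholder:

from collections import namedtuple
from data_structs import ElementSeq
from targets import Targets_util

Var = namedtuple('Var', 'object Options')                        # hypothetical stand-in
var = Var(ElementSeq('C[30..50] H[40..100] O[2]'), {'dbr': (1.5, 7.5)})
target = Targets_util.var2Target(var)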