Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
lifs-public
lipidxplorer
Commits
1d9ad0dc
Commit
1d9ad0dc
authored
Sep 25, 2019
by
Eduardo Miranda
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
nils file reader improvement
parent
47e72555
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
63 additions
and
23 deletions
+63
-23
lx/fileReader/mzAPI/__init__.py
lx/fileReader/mzAPI/__init__.py
+7
-7
lx/fileReader/mzAPI/mzML.py
lx/fileReader/mzAPI/mzML.py
+53
-13
lx/readSpectra.py
lx/readSpectra.py
+3
-3
No files found.
lx/fileReader/mzAPI/__init__.py
View file @
1d9ad0dc
...
@@ -249,9 +249,8 @@ def make_info_file(data_file, **kwargs):
...
@@ -249,9 +249,8 @@ def make_info_file(data_file, **kwargs):
#Pickle object
#Pickle object
fh
=
open
(
data_file
+
'.mzi'
,
'w'
)
with
open
(
data_file
+
'.mzi'
,
'w'
)
as
fh
:
cPickle
.
dump
(
info_list
,
fh
)
cPickle
.
dump
(
info_list
,
fh
)
fh
.
close
()
else
:
else
:
import
lx.fileReader.mzAPI.mzML
import
lx.fileReader.mzAPI.mzML
lx
.
fileReader
.
mzAPI
.
mzML
.
make_info_file
(
data_file
)
lx
.
fileReader
.
mzAPI
.
mzML
.
make_info_file
(
data_file
)
...
@@ -352,7 +351,7 @@ class mzFile(object):
...
@@ -352,7 +351,7 @@ class mzFile(object):
"""
"""
if
data_file
.
lower
().
endswith
(
'.lnk'
):
if
data_file
.
lower
().
endswith
(
'.lnk'
):
data_file
=
follow_link
(
data_file
)
data_file
=
self
.
follow_link
(
data_file
)
if
data_file
.
lower
().
startswith
(
'http://'
):
if
data_file
.
lower
().
startswith
(
'http://'
):
import
lx.fileReader.mzAPI.mzURL
import
lx.fileReader.mzAPI.mzURL
...
@@ -430,16 +429,17 @@ class mzFile(object):
...
@@ -430,16 +429,17 @@ class mzFile(object):
raise
NotImplementedError
(
'Subclasses must implement this method'
)
raise
NotImplementedError
(
'Subclasses must implement this method'
)
def
scan
(
self
,
time
):
def
scan
(
self
,
scan_id
,
time
):
"""Gets scan based on the specified scan time
"""Gets scan based on the specified scan
id (id attribute in mzML) or the
time
The id will be preferred over the time, and is used as a lookup key in the scan cache.
The scan is a list of (mz, intensity, resolution, baseline,
The scan is a list of (mz, intensity, resolution, baseline,
noise, charge) tuples. Actually only recent versions of the raw
noise, charge) tuples. Actually only recent versions of the raw
file format returns all of those. Normally only mz and
file format returns all of those. Normally only mz and
intensity are filled, the others set to zero.
intensity are filled, the others set to zero.
Example:
Example:
>>> scan = myPeakFile.scan(
20.035
)
>>> scan = myPeakFile.scan(
"controllerType=0 controllerNumber=1 scan=3161", 23.21
)
"""
"""
...
...
lx/fileReader/mzAPI/mzML.py
View file @
1d9ad0dc
...
@@ -24,6 +24,7 @@
...
@@ -24,6 +24,7 @@
#MRM_Q3MS_TEXT As String = "Q3MS "
#MRM_Q3MS_TEXT As String = "Q3MS "
#MRM_SRM_TEXT As String = "SRM ms2"
#MRM_SRM_TEXT As String = "SRM ms2"
#MRM_FullNL_TEXT As String = "Full cnl " ' MRM neutral loss
#MRM_FullNL_TEXT As String = "Full cnl " ' MRM neutral loss
import
mmap
from
lx.exceptions
import
LipidXException
from
lx.exceptions
import
LipidXException
...
@@ -130,9 +131,8 @@ def make_info_file(data_file):
...
@@ -130,9 +131,8 @@ def make_info_file(data_file):
m
=
mzFile
(
data_file
)
m
=
mzFile
(
data_file
)
m
.
_build_info_scans
()
m
.
_build_info_scans
()
fh
=
open
(
data_file
+
'.mzi'
,
'wb'
)
with
open
(
data_file
+
'.mzi'
,
'wb'
)
as
fh
:
cPickle
.
dump
(
m
.
_info_scans
,
fh
)
cPickle
.
dump
(
m
.
_info_scans
,
fh
)
fh
.
close
()
class
mzFile
(
mzAPImzFile
):
class
mzFile
(
mzAPImzFile
):
...
@@ -147,7 +147,6 @@ class mzFile(mzAPImzFile):
...
@@ -147,7 +147,6 @@ class mzFile(mzAPImzFile):
use, there's the _build_info_scans method, but in general it makes
use, there's the _build_info_scans method, but in general it makes
more sense to create an .mzi file.
more sense to create an .mzi file.
"""
"""
_xp_time
=
etree
.
XPath
((
'./mz:scanList/mz:scan/'
_xp_time
=
etree
.
XPath
((
'./mz:scanList/mz:scan/'
'mz:cvParam[@name="scan start time"]/'
'mz:cvParam[@name="scan start time"]/'
'attribute::value'
),
'attribute::value'
),
...
@@ -178,6 +177,7 @@ class mzFile(mzAPImzFile):
...
@@ -178,6 +177,7 @@ class mzFile(mzAPImzFile):
namespaces
=
NSd
,
smart_strings
=
False
)
namespaces
=
NSd
,
smart_strings
=
False
)
_xp_tic
=
etree
.
XPath
(
'./mz:cvParam[@name="total ion current"]/attribute::value'
,
_xp_tic
=
etree
.
XPath
(
'./mz:cvParam[@name="total ion current"]/attribute::value'
,
namespaces
=
NSd
,
smart_strings
=
False
)
namespaces
=
NSd
,
smart_strings
=
False
)
scan_map
=
{}
def
__init__
(
self
,
data_file
,
**
kwargs
):
def
__init__
(
self
,
data_file
,
**
kwargs
):
self
.
file_type
=
'mzml'
self
.
file_type
=
'mzml'
...
@@ -186,18 +186,25 @@ class mzFile(mzAPImzFile):
...
@@ -186,18 +186,25 @@ class mzFile(mzAPImzFile):
if
data_file
.
lower
().
endswith
(
'.mzml.gz'
):
if
data_file
.
lower
().
endswith
(
'.mzml.gz'
):
self
.
fileobj
=
gzip
.
GzipFile
(
data_file
,
mode
=
'rb'
)
self
.
fileobj
=
gzip
.
GzipFile
(
data_file
,
mode
=
'rb'
)
else
:
else
:
self
.
fileobj
=
open
(
data_file
,
mode
=
'rb'
)
with
open
(
data_file
,
"r+b"
)
as
f
:
self
.
mmap_fileobj
=
f
self
.
fileobj
=
mmap
.
mmap
(
f
.
fileno
(),
0
)
if
os
.
path
.
exists
(
data_file
+
'.mzi'
):
if
os
.
path
.
exists
(
data_file
+
'.mzi'
):
self
.
_info_file
=
data_file
+
'.mzi'
self
.
_info_file
=
data_file
+
'.mzi'
info_fh
=
open
(
self
.
_info_file
)
with
open
(
self
.
_info_file
)
as
info_fh
:
self
.
_info_scans
=
cPickle
.
load
(
info_fh
)
self
.
_info_scans
=
cPickle
.
load
(
info_fh
)
info_fh
.
close
()
else
:
else
:
self
.
_info_file
=
None
self
.
_info_file
=
None
self
.
_info_scans
=
None
def
close
(
self
):
def
close
(
self
):
self
.
fileobj
.
close
()
self
.
fileobj
.
close
()
if
self
.
mmap_fileobj
is
not
None
:
self
.
mmap_fileobj
.
close
()
self
.
_info_file
=
None
self
.
_info_scans
=
None
self
.
scan_map
=
{}
def
scan_list
(
self
,
start_time
=
None
,
stop_time
=
None
,
start_mz
=
0
,
stop_mz
=
99999
):
def
scan_list
(
self
,
start_time
=
None
,
stop_time
=
None
,
start_mz
=
0
,
stop_mz
=
99999
):
if
start_time
is
None
:
if
start_time
is
None
:
...
@@ -207,7 +214,7 @@ class mzFile(mzAPImzFile):
...
@@ -207,7 +214,7 @@ class mzFile(mzAPImzFile):
if
self
.
_info_file
:
if
self
.
_info_file
:
return
[
(
i
[
'time'
],
i
[
'mz'
])
for
i
in
self
.
_info_scans
return
[
(
i
[
'time'
],
i
[
'mz'
])
for
i
in
self
.
_info_scans
if
(
start_time
<=
i
[
'time'
]
and
((
not
stop_time
)
or
i
[
'time'
]
<=
stop_time
))
if
(
start_time
<=
i
[
'time'
]
and
((
not
stop_time
)
or
i
[
'time'
]
<=
stop_time
))
and
(
i
[
'scan_
mod
e'
]
==
'MS1'
or
start_mz
<=
i
[
'mz'
]
<=
stop_mz
)
]
and
(
i
[
'scan_
typ
e'
]
==
'MS1'
or
start_mz
<=
i
[
'mz'
]
<=
stop_mz
)
]
scan_list
=
[]
scan_list
=
[]
...
@@ -263,6 +270,10 @@ class mzFile(mzAPImzFile):
...
@@ -263,6 +270,10 @@ class mzFile(mzAPImzFile):
self
.
fileobj
.
seek
(
0
)
self
.
fileobj
.
seek
(
0
)
context
=
etree
.
iterparse
(
self
.
fileobj
,
events
=
(
'end'
,),
context
=
etree
.
iterparse
(
self
.
fileobj
,
events
=
(
'end'
,),
tag
=
'%sspectrum'
%
NS
)
tag
=
'%sspectrum'
%
NS
)
n_ms1
=
0
n_ms2
=
0
n_ms1_filtered
=
0
n_ms2_filtered
=
0
for
event
,
elem
in
context
:
for
event
,
elem
in
context
:
xt
=
self
.
_xp_time
(
elem
)
xt
=
self
.
_xp_time
(
elem
)
if
xt
:
if
xt
:
...
@@ -311,7 +322,7 @@ class mzFile(mzAPImzFile):
...
@@ -311,7 +322,7 @@ class mzFile(mzAPImzFile):
elif
self
.
_xp_frg_pis
(
elem
):
elif
self
.
_xp_frg_pis
(
elem
):
p
=
float
(
self
.
_xp_frg_pis
(
elem
)[
0
])
p
=
float
(
self
.
_xp_frg_pis
(
elem
)[
0
])
else
:
else
:
print
"
this ms2 didn't have
a precursor
m/z
or
a
fragment
scan m/z..."
,
elem
.
get
(
"
id
"
)
print
"
Skipping scan: '%s' ; Could not find
a precursor or fragment
!"
%
(
elem
.
get
(
'
id
'
)
)
else
:
else
:
p
=
precursor
p
=
precursor
...
@@ -325,6 +336,20 @@ class mzFile(mzAPImzFile):
...
@@ -325,6 +336,20 @@ class mzFile(mzAPImzFile):
scan_name
=
elem
.
get
(
'id'
)
scan_name
=
elem
.
get
(
'id'
)
scan_info
.
append
((
time
,
mz
,
scan_name
,
'MS2'
,
scan_mode
,
polarity
,
total_ic
,
0
,
0
))
scan_info
.
append
((
time
,
mz
,
scan_name
,
'MS2'
,
scan_mode
,
polarity
,
total_ic
,
0
,
0
))
# caching here changes the results :-/
# load mz and intensity arrays proactively
mz
,
it
=
zip
(
*
self
.
_scan_from_spec_node
(
elem
,
xt
))
empty
=
[
0
for
i
in
range
(
len
(
mz
))]
self
.
scan_map
[
scan_name
]
=
zip
(
list
(
mz
),
list
(
it
),
empty
,
empty
,
empty
,
empty
)
n_ms2
=
n_ms2
+
1
else
:
#print "Skipping scan: '%s' ; m/z='%f' is out of defined range!"%(elem.get('id'), mz)
n_ms2_filtered
=
n_ms2_filtered
+
1
else
:
#print "Skipping scan: '%s' ; Could not find a precursor!"%(elem.get('id'))
n_ms2_filtered
=
n_ms2_filtered
+
1
else
:
else
:
if
self
.
_xp_prof
(
elem
):
if
self
.
_xp_prof
(
elem
):
scan_mode
=
'p'
scan_mode
=
'p'
...
@@ -333,10 +358,23 @@ class mzFile(mzAPImzFile):
...
@@ -333,10 +358,23 @@ class mzFile(mzAPImzFile):
scan_name
=
elem
.
get
(
'id'
)
scan_name
=
elem
.
get
(
'id'
)
scan_info
.
append
((
time
,
0.0
,
scan_name
,
'MS1'
,
scan_mode
,
polarity
,
total_ic
,
0
,
0
))
scan_info
.
append
((
time
,
0.0
,
scan_name
,
'MS1'
,
scan_mode
,
polarity
,
total_ic
,
0
,
0
))
# caching here changes the results :-/
# load mz and intensity arrays proactively
mz
,
it
=
zip
(
*
self
.
_scan_from_spec_node
(
elem
,
xt
))
empty
=
[
0
for
i
in
range
(
len
(
mz
))]
self
.
scan_map
[
scan_name
]
=
zip
(
list
(
mz
),
list
(
it
),
empty
,
empty
,
empty
,
empty
)
n_ms1
=
n_ms1
+
1
else
:
n_ms1_filtered
=
n_ms1_filtered
+
1
else
:
else
:
print
"this spectrum didn't have a scan time..."
,
elem
.
get
(
"id"
)
print
"this spectrum didn't have a scan time..."
,
elem
.
get
(
"id"
)
n_ms1_filtered
=
n_ms1_filtered
+
1
elem
.
clear
()
elem
.
clear
()
#total_scans = n_ms1+n_ms1_filtered+n_ms2+n_ms2_filtered
#print "Loaded %d of %d scan info objects, %d MS1 scans, %d MS2 scans, filtered %d MS1 scans and %d MS2 scans."%(len(scan_info), total_scans, n_ms1, n_ms2, n_ms1_filtered, n_ms2_filtered)
return
scan_info
return
scan_info
def
scan_time_from_scan_name
(
self
,
scan_name
):
def
scan_time_from_scan_name
(
self
,
scan_name
):
...
@@ -361,10 +399,13 @@ class mzFile(mzAPImzFile):
...
@@ -361,10 +399,13 @@ class mzFile(mzAPImzFile):
else
:
else
:
print
"scan not found:"
,
scan_name
print
"scan not found:"
,
scan_name
def
scan
(
self
,
time
):
def
scan
(
self
,
scan_id
,
time
):
# this implementation takes a few seconds and uses very little memory
# this implementation takes a few seconds and uses very little memory
# (by keeping only the current closest scan)
# (by keeping only the current closest scan)
if
self
.
scan_map
.
has_key
(
scan_id
):
return
self
.
scan_map
[
scan_id
]
if
self
.
_info_file
:
if
self
.
_info_file
:
closest_item
=
self
.
_info_scans
.
closest
(
key
=
'time'
,
value
=
time
)
closest_item
=
self
.
_info_scans
.
closest
(
key
=
'time'
,
value
=
time
)
spec_start
,
spec_size
=
closest_item
[
'offset'
],
closest_item
[
'size'
]
spec_start
,
spec_size
=
closest_item
[
'offset'
],
closest_item
[
'size'
]
...
@@ -374,7 +415,6 @@ class mzFile(mzAPImzFile):
...
@@ -374,7 +415,6 @@ class mzFile(mzAPImzFile):
mz
,
it
=
zip
(
*
self
.
_scan_from_spec_node
(
spec
,
closest_item
[
'time'
],
prefix
=
False
))
mz
,
it
=
zip
(
*
self
.
_scan_from_spec_node
(
spec
,
closest_item
[
'time'
],
prefix
=
False
))
empty
=
[
0
for
i
in
range
(
len
(
mz
))]
empty
=
[
0
for
i
in
range
(
len
(
mz
))]
return
zip
(
list
(
mz
),
list
(
it
),
empty
,
empty
,
empty
,
empty
)
return
zip
(
list
(
mz
),
list
(
it
),
empty
,
empty
,
empty
,
empty
)
#return self._scan_from_spec_node(spec, closest_item['time'], prefix=False)
self
.
fileobj
.
seek
(
0
)
self
.
fileobj
.
seek
(
0
)
context
=
etree
.
iterparse
(
self
.
fileobj
,
events
=
(
'end'
,),
context
=
etree
.
iterparse
(
self
.
fileobj
,
events
=
(
'end'
,),
...
@@ -600,7 +640,7 @@ class mzFileInMemory:
...
@@ -600,7 +640,7 @@ class mzFileInMemory:
# now there's nothing to do, because there's no handle on the mzML file
# now there's nothing to do, because there's no handle on the mzML file
#pass
#pass
def
scan_list
(
self
,
start_timeNone
,
stop_time
=
None
,
start_mz
=
0
,
stop_mz
=
99999
):
def
scan_list
(
self
,
start_time
=
None
,
stop_time
=
None
,
start_mz
=
0
,
stop_mz
=
99999
):
if
start_time
is
None
or
stop_time
is
None
:
if
start_time
is
None
or
stop_time
is
None
:
(
file_start_time
,
file_stop_time
)
=
self
.
time_range
()
(
file_start_time
,
file_stop_time
)
=
self
.
time_range
()
if
start_time
is
None
:
if
start_time
is
None
:
...
...
lx/readSpectra.py
View file @
1d9ad0dc
...
@@ -113,7 +113,7 @@ def add_Sample(
...
@@ -113,7 +113,7 @@ def add_Sample(
else
:
else
:
polarity
=
pol
polarity
=
pol
scan
=
mz_file
.
scan
(
t
)
scan
=
mz_file
.
scan
(
scan_id
=
sn
,
time
=
t
)
# don't consider empty scans
# don't consider empty scans
if
len
(
scan
)
==
0
:
if
len
(
scan
)
==
0
:
...
@@ -159,7 +159,7 @@ def add_Sample(
...
@@ -159,7 +159,7 @@ def add_Sample(
else
:
else
:
polarity
=
pol
polarity
=
pol
scan
=
mz_file
.
scan
(
t
)
scan
=
mz_file
.
scan
(
scan_id
=
sn
,
time
=
t
)
# don't consider empty scans
# don't consider empty scans
if
len
(
scan
)
==
0
:
if
len
(
scan
)
==
0
:
...
@@ -571,7 +571,7 @@ def add_Sample_AVG(
...
@@ -571,7 +571,7 @@ def add_Sample_AVG(
else
:
else
:
polarity
=
pol
polarity
=
pol
scan
=
mz_file
.
scan
(
t
)
scan
=
mz_file
.
scan
(
scan_id
=
sn
,
time
=
t
)
# don't consider empty scans
# don't consider empty scans
if
len
(
scan
)
==
0
:
if
len
(
scan
)
==
0
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment