Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
shinyscreen
Manage
Activity
Members
Labels
Plan
Wiki
External wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Environmental Cheminformatics
shinyscreen
Commits
2f9cdc4a
Commit
2f9cdc4a
authored
1 year ago
by
Todor Kondić
Browse files
Options
Downloads
Patches
Plain Diff
extraction, api: Removed old extraction mechanism.
parent
f99bf3ec
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
NAMESPACE
+0
-3
0 additions, 3 deletions
NAMESPACE
R/api.R
+4
-190
4 additions, 190 deletions
R/api.R
R/extraction.R
+0
-600
0 additions, 600 deletions
R/extraction.R
with
4 additions
and
793 deletions
NAMESPACE
+
0
−
3
View file @
2f9cdc4a
...
...
@@ -6,7 +6,6 @@ export(conf_trans)
export(create_plots)
export(create_stub_gui)
export(extr_data)
export(extract)
export(get_fn_comp)
export(get_fn_conf)
export(get_fn_extr)
...
...
@@ -47,8 +46,6 @@ export(sort_spectra)
export(subset_summary)
export(tk_save_file)
import(data.table)
importFrom(MSnbase,filterMz)
importFrom(MSnbase,readMSData)
importFrom(promises,"%...>%")
importFrom(promises,future_promise)
importFrom(shiny,HTML)
...
...
This diff is collapsed.
Click to expand it.
R/api.R
+
4
−
190
View file @
2f9cdc4a
...
...
@@ -312,26 +312,10 @@ mk_tol_funcs <- function(m) {
}
extr_data2
<-
function
(
m
)
{
message
(
"Stage: extract"
)
if
(
is.null
(
m
$
conf
$
serial
)
||
!
m
$
conf
$
serial
)
{
extr_data_future
(
m
)
}
else
{
message
(
"(extract): Serial extraction."
)
extr_data_serial
(
m
)
}
}
##' @export
extr_data
<-
function
(
m
)
{
fine
=
create_fine_table
(
m
)
## fine_with_rt = fine[!is.na(rt_min)]
## fine_wo_rt = fine[is.na(rt_min)]
## fine_mz_rt = as.matrix(fine[,.(iso_fine_min,iso_fine_max)])
## fine_rt_rt = as.matrix(fine[,.(rt_min,rt_max)])
dpath
=
m
$
run
$
paths
$
data
...
...
@@ -357,15 +341,8 @@ extr_data <-function(m) {
## Extract MS2 chromatograms.
## Create the "coarse" table. Parent masses are known with
## "coarse". We will prefilter our ms2 results based on that...
x
## "coarse". We will prefilter our ms2 results based on that...
coarse
=
create_coarse_table
(
m
)
## Filter ms2 based on coarse. TODO
## coarse_ms2 = coarse[,filter_coarse(lms[[file]],.SD,lfdata[[file]]),
## by="file"]
## Join ms1 chromatogram data to ms2, connecting by the parent scan. TODO
## cgram_ms2 = coarse_ms2[,filter_parent_scans(lms[[file]],.SD,lfdata[[file]])]
cgram_ms2
=
data.table
(
precid
=
integer
(
0
),
...
...
@@ -374,7 +351,9 @@ extr_data <-function(m) {
idx
=
integer
(
0
),
rt
=
numeric
(
0
),
intensity
=
numeric
(
0
))
## Extract MS2 spectra.
spectra
=
empty_spectra_table
()
for
(
fn
in
names
(
lfdata
))
{
...
...
@@ -394,171 +373,6 @@ extr_data <-function(m) {
}
extr_data_future
<-
function
(
m
)
{
## Reduce the comp table to only unique masses (this is because
## different sets can have same masses).
m
$
out
$
tab
$
data
<-
m
$
out
$
tab
$
comp
[,
head
(
.SD
,
1
),
by
=
BASE_KEY
]
m
$
out
$
tab
$
data
[,
set
:=
NULL
]
#This column is meaningless now.
file
<-
m
$
out
$
tab
$
data
[,
unique
(
file
)]
fpaths
<-
file.path
(
m
$
run
$
paths
$
data
,
file
)
allCEs
<-
do.call
(
c
,
args
=
lapply
(
fpaths
,
function
(
fn
)
{
z
<-
MSnbase
::
readMSData
(
files
=
fn
,
msLevel
=
c
(
1
,
2
),
mode
=
"onDisk"
)
unique
(
MSnbase
::
collisionEnergy
(
z
),
fromLast
=
T
)
}))
allCEs
<-
unique
(
allCEs
)
allCEs
<-
allCEs
[
!
is.na
(
allCEs
)]
cols
<-
paste
(
'CE'
,
allCEs
,
sep
=
''
)
vals
<-
rep
(
NA
,
length
(
cols
))
m
$
out
$
tab
$
data
[,(
cols
)
:=
.
(
rep
(
NA
,
.N
))]
file
<-
m
$
out
$
tab
$
data
[,
unique
(
file
)]
ftags
<-
m
$
out
$
tab
$
data
[,
.
(
tag
=
unique
(
tag
)),
by
=
file
]
fpaths
<-
file.path
(
m
$
run
$
paths
$
data
,
ftags
[,
file
])
futuref
<-
m
$
future
tmp
<-
lapply
(
1
:
nrow
(
ftags
),
function
(
ii
)
{
fn
<-
fpaths
[[
ii
]]
the_tag
<-
ftags
[
ii
,
tag
]
message
(
"(extract): Commencing extraction for tag: "
,
the_tag
,
"; file: "
,
fn
)
tab
<-
as.data.frame
(
data.table
::
copy
(
m
$
out
$
tab
$
data
[
tag
==
the_tag
,
.
(
file
,
tag
,
adduct
,
mz
,
rt
,
ID
)]))
## err_ms1_eic <- m$extr$tol$eic
## err_coarse_fun <- m$extr$tol$coarse
## err_fine_fun <- m$extr$tol$fine
## err_rt <- m$extr$tol$rt
err_coarse
<-
m
$
conf
$
tolerance
[[
"ms1 coarse"
]]
err_fine
<-
m
$
conf
$
tolerance
[[
"ms1 fine"
]]
err_ms1_eic
<-
m
$
conf
$
tolerance
$
eic
err_rt
<-
m
$
conf
$
tolerance
$
rt
missing_precursor_info
<-
m
$
conf
$
extract
$
missing_precursor_info
x
<-
futuref
(
extract
(
fn
=
fn
,
tag
=
the_tag
,
tab
=
tab
,
err_ms1_eic
=
err_ms1_eic
,
err_coarse
=
err_coarse
,
err_fine
=
err_fine
,
err_rt
=
err_rt
,
missing_precursors
=
missing_precursor_info
),
lazy
=
F
)
x
})
msk
<-
sapply
(
tmp
,
future
::
resolved
)
curr_done
<-
which
(
msk
)
for
(
x
in
curr_done
)
{
message
(
"Done extraction for "
,
future
::
value
(
tmp
[[
x
]])
$
ms1
$
tag
[[
1
]])
}
while
(
!
all
(
msk
))
{
msk
<-
sapply
(
tmp
,
future
::
resolved
)
newly_done
<-
which
(
msk
)
for
(
x
in
setdiff
(
newly_done
,
curr_done
))
{
message
(
"Done extraction for "
,
future
::
value
(
tmp
[[
x
]])
$
ms1
$
tag
[[
1
]])
}
Sys.sleep
(
0.5
)
curr_done
<-
newly_done
}
ztmp
<-
lapply
(
tmp
,
future
::
value
)
m
$
extr
$
ms1
<-
data.table
::
rbindlist
(
lapply
(
ztmp
,
function
(
x
)
x
$
ms1
))
m
$
extr
$
ms2
<-
data.table
::
rbindlist
(
lapply
(
ztmp
,
function
(
x
)
x
$
ms2
))
data.table
::
setkeyv
(
m
$
extr
$
ms1
,
BASE_KEY
)
data.table
::
setkeyv
(
m
$
extr
$
ms2
,
c
(
BASE_KEY
,
"CE"
))
fn_ex
<-
get_fn_extr
(
m
)
timetag
<-
format
(
Sys.time
(),
"%Y%m%d_%H%M%S"
)
saveRDS
(
object
=
m
,
file
=
file.path
(
m
$
run
$
paths
$
project
,
FN_EXTR_STATE
))
m
}
extr_data_serial
<-
function
(
m
)
{
## Reduce the comp table to only unique masses (this is because
## different sets can have same masses).
m
$
out
$
tab
$
data
<-
m
$
out
$
tab
$
comp
[,
head
(
.SD
,
1
),
by
=
BASE_KEY
]
m
$
out
$
tab
$
data
[,
set
:=
NULL
]
#This column is meaningless now.
file
<-
m
$
out
$
tab
$
data
[,
unique
(
file
)]
fpaths
<-
file.path
(
m
$
run
$
paths
$
data
,
file
)
allCEs
<-
do.call
(
c
,
args
=
lapply
(
fpaths
,
function
(
fn
)
{
z
<-
MSnbase
::
readMSData
(
files
=
fn
,
msLevel
=
c
(
1
,
2
),
mode
=
"onDisk"
)
unique
(
MSnbase
::
collisionEnergy
(
z
),
fromLast
=
T
)
}))
allCEs
<-
unique
(
allCEs
)
allCEs
<-
allCEs
[
!
is.na
(
allCEs
)]
cols
<-
paste
(
'CE'
,
allCEs
,
sep
=
''
)
vals
<-
rep
(
NA
,
length
(
cols
))
m
$
out
$
tab
$
data
[,(
cols
)
:=
.
(
rep
(
NA
,
.N
))]
file
<-
file.path
(
m
$
run
$
paths
$
data
,
m
$
out
$
tab
$
data
[,
unique
(
file
)])
ftags
<-
m
$
out
$
tab
$
data
[,
.
(
tag
=
unique
(
tag
)),
by
=
file
]
ftags
[,
path
:=
file.path
(
..m
$
run
$
paths
$
data
,
file
)]
futuref
<-
m
$
future
tmp
<-
lapply
(
1
:
nrow
(
ftags
),
function
(
ii
)
{
fn
<-
ftags
[
ii
,
path
]
the_tag
<-
ftags
[
ii
,
tag
]
message
(
"(extract): Commencing extraction for tag: "
,
the_tag
,
"; file: "
,
fn
)
tab
<-
as.data.frame
(
data.table
::
copy
(
m
$
out
$
tab
$
data
[
tag
==
the_tag
,
.
(
file
,
tag
,
adduct
,
mz
,
rt
,
ID
)]))
## err_ms1_eic <- m$extr$tol$eic
## err_coarse_fun <- m$extr$tol$coarse
## err_fine_fun <- m$extr$tol$fine
## err_rt <- m$extr$tol$rt
err_coarse
<-
m
$
conf
$
tolerance
[[
"ms1 coarse"
]]
err_fine
<-
m
$
conf
$
tolerance
[[
"ms1 fine"
]]
err_ms1_eic
<-
m
$
conf
$
tolerance
$
eic
err_rt
<-
m
$
conf
$
tolerance
$
rt
missing_precursor_info
<-
m
$
conf
$
extract
$
missing_precursor_info
x
<-
extract
(
fn
=
fn
,
tag
=
the_tag
,
tab
=
tab
,
err_ms1_eic
=
err_ms1_eic
,
err_coarse
=
err_coarse
,
err_fine
=
err_fine
,
err_rt
=
err_rt
,
missing_precursors
=
missing_precursor_info
)
message
(
"Done extraction for "
,
x
$
ms1
$
tag
[[
1
]])
x
})
ztmp
<-
tmp
m
$
extr
$
ms1
<-
data.table
::
rbindlist
(
lapply
(
ztmp
,
function
(
x
)
x
$
ms1
))
m
$
extr
$
ms2
<-
data.table
::
rbindlist
(
lapply
(
ztmp
,
function
(
x
)
x
$
ms2
))
data.table
::
setkeyv
(
m
$
extr
$
ms1
,
BASE_KEY
)
data.table
::
setkeyv
(
m
$
extr
$
ms2
,
c
(
BASE_KEY
,
"CE"
))
fn_ex
<-
get_fn_extr
(
m
)
timetag
<-
format
(
Sys.time
(),
"%Y%m%d_%H%M%S"
)
saveRDS
(
object
=
m
,
file
=
file.path
(
m
$
run
$
paths
$
project
,
FN_EXTR_STATE
))
m
}
##' @export
conf_trans
<-
function
(
conf
)
{
conf
$
prescreen
<-
conf_trans_pres
(
conf
$
prescreen
)
...
...
This diff is collapsed.
Click to expand it.
R/extraction.R
+
0
−
600
View file @
2f9cdc4a
...
...
@@ -12,606 +12,6 @@
## See the License for the specific language governing permissions and
## limitations under the License.
load_raw_data
<-
function
(
fn
,
mode
=
"inMemory"
)
{
ms1
<-
MSnbase
::
readMSData
(
files
=
fn
,
mode
=
mode
,
msLevel.
=
1
)
ms2
<-
MSnbase
::
readMSData
(
files
=
fn
,
mode
=
mode
,
msLevel.
=
2
)
c
(
ms1
=
ms1
,
ms2
=
ms2
)
}
centroided1
<-
function
(
ms
)
{
if
(
all
(
MSnbase
::
centroided
(
ms
))
==
T
)
return
(
T
)
else
{
state
<-
MSnbase
::
isCentroided
(
ms
)
N
<-
length
(
state
)
fls
<-
length
(
which
(
state
==
F
))
if
(
fls
/
(
1
.
*
N
)
<
0.01
)
T
else
F
}
}
centroided
<-
function
(
msvec
)
{
if
(
is.vector
(
msvec
))
{
f
<-
list
()
for
(
i
in
1
:
length
(
msvec
))
{
f
[[
i
]]
<-
future
::
future
(
centroided1
(
msvec
[[
i
]]))
}
lapply
(
f
,
FUN
=
future
::
value
)
}
else
{
centroided1
(
msvec
)
}
}
acq_mz
<-
function
(
tabFn
)
{
df
<-
read.csv
(
tabFn
,
stringsAsFactors
=
F
,
comment.char
=
''
)
x
<-
as.numeric
(
df
$
mz
)
names
(
x
)
<-
paste
(
"ID:"
,
as.character
(
df
$
ID
),
sep
=
''
)
x
}
name2id
<-
function
(
nm
)
{
as.integer
(
substring
(
nm
,
4
))}
id2name
<-
function
(
id
)
{
paste
(
"ID:"
,
id
,
sep
=
''
)}
ppm2dev
<-
function
(
m
,
ppm
)
1e-6
*
ppm
*
m
gen_mz_range
<-
function
(
mz
,
err
)
{
mat
<-
matrix
(
data
=
numeric
(
1
),
nrow
=
length
(
mz
),
ncol
=
2
)
mat
[,
1
]
<-
mz
-
err
mat
[,
2
]
<-
mz
+
err
mat
}
gen_rt_range
<-
function
(
rt
,
err
)
{
mat
<-
matrix
(
data
=
numeric
(
1
),
nrow
=
length
(
rt
),
ncol
=
2
)
rV
<-
which
(
!
is.na
(
rt
))
rNA
<-
which
(
is.na
(
rt
))
mat
[
rV
,
1
]
<-
(
rt
[
rV
]
-
err
)
*
60
mat
[
rV
,
2
]
<-
(
rt
[
rV
]
+
err
)
*
60
mat
[
rNA
,
1
]
<--
Inf
mat
[
rNA
,
2
]
<-
Inf
mat
}
filt_ms2_by_prcs
<-
function
(
ms2
,
mzrng
,
ids
,
adduct
)
{
pre
<-
MSnbase
::
precursorMz
(
ms2
)
psn
<-
MSnbase
::
precScanNum
(
ms2
)
acN
<-
MSnbase
::
acquisitionNum
(
ms2
)
nR
<-
length
(
pre
)
inRange
<-
function
(
i
)
{
mp
<-
pre
[[
i
]]
x
<-
mzrng
[,
1
]
<
mp
&
mp
<
mzrng
[,
2
]
ind
<-
which
(
x
)
sids
<-
ids
[
ind
]
add
<-
adduct
[
ind
]
dtable
(
ID
=
sids
,
adduct
=
add
)
}
lst
<-
lapply
(
1
:
nR
,
function
(
i
)
{
dt
<-
inRange
(
i
)
list
(
n
=
i
,
prec_scan
=
psn
[[
i
]],
aN
=
acN
[[
i
]],
ids
=
dt
$
ID
,
adduct
=
dt
$
adduct
)
})
nemp
<-
sapply
(
lst
,
function
(
m
)
length
(
m
$
ids
)
>
0
)
wrk
<-
lst
[
nemp
]
dfL
<-
sum
(
sapply
(
wrk
,
function
(
w
)
length
(
w
$
ids
)))
df
<-
dtable
(
ID
=
character
(
dfL
),
adduct
=
character
(
dfL
),
prec_scan
=
integer
(
dfL
),
aN
=
integer
(
dfL
),
OK
=
logical
(
dfL
))
df
$
OK
<-
T
#TODO Introduced for testing, be careful.
offD
<
-0
for
(
m
in
wrk
)
{
l
<-
length
(
m
$
ids
)
rng
<-
(
offD
+1
)
:
(
offD
+
l
)
df
[
rng
,
"ID"
]
<-
m
$
ids
df
[
rng
,
"prec_scan"
]
<-
m
$
prec_scan
df
[
rng
,
"aN"
]
<-
m
$
aN
df
[
rng
,
"adduct"
]
<-
m
$
adduct
offD
<-
offD
+
l
}
df
[
order
(
df
$
aN
),]
}
filt_ms2_by_prcs_ht
<-
function
(
ms2
,
mzrng
,
ids
,
adduct
)
{
lgnd
<-
filt_ms2_by_prcs
(
ms2
,
mzrng
=
mzrng
,
ids
=
ids
,
adduct
=
adduct
)
scans
<-
unique
(
lgnd
$
aN
)
ns
<-
which
(
MSnbase
::
acquisitionNum
(
ms2
)
%in%
scans
)
sms2
<-
ms2
[
ns
]
list
(
ms2
=
sms2
,
leg
=
lgnd
)
}
pick_unique_precScans
<-
function
(
idx
)
{
ps
<-
unique
(
idx
$
prec_scan
)
mind
<-
match
(
ps
,
idx
$
prec_scan
)
data.frame
(
prec_scan
=
idx
$
prec_scan
[
mind
],
ID
=
idx
$
ID
[
mind
],
adduct
=
idx
$
adduct
[
mind
],
stringsAsFactors
=
F
)
}
pick_uniq_pscan
<-
function
(
leg
)
{
res
<-
leg
[,
.
(
prec_scan
=
unique
(
prec_scan
)),
by
=
c
(
"ID"
,
"adduct"
)]
res
[
order
(
prec_scan
),]
## ids<-unique(leg$ID)
## x<-lapply(ids,function(id) {ups<-unique(leg[id==leg$ID,"prec_scan"]);data.frame(ID=rep(id,length(ups)),prec_scan=ups,stringsAsFactors = F)})
## res<-do.call(rbind,c(x,list(stringsAsFactors=F)))
## res[order(res$prec_scan),]
}
verif_prec_fine_ht
<-
function
(
preLeg
,
ms1
,
mz
,
mzrng
,
ids
,
adduct
)
{
## TODO FIXME TESTPHASE Something goes wrong here, all mapply results are
## not OK. More testing needed. ... huh? But, it works now? (23/02/2022)
df
<-
preLeg
xx
<-
dtable
(
adduct
=
adduct
,
ID
=
ids
,
mz
=
mz
,
mz1
=
mzrng
[,
1
],
mz2
=
mzrng
[,
2
])
df
<-
preLeg
[
xx
,
on
=
c
(
"ID"
,
"adduct"
)]
df
$
ipns
<-
match
(
df
$
prec_scan
,
MSnbase
::
acquisitionNum
(
ms1
))
df
[,
(
"mzsp"
)
:=
.
(
lapply
(
ipns
,
function
(
ip
)
if
(
!
is.na
(
ip
))
MSnbase
::
mz
(
ms1
[[
ip
]])
else
NA_real_
))]
df
$
OK
<-
mapply
(
function
(
m1
,
sp
,
m2
)
any
((
m1
<
sp
)
&
(
sp
<
m2
)),
df
$
mz1
,
df
$
mzsp
,
df
$
mz2
)
res
<-
df
[
df
$
OK
,]
res
$
ipns
<-
NULL
res
$
mz1
<-
NULL
res
$
mz2
<-
NULL
res
$
mzsp
<-
NULL
res
}
filt_ms2
<-
function
(
ms1
,
ms2
,
mz
,
errCoarse
,
errFinePPM
)
{
tmp
<-
filt_ms2_by_prcs_ht
(
ms2
,
mz
,
errCoarse
=
errCoarse
)
legMS2
<-
tmp
$
leg
legPcs
<-
pick_uniq_pscan
(
legMS2
)
legPcs
<-
verif_prec_fine_ht
(
legPcs
,
ms1
=
ms1
,
mz
=
mz
,
errFinePPM
=
errFinePPM
)
x
<-
Map
(
function
(
id
,
psn
)
{
legMS2
[
id
==
legMS2
$
ID
&
psn
==
legMS2
$
prec_scan
,]},
legPcs
[,
"ID"
],
legPcs
[,
"prec_scan"
])
x
<-
do.call
(
rbind
,
c
(
x
,
list
(
make.row.names
=
F
,
stringsAsFactors
=
F
)))[
c
(
"ID"
,
"aN"
)]
rownames
(
x
)
<-
NULL
x
<-
x
[
order
(
x
$
aN
),]
uids
<-
unique
(
x
$
ID
)
acN
<-
MSnbase
::
acquisitionNum
(
ms2
)
res
<-
lapply
(
uids
,
function
(
id
)
{
x
<-
ms2
[
match
(
x
[
id
==
x
$
ID
,
"aN"
],
acN
)]
fData
(
x
)[,
"rtm"
]
<-
MSnbase
::
rtime
(
x
)
/
60
.
fData
(
x
)[,
"maxI"
]
<-
sapply
(
MSnbase
::
intensity
(
x
),
max
)
x
})
names
(
res
)
<-
uids
res
}
filt_ms2_fine
<-
function
(
ms1
,
ms2
,
mz
,
ids
,
adduct
,
err_coarse_fun
,
err_fine_fun
)
{
## This function is supposed to extract only those MS2 spectra for
## which it is proven that the precursor exists within the fine
## error range.
mzrng_c
<-
gen_mz_range
(
mz
,
err_coarse_fun
(
mz
))
mzrng_f
<-
gen_mz_range
(
mz
,
err_fine_fun
(
mz
))
tmp
<-
filt_ms2_by_prcs_ht
(
ms2
,
mzrng
=
mzrng_c
,
ids
=
ids
,
adduct
=
adduct
)
legMS2
<-
tmp
$
leg
message
(
"nrow legMS2:"
,
nrow
(
legMS2
))
legPcs
<-
pick_uniq_pscan
(
legMS2
)
legPcs
<-
verif_prec_fine_ht
(
legPcs
,
ms1
=
ms1
,
mz
=
mz
,
mzrng
=
mzrng_f
,
ids
=
ids
,
adduct
=
adduct
)
## x<-Map(function (id,psn,a) {legMS2[id==legMS2$ID & a==legMS2$adduct & psn==legMS2$prec_scan,]},legPcs[,"ID"],legPcs[,"prec_scan"],legPcs[,"adduct"])
## x <- data.table::rbindlist(x)[,.(ID,adduct,aN)]
x
<-
legMS2
[
legPcs
[,
.
(
ID
,
adduct
,
prec_scan
)],
on
=
c
(
"ID"
,
"adduct"
,
"prec_scan"
)]
## x<-do.call(rbind,c(x,list(make.row.names=F,stringsAsFactors=F)))[c("ID","aN")]
## rownames(x)<-NULL
x
<-
x
[
order
(
x
$
aN
),]
x
}
extr_ms2
<-
function
(
ms1
,
ms2
,
ids
,
mz
,
adduct
,
err_coarse_fun
,
err_fine_fun
)
{
## Extraction of MS2 EICs and spectra.
x
<-
filt_ms2_fine
(
ms1
=
ms1
,
ms2
=
ms2
,
mz
=
mz
,
ids
=
ids
,
adduct
=
adduct
,
err_coarse_fun
=
err_coarse_fun
,
err_fine_fun
=
err_fine_fun
)
## This was here before and obviously wrong when multiple adducts
## correspond to the same ID:
##
## uids <- unique(x$ID)
## uadds <- unique(x$adduct)
idadd
<-
x
[,
unique
(
.SD
),
.SDcols
=
c
(
"ID"
,
"adduct"
)]
acN
<-
MSnbase
::
acquisitionNum
(
ms2
)
chunks
<-
Map
(
function
(
id
,
ad
)
{
ans
<-
x
[
id
==
x
$
ID
&
ad
==
x
$
adduct
,]
$
aN
sp
<-
ms2
[
which
(
acN
%in%
ans
)]
res
<-
gen_ms2_spec_blk
(
sp
)
res
$
ID
<-
id
res
$
adduct
<-
ad
res
},
idadd
$
ID
,
idadd
$
adduct
)
data.table
::
rbindlist
(
chunks
,
fill
=
T
)
}
add_ms2_prcs_scans
<-
function
(
ms2
,
idx
)
{
df
<-
idx
df
$
prec_scan
<-
integer
(
nrow
(
idx
))
for
(
i
in
1
:
nrow
(
df
))
{
sn
<-
df
$
sn
[[
i
]]
df
$
prec_scan
[[
i
]]
<-
MSnbase
::
precScanNum
(
ms2
[[
sn
]])
}
## sn<-as.integer(df$sn)
## df$prec_scan[]<-MSnbase::precScanNum(ms2[sn])
##
## This errors with: msLevel for signature "NULL" for a specific
## compound. However, the above approach is cool.
df
}
refn_ms2_by_prec
<-
function
(
idxMS2
,
preFine
)
{
pf
<-
preFine
[
preFine
$
OK
,]
pf
$
ID
<-
as.character
(
pf
$
ID
)
idxMS2
$
OK
<-
logical
(
nrow
(
idxMS2
))
idxMS2
$
ID
<-
as.character
(
idxMS2
$
ID
)
for
(
n
in
1
:
nrow
(
idxMS2
))
{
scan
<-
idxMS2
$
prec_scan
[[
n
]]
id2
<-
idxMS2
$
ID
[[
n
]]
ppf
<-
pf
[
pf
$
ID
==
id2
,]
inPF
<-
ppf
$
prec_scan
%in%
scan
idxMS2
$
OK
[[
n
]]
<-
any
(
inPF
)
}
idxMS2
}
trim_ms2_by_prec
<-
function
(
rawMS2
,
mz
,
errCoarse
,
errFinePPM
)
{
idxMS2
<-
filt_ms2_by_prcs
(
ms2
=
ms2
,
mz
=
mz
,
errCoarse
=
errCoarse
)
}
grab_ms2_spec
<-
function
(
idx
,
raw
)
{
idx
<-
idx
[
idx
$
OK
,]
IDs
<-
unique
(
idx
$
ID
)
res
<-
lapply
(
IDs
,
function
(
id
)
{
sn
<-
idx
$
sn
[
idx
$
ID
==
id
]
spec
<-
raw
[
sn
]
rts
<-
MSnbase
::
rtime
(
spec
)
lmz
<-
MSnbase
::
mz
(
spec
)
lI
<-
MSnbase
::
intensity
(
spec
)
rts
<-
rts
/
60
.
names
(
lmz
)
<-
NULL
names
(
lI
)
<-
NULL
Map
(
function
(
mz
,
I
,
rt
)
{
mat
<-
matrix
(
data
=
0.0
,
ncol
=
length
(
mz
),
nrow
=
2
,
dimnames
=
list
(
c
(
"mz"
,
"intensity"
)))
mat
[
"mz"
,]
<-
as.numeric
(
mz
)
mat
[
"intensity"
,]
<-
as.numeric
(
I
)
list
(
rt
=
rt
,
spec
=
mat
)
},
lmz
,
lI
,
rts
)
})
names
(
res
)
<-
IDs
res
}
gen_ms2_chrom
<-
function
(
ms2Spec
)
{
lapply
(
ms2Spec
,
function
(
sp
)
{
if
(
length
(
sp
)
>
0
)
{
nRow
<-
length
(
sp
)
mat
<-
matrix
(
0.0
,
nrow
=
nRow
,
ncol
=
2
)
rt
<-
sapply
(
sp
,
function
(
x
)
x
$
rt
)
ord
<-
order
(
rt
)
intn
<-
lapply
(
sp
,
function
(
x
)
max
(
x
$
spec
))
rt
<-
as.numeric
(
rt
[
ord
])
intn
<-
as.numeric
(
intn
[
ord
])
names
(
intn
)
<-
NULL
names
(
rt
)
<-
NULL
mat
[,
1
]
<-
rt
mat
[,
2
]
<-
intn
colnames
(
mat
)
<-
c
(
"rt"
,
"intensity"
)
mat
}
else
list
()
})
}
gen_ms1_chrom
<-
function
(
raw
,
mz
,
errEIC
,
id
,
rt
=
NULL
,
errRT
=
NULL
)
{
mzRng
<-
gen_mz_range
(
mz
,
err
=
errEIC
)
rtRng
<-
gen_rt_range
(
rt
,
err
=
errRT
)
x
<-
MSnbase
::
chromatogram
(
raw
,
mz
=
mzRng
,
msLevel
=
1
,
missing
=
0.0
,
rt
=
rtRng
)
res
<-
lapply
(
x
,
function
(
xx
)
{
rt
<-
MSnbase
::
rtime
(
xx
)
/
60
.
ints
<-
MSnbase
::
intensity
(
xx
)
df
<-
dtable
(
rt
=
rt
,
intensity
=
ints
)
df
})
names
(
res
)
<-
id
res
}
gen_ms1_chrom_ht
<-
function
(
raw
,
mz
,
errEIC
,
rt
=
NULL
,
errRT
=
NULL
)
{
mzRng
<-
gen_mz_range
(
mz
,
err
=
errEIC
)
rtRng
<-
gen_rt_range
(
rt
,
err
=
errRT
)
res
<-
MSnbase
::
chromatogram
(
raw
,
mz
=
mzRng
,
msLevel
=
1
,
missing
=
0.0
,
rt
=
rtRng
)
fData
(
res
)[[
"ID"
]]
<-
rownames
(
mzRng
)
res
}
get_ext_width
<-
function
(
maxid
)
{
as.integer
(
log10
(
maxid
)
+1
)}
id_fn_ext
<-
function
(
width
,
id
)
{
formatC
(
as.numeric
(
id
),
width
=
width
,
flag
=
0
)
}
write_eic
<-
function
(
eic
,
suff
=
"eic.csv"
,
dir
=
"."
,
width
=
get_ext_width
(
max
(
as.numeric
(
names
(
eic
)))))
{
Map
(
function
(
e
,
n
)
{
if
(
length
(
e
)
>
0
)
{
fn
<-
file.path
(
dir
,
paste
(
id_fn_ext
(
width
,
n
),
suff
,
sep
=
"."
))
tab2file
(
tab
=
e
,
file
=
fn
)
}
},
eic
,
names
(
eic
))
}
write_ms2_spec
<-
function
(
ms2Spec
,
dir
=
"."
)
{
ids
<-
as.numeric
(
names
(
ms2Spec
))
maxid
<-
max
(
ids
)
width
<-
get_ext_width
(
maxid
)
for
(
id
in
names
(
ms2Spec
))
{
sp
<-
ms2Spec
[[
id
]]
if
(
length
(
sp
)
>
0
)
{
dr
<-
file.path
(
dir
,
id_fn_ext
(
width
,
id
))
dir.create
(
path
=
dr
,
showWarnings
=
F
)
for
(
s
in
sp
)
{
fn
<-
file.path
(
dr
,
paste
(
"RT_"
,
s
$
rt
,
"_spectrum.csv"
,
sep
=
""
))
df
<-
t
(
s
$
spec
)
colnames
(
df
)
<-
c
(
"mz"
,
"intensity"
)
tab2file
(
tab
=
df
,
file
=
fn
)
}
}
}
}
extr_msnb
<-
function
(
file
,
wd
,
mz
,
errEIC
,
errFinePPM
,
errCoarse
=
0.5
,
rt
=
NULL
,
errRT
=
NULL
,
mode
=
"inMemory"
)
{
## Perform the entire data extraction procedure.
##
## file - The input mzML file.
## wd - Top-level directory where the results should be deposited.
## mz - A named vector of precursor masses for which to scan the
## file. The names can be RMassBank IDs.
## rt - A named vector of length 1, or same as mz, giving the retention
## times in minutes. The names should be the same as for mz.
## errRT - A vector of length 1, or same as mz, giving the
## half-width of the time window in which the peak for the
## corresponding mz is supposed to be.
## errEIC - Absolute mz tolerance used to extract precursor EICs.
## errFinePPM - Tolerance given in PPM used to associate input
## masses with what the instrument assigned as precursors to MS2
## products.
message
(
"Loading "
,
file
,
" in mode "
,
mode
,
"."
)
data
<-
load_raw_data
(
file
,
mode
=
mode
)
ms1
<-
data
[[
"ms1"
]]
ms2
<-
data
[[
"ms2"
]]
message
(
"Done loading "
,
file
,
"."
)
## EICs for precursors.
message
(
"Extracting precursor EICs. Please wait."
)
eicMS1
<-
gen_ms1_chrom
(
raw
=
ms1
,
mz
=
mz
,
errEIC
=
errEIC
,
rt
=
rt
,
errRT
=
errRT
)
write_eic
(
eicMS1
,
dir
=
wd
)
message
(
"Extracting precursor EICs finished."
)
## Extract MS2 spectra.
message
(
"Extracting MS2 spectra."
)
idxMS2
<-
filt_ms2_by_prcs
(
ms2
=
ms2
,
mz
=
mz
,
errCoarse
=
errCoarse
)
message
(
"Resampling MS2 spectra."
)
# idxMS2<-add_ms2_prcs_scans(ms2,idxMS2)
prsc
<-
pick_unique_precScans
(
idxMS2
)
vprsc
<-
verif_prec_fine
(
preSc
=
prsc
,
ms1
=
ms1
,
mz
=
mz
,
errFinePPM
=
errFinePPM
)
idxMS2
<-
refn_ms2_by_prec
(
idxMS2
=
idxMS2
,
preFine
=
vprsc
)
message
(
"Resampling MS2 spectra finished."
)
ms2Spec
<-
grab_ms2_spec
(
idxMS2
,
ms2
)
eicMS2
<-
gen_ms2_chrom
(
ms2Spec
)
message
(
"Extracting MS2 spectra finished."
)
write_eic
(
eicMS2
,
dir
=
wd
,
suff
=
"kids.csv"
,
width
=
get_ext_width
(
max
(
as.numeric
(
names
(
eicMS1
)))))
specDir
<-
file.path
(
wd
,
"ms2_spectra"
)
dir.create
(
specDir
,
showWarnings
=
F
)
write_ms2_spec
(
ms2Spec
,
dir
=
specDir
)
message
(
"Done with "
,
file
)
}
##' @importFrom MSnbase filterMz
extr_msnb_ht
<-
function
(
file
,
wd
,
mz
,
errEIC
,
errFinePPM
,
errCoarse
,
fnSpec
,
rt
=
NULL
,
errRT
=
NULL
,
mode
=
"onDisk"
)
{
## Perform the entire data extraction procedure.
##
## file - The input mzML file.
## wd - Top-level directory where the results should be deposited.
## mz - A named vector of precursor masses for which to scan the
## file. The names can be RMassBank IDs.
## rt - A named vector of length 1, or same as mz, giving the retention
## times in minutes. The names should be the same as for mz.
## errRT - A vector of length 1, or same as mz, giving the
## half-width of the time window in which the peak for the
## corresponding mz is supposed to be.
## errEIC - Absolute mz tolerance used to extract precursor EICs.
## errFinePPM - Tolerance given in PPM used to associate input
## masses with what the instrument assigned as precursors to MS2
## products.
message
(
"Loading "
,
file
,
" in mode "
,
mode
,
"."
)
data
<-
load_raw_data
(
file
,
mode
=
mode
)
ms1
<-
data
[[
"ms1"
]]
ms2
<-
data
[[
"ms2"
]]
message
(
"Done loading "
,
file
,
"."
)
## Filtering
mzCrs
<-
gen_mz_range
(
mz
=
mz
,
err
=
errCoarse
)
mzMin
<-
min
(
mzCrs
)
mzMax
<-
max
(
mzCrs
)
ms1
<-
filterMz
(
ms1
,
c
(
mzMin
,
mzMax
))
fms2
<-
filt_ms2
(
ms1
,
ms2
,
mz
,
errCoarse
=
errCoarse
,
errFinePPM
=
errFinePPM
)
## EICs for precursors.
message
(
"Extracting precursor EICs. Please wait."
)
eicMS1
<-
gen_ms1_chrom_ht
(
raw
=
ms1
,
mz
=
mz
,
errEIC
=
errEIC
,
rt
=
rt
,
errRT
=
errRT
)
message
(
"Extracting precursor EICs finished."
)
x
<-
list
(
eic
=
eicMS1
,
ms2
=
fms2
)
saveRDS
(
object
=
x
,
file
=
file.path
(
wd
,
fnSpec
))
x
}
extr_eic_ms1
<-
function
(
tab
,
err
)
{
## Asynchronous extraction of ms1 spectra. The result is a list of
## running futures.
file
<-
unique
(
tab
$
file
)
res
<-
lapply
(
file
,
function
(
fn
)
future
::
futur
(
extr_fn
(
fn
),
lazy
=
T
))
names
(
res
)
<-
file
res
}
##' @importFrom MSnbase filterMz readMSData
##' @export
extract
<-
function
(
fn
,
tag
,
tab
,
err_ms1_eic.
,
err_coarse
,
err_fine
,
err_rt.
,
missing_precursors
)
{
## Extracts MS1 and MS2 EICs, as well as MS2 spectra, subject to
## tolerance specifications.
## TODO: Still detecting external references ... but which?
## However, the results check out, compared to sequential access.
err_coarse_fun
<-
gen_mz_err_f
(
err_coarse
,
"ms1 coarse error: Only ppm, or Da units allowed."
)
err_fine_fun
<-
gen_mz_err_f
(
err_fine
,
"ms1 fine error: Only ppm, or Da units allowed."
)
err_ms1_eic
<-
gen_mz_err_f
(
err_ms1_eic.
,
"eic error: Only ppm, or Da units allowed."
)
err_rt
<-
gen_rt_err
(
err_rt.
,
"rt error: Only s(econds), or min(utes) allowed."
)
tab
<-
data.table
::
as.data.table
(
tab
)
## chunk <- tab[file==fn]
mz
<-
tab
$
mz
rt
<-
tab
$
rt
id
<-
tab
$
ID
adduct
<-
tab
$
adduct
names
(
mz
)
<-
id
names
(
rt
)
<-
id
mzerr
<-
err_coarse_fun
(
mz
)
mzrng
<-
gen_mz_range
(
mz
=
mz
,
err
=
mzerr
)
rtrng
<-
gen_rt_range
(
rt
=
rt
,
err
=
err_rt
)
mzmin
<-
min
(
mzrng
)
mzmax
<-
max
(
mzrng
)
read_ms1
<-
function
()
{
ms1
<-
readMSData
(
file
=
fn
,
msLevel
=
1
,
mode
=
"onDisk"
)
ms1
<-
filterMz
(
ms1
,
mz
=
c
(
mzmin
,
mzmax
),
msLevel
=
1
)
ms1
}
read_ms2
<-
function
()
{
ms2
<-
MSnbase
::
readMSData
(
file
=
fn
,
msLevel
=
2
,
mode
=
"onDisk"
)
ms2
}
read_all
<-
function
()
{
MSnbase
::
readMSData
(
file
=
fn
,
msLevel.
=
c
(
1
,
2
),
mode
=
"onDisk"
)
}
extr_ms1_eic
<-
function
(
ms1
)
{
eic
<-
MSnbase
::
chromatogram
(
ms1
,
mz
=
mzrng
,
msLevel
=
1
,
missing
=
0.0
,
rt
=
rtrng
)
bits
<-
dtable
(
N
=
sapply
(
eic
,
NROW
))
bigN
<-
bits
[,
sum
(
N
)]
bits
[,
idx
:=
paste0
(
'I'
,
.I
)]
bits
$
ID
<-
id
bits
$
adduct
<-
adduct
bits
$
tag
<-
tag
res
<-
dtable
(
rt
=
numeric
(
bigN
),
intensity
=
numeric
(
bigN
),
tag
=
tag
,
adduct
=
bits
[,
rep
(
adduct
,
N
)],
ID
=
bits
[,
rep
(
ID
,
N
)],
idx
=
bits
[,
rep
(
idx
,
N
)])
data.table
::
setkey
(
res
,
idx
)
names
(
eic
)
<-
bits
$
idx
res
[,
c
(
"rt"
,
"intensity"
)
:=
.
(
MSnbase
::
rtime
(
eic
[[
idx
]])
/
60
.
,
MSnbase
::
intensity
(
eic
[[
idx
]])),
by
=
idx
]
data.table
::
setkeyv
(
res
,
BASE_KEY
)
res
}
if
(
is.null
(
missing_precursors
))
missing_precursors
<-
"do_nothing"
if
(
missing_precursors
!=
"do_nothing"
)
{
ms
<-
clean_na_precs
(
read_all
(),
missing_precursors
=
missing_precursors
)
fdta
<-
as.data.table
(
MSnbase
::
fData
(
ms
),
keep.rownames
=
"rn"
)
fdta
[,
idx
:=
.I
]
idx_ms1
<-
fdta
[
msLevel
==
1
,
idx
]
idx_ms2
<-
fdta
[
msLevel
==
2
,
idx
]
ms1
<-
ms
[
idx_ms1
]
ms2
<-
ms
[
idx_ms2
]
}
else
{
ms1
<-
read_ms1
()
ms2
<-
read_ms2
()
}
res_ms1
<-
extr_ms1_eic
(
ms1
)
res_ms2
<-
extr_ms2
(
ms1
=
ms1
,
ms2
=
ms2
,
ids
=
id
,
mz
=
mz
,
adduct
=
adduct
,
err_coarse_fun
=
err_coarse_fun
,
err_fine_fun
=
err_fine_fun
)
res_ms2
[,
"tag"
:=
tag
]
## Clean all the NA intensity MS2 (sometimes a consequence of 'fill'.)
res_ms2
<-
res_ms2
[
!
is.na
(
intensity
)]
res
<-
list
(
ms1
=
res_ms1
,
ms2
=
res_ms2
)
res
}
gen_ms2_spec_blk
<-
function
(
spectra
)
{
dt
<-
dtable
(
mz
=
MSnbase
::
mz
(
spectra
),
intensity
=
MSnbase
::
intensity
(
spectra
),
rt
=
lapply
(
MSnbase
::
rtime
(
spectra
),
function
(
z
)
z
/
60
.
),
CE
=
MSnbase
::
collisionEnergy
(
spectra
),
an
=
MSnbase
::
acquisitionNum
(
spectra
))
dt
[,
maspI
:=
sapply
(
intensity
,
function
(
zz
)
max
(
zz
))]
data.table
::
rbindlist
(
apply
(
dt
,
1
,
function
(
row
)
dtable
(
intensity
=
row
[[
"intensity"
]],
rt
=
row
[[
"rt"
]],
mz
=
row
[[
"mz"
]],
CE
=
row
[[
"CE"
]],
an
=
row
[[
"an"
]])))
}
## NEW FUNCTIONS.
create_fine_table
<-
function
(
m
)
{
## Select fine mz-ranges and split them into those with rt entries
## and those without.
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment