Skip to content
Snippets Groups Projects
Unverified Commit dde03e91 authored by Todor Kondic's avatar Todor Kondic
Browse files

feature: enable processing of mzMLs with missing precursors

The extraction routine was modified to first scan for missing
precursors. If there are such, then assign as precursors the nearest
preceeding MS1 signature (in exactly the same way RMassBank is doing
it). Another option is to just omit MS2 with no precursors.
parent 423a1512
No related branches found
No related tags found
No related merge requests found
......@@ -317,6 +317,8 @@ extr_data <- function(m) {
file <- m$out$tab$data[,unique(file)]
allCEs <- do.call(c,args=lapply(file,function(fn) {
z <- MSnbase::readMSData(files=fn,msLevel = c(1,2),mode="onDisk")
unique(MSnbase::collisionEnergy(z),fromLast=T)
}))
......@@ -348,14 +350,16 @@ extr_data <- function(m) {
err_rt <- m$conf$tolerance$rt
missing_precursor_info <- m$conf$extract$missing_precursor_info
x <- futuref(extract(fn=fn,
tag=the_tag,
tab=tab,
err_ms1_eic=err_ms1_eic,
err_coarse = err_coarse,
err_fine= err_fine,
err_rt= err_rt),
err_rt= err_rt,
missing_precursors = missing_precursor_info),
lazy = F)
x
......
......@@ -490,7 +490,7 @@ extr_eic_ms1 <- function(tab,err) {
}
##' @export
extract <- function(fn,tag,tab,err_ms1_eic.,err_coarse,err_fine,err_rt.) {
extract <- function(fn,tag,tab,err_ms1_eic.,err_coarse,err_fine,err_rt.,missing_precursors) {
## Extracts MS1 and MS2 EICs, as well as MS2 spectra, subject to
## tolerance specifications.
......@@ -530,6 +530,9 @@ extract <- function(fn,tag,tab,err_ms1_eic.,err_coarse,err_fine,err_rt.) {
ms2 <- MSnbase::readMSData(file=fn,msLevel=2,mode="onDisk")
ms2
}
read_all <- function() {
MSnbase::readMSData(file=fn,msLevel. = c(1,2),mode="onDisk")
}
extr_ms1_eic <- function(ms1) {
eic <- MSnbase::chromatogram(ms1,mz=mzrng,msLevel=1,missing=0.0,rt=rtrng)
bits <- dtable(N=sapply(eic,NROW))
......@@ -555,8 +558,22 @@ extract <- function(fn,tag,tab,err_ms1_eic.,err_coarse,err_fine,err_rt.) {
data.table::setkeyv(res,BASE_KEY)
res
}
ms1 <- read_ms1()
ms2 <- read_ms2()
if (missing_precursors != "do_nothing") {
ms <- clean_na_precs(read_all(),missing_precursors = missing_precursors)
fdta <- as.data.table(MSnbase::fData(ms),keep.rownames = "rn")
fdta[,idx := .I]
idx_ms1 <- fdta[msLevel == 1,idx]
idx_ms2 <- fdta[msLevel == 2,idx]
ms1 <- ms1[idx_ms1]
ms2 <- ms2[idx_ms2]
} else {
ms1 <- read_ms1()
ms2 <- read_ms2()
}
res_ms1 <- extr_ms1_eic(ms1)
res_ms2 <- extr_ms2(ms1=ms1,
ms2=ms2,
......
......@@ -793,7 +793,6 @@ base_conf <- function () {
compounds=list(lists=list(),
sets="",
data=""),
extr=list(fn=""),
debug = F)
m
}
......@@ -803,6 +802,8 @@ extr_conf <- function(m) {
"ms1 fine"=MS1_ERR_FINE,
"eic"=EIC_ERR,
"rt"=RT_EXTR_ERR)
m$conf$extract <- list(missing_precursor_info=DEF_CONF_MISSING_PCS)
m
}
......@@ -1614,3 +1615,77 @@ gen_key_plot_tab <- function(m) {
by=plot_key]
}
enum_na_scans <- function(vec) {
## Canibalised .locf function from RMassBank.
tmp <- !is.na(vec)
na_idx <- cumsum(tmp)
## The construct below deals with the fact that vec[0] ==
## integer(0) and not NA... Therefore we retrieve indexes to
## c(NA,vec) from c(0, which(vec.isna)) + 1 (the +1 shifts all
## values so the 0s point to NA).
c(NA,vec)[c(0,which(tmp))[na_idx+1]+1]
}
fix_na_precs <- function(ms) {
fd_df <- MSnbase::fData(ms)
fd <- data.table::as.data.table(fd_df, keep.rownames = "rn")
## Reset precursor numbers.
fd[,precursorScanNum := NA_real_]
## Temporarily label MS1 precursors using their acquisition
## numbers.
fd[msLevel == 1, precursorScanNum := acquisitionNum]
## Now populate the MS2 precursors based on MS1 scan numbers.
fd[,precursorScanNum := enum_na_scans(precursorScanNum)]
## Reset MS1 precursors.
fd[msLevel == 1, precursorScanNum := 0]
## Drop remaining MS2 NAs.
notna_idx <- fd[!is.na(precursorScanNum),.I]
fd <- fd[notna_idx]
notna_rns <- fd[,rn]
ms <- ms[notna_idx]
fd_res <- as.data.frame(fd)
## Fix rownames.
rownames(fd_res) <- notna_rns
## Modify header data for `ms'.
MSnbase::fData(ms) <- fd_res
ms
}
omit_na_precs <- function(ms) {
fd_df <- MSnbase::fData(ms)
fd <- data.table::as.data.table(fd_df, keep.rownames = "rn")
fd[,idx:=.I]
idx <- fd[msLevel == 1 | msLevel == 2 & !is.na(precursorScanNum),idx]
ms[idx]
}
clean_na_precs <- function(ms,missing_precursors) {
fd <- data.table::as.data.table(MSnbase::fData(ms))
na_pcs <- fd[msLevel==2 & is.na(precursorScanNum),.I]
pc_na_info <- missing_precursors
if (length(na_pcs)>0) {
warning("There are MS2 spectra with missing precursor entries in your data. Consider setting missing_precursor_info to `fill', or `omit'.")
message("Current missing_precursor_info value is: ", pc_na_info)
}
ms <- if (pc_na_info == "fill") {
fix_na_precs(ms)
} else if (pc_na_info == "omit") {
omit_na_precs(ms)
} else if (pc_na_info == "do_nothing") {
ms
} else {
stop("Fatal: m$conf$missing_precursor_info value ",pc_na_info, "is not one of `fill', `omit', or `do_nothing`.")
}
ms
}
......@@ -262,3 +262,5 @@ EMPTY_COMP_TAB <- dtable(ID=character(),
Formula=character(),
file=character())
DEF_CONF_MISSING_PCS <- "do_nothing"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment