diff --git a/R/mix.R b/R/mix.R index 7df790f6098ef5c5a725d20d1b9b7b07bcab312d..b4716b55c605db75e9923d6ec74cefaa604007f5 100644 --- a/R/mix.R +++ b/R/mix.R @@ -354,80 +354,6 @@ v<-function(fn_data,stgs_alist,wd,fn_cmpd_list,mode,readMethod="mzR",archdir="ar } } - -##' Prescreens. Ripped off from ReSOLUTION -##' -##' -##' @title Prescreen -##' @param archive_name ... -##' @param RMB_mode ... -##' @param FileList ... -##' @param cmpd_list ... -##' @param ppm_limit_fine ... -##' @param EIC_limit ... -##' @author Emma Schymanski, Todor Kondić -RMB_EIC_prescreen_intrn <- function (archive_name, RMB_mode, FileList, cmpd_list, - ppm_limit_fine = 10, EIC_limit = 0.001) { - pdf_title <- paste(archive_name, "_EICscan.pdf", sep = "") - pdf(pdf_title) - n_spec <- 0 - cmpd_RT_maxI <- "" - msms_found <- "" - rts <- 0 - max_I_prec <- "" - cmpd_RT_maxI_min <- "" - file_list <- read.csv(FileList, stringsAsFactors = FALSE) - cmpd_info <- read.csv(cmpd_list, stringsAsFactors = FALSE) - f <- mzR::openMSfile(file_list$Files[1]) - for (i in 1:length(file_list$ID)) { - cpdID <- file_list$ID[i] - n_spec <- n_spec + 1 - smiles <- tryCatch(RMassBank::findSmiles(cpdID), error = function(e) NA) - if (!is.na(smiles)) { - mz <- as.numeric(RMassBank::findMz(cpdID, RMB_mode)[3]) - } - else { - mz <- as.numeric(RMassBank::findMz(cpdID, RMB_mode, retrieval = "unknown")[3]) - } - eic <- RMassBank::findEIC(f, mz, limit = EIC_limit) - msms_found[n_spec] <- FALSE - msms <- RMassBank::findMsMsHR.mass(f, mz, 0.5, RMassBank::ppm(mz, ppm_limit_fine, - p = TRUE)) - max_I_prec_index <- which.max(eic$intensity) - cmpd_RT_maxI[n_spec] <- eic[max_I_prec_index, 1] - max_I_prec[n_spec] <- eic[max_I_prec_index, 2] - cmpd_RT_maxI_min[n_spec] <- as.numeric(cmpd_RT_maxI[n_spec])/60 - plot.new() - plot.window(range(eic$rt), range(eic$intensity)) - box() - lines(eic$intensity ~ eic$rt) - for (specs in msms) { - if (specs@found == TRUE) { - df <- do.call(rbind, lapply(specs@children, function(sp) c(sp@rt, - intensity = max(sp@intensity)))) - lines(intensity ~ retentionTime, data = df, type = "h", - col = "blue") - msms_found[n_spec] <- TRUE - } - } - title(main = cpdID, xlab = "RT (sec)", ylab = "Intensity") - text(as.numeric(cmpd_RT_maxI[n_spec]), as.numeric(max_I_prec[n_spec]), - labels = as.numeric(cmpd_RT_maxI_min[n_spec]), pos = 4) - axis(1) - axis(2) - gc() - rts[i] <- (cmpd_RT_maxI[n_spec]) - } - dev.off() - write.csv(cbind(file_list$ID, cmpd_info$mz, cmpd_info$Name, - cmpd_RT_maxI, cmpd_RT_maxI_min, max_I_prec, msms_found), - file = paste(archive_name, "_RTs_wI.csv", sep = ""), - row.names = F) -} - - - - ##' Prescreens. Writes data out. Adapted from ReSOLUTION ##' ##' @@ -547,7 +473,7 @@ presc.p<-function(cl,fn_data,fn_cmpd_l,mode,ppm_lim_fine=10,EIC_limit=0.001) { ##' @return Nothing useful. ##' @author Todor Kondić ##' @export -presc.plot <- function(wd,out="prescreen.pdf",pal="Accent") { +presc.plot <- function(wd,out="prescreen.pdf",pal="Dark2") { dfdir <- file.path(wd,"prescreen") pdf(out) ## Get the basenames of eic files. @@ -556,26 +482,32 @@ presc.plot <- function(wd,out="prescreen.pdf",pal="Accent") { for (i in seq(length(eics))) { eic <- eics[[i]] maybekid <- maybekids[[i]] + fn_ini <- lapply(wd,function(x) file.path(x,list.files(path=x,patt="*.ini")[[1]])) + + lbls <- lapply(fn_ini,function(x) {s <- yaml::yaml.load_file(x);s$spectraList[[1]]$ce}) plot.new() dfs <- lapply(file.path(dfdir,eic),function(fn) { tryCatch(read.csv(fn,stringsAsFactors = F), error=function(e) {message(paste(e,"; offending file:",fn))}) }) + + dfs <- lapply(dfs,function(x) data.frame(rt=x$rt/60.,intensity=x$intensity)) + ## Find max intensities. + w_max <- sapply(dfs,function (x) which.max(x$intensity)) + rt_max <- Map(function(df,w) df$rt[[w]],dfs,w_max) + i_max<- Map(function(df,w) df$intensity[[w]],dfs,w_max) - ## Find max intensity for the first in the group. - imx <- which.max(dfs[[1]]$intensity) - int_max <- dfs[[1]]$intensity[[imx]] - rt_max <- dfs[[1]]$rt[[imx]] - rt_min_max <- as.numeric(rt_max)/60. rt_rng <- range(sapply(dfs,function(x) x$rt)) int_rng <- range(sapply(dfs,function(x) x$intensity)) - plot.window(rt_rng,int_rng) + plot.window(rt_rng,c(int_rng[[1]],1.1*int_rng[[2]])) box() cols <- RColorBrewer::brewer.pal(n=length(dfs),name=pal) + legend("top",horiz=T,legend=lbls,col=cols,fill=cols) + ## Plot eic across the directory set. for (n in seq(length(dfs))) { df <- dfs[[n]] @@ -585,21 +517,17 @@ presc.plot <- function(wd,out="prescreen.pdf",pal="Accent") { ## Find existing children and plot them across the directory ## set. + maybes <- file.path(dfdir,maybekid) indkids <- which(file.exists(maybes)) kids <- maybes[indkids] - dfs <- lapply(kids,function(fn) { - tryCatch(read.csv(fn,stringsAsFactors=F), - error=function(e) {message(paste(e,"; offending file:",fn)) - return(NA)}, - finally={return(NA)}) - }) - dfs <- dfs[!is.na(dfs)] + + dfs <- lapply(kids,read.csv,stringsAsFactors=F) for (df in dfs) { - lines(intensity~retentionTime,data=df,type="h",col="blue") + lines(intensity ~ retentionTime,data=df,type="h",col="black") } - title(main=i,xlab="retention time [s]",ylab="intensity") - text(as.numeric(rt_max),as.numeric(int_max),labels=as.numeric(rt_min_max),pos=4) + title(main=i,xlab="retention time [min]",ylab="intensity") + for (i in seq(length(w_max))) text(rt_max[[i]],i_max[[i]],labels=rt_max[[i]],pos=4) axis(1) axis(2) gc() diff --git a/R/run.R b/R/run.R index e5d36dad7856e3141acab770507d3d64671f42ee..fbc3c2f96680decbd27501b25a932869c6664b14 100644 --- a/R/run.R +++ b/R/run.R @@ -23,7 +23,7 @@ presc.do<-function(fn_data,fn_cmpd_list,mode,proc=F) { if (proc) { - cl<-parallel::makeCluster(proc) + cl<-parallel::makeCluster(proc,type='FORK') presc.p(cl=cl,fn_data,fn_cmpd_l=fn_cmpd_list,mode=mode) } else { presc.v(fn_data,fn_cmpd_l=fn_cmpd_list,mode)