diff --git a/NAMESPACE b/NAMESPACE index 0f870cca68b9697f9bb3144ac25e81151f33fd75..2c3e0d571b725830fff289ef810fc105c0dd80b4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,8 +3,5 @@ export(mb.do) export(mb.prep) export(presc.do) -export(presc.p) export(presc.plot) -export(presc.single) -export(presc.v) export(sw.do) diff --git a/R/mix.R b/R/mix.R index 70e7789d4a5312bd72011b4f33e9a3922d1cbc84..135c39da4754bc55700be9f3820481e439e2003e 100644 --- a/R/mix.R +++ b/R/mix.R @@ -7,13 +7,14 @@ stripext<-function(fn) { ##' Create directories without drama. ##' ##' @title Create directories without drama -##' @param path Name of the directory. +##' @param path Names of the directories. ##' @return The character string containing the input argument `path`. ##' @author Todor Kondić no_drama_mkdir<-function(path) { - - if (! dir.exists(path)) dir.create(path) - path + f <- Vectorize(function(path) { + if (! dir.exists(path)) dir.create(path) + path},vectorize.args="path") + f(path) } ##' Produce the Rmb Settings file @@ -60,6 +61,62 @@ mk_combine_file<-function(sett_fns,fname) { fname } + +fn_data2wd <- function(fn_data,dest) { + + f <- Vectorize(function(fn_data) { + noext <- stripext(fn_data) + file.path(dest,basename(noext)) + },vectorize.args="fn_data") + f(fn_data) +} + +get_presc_d <- function(wd) { file.path(wd,"prescreen")} +gen_presc_d <- function(wd) { no_drama_mkdir(get_presc_d(wd))} + + + +get_cmpd_l_fn <- function(wd) { + f <- function(wd) file.path(wd,"compounds.csv") + fv <- Vectorize(f,vectorize.args=c("wd")) + fv(wd) +} + +get_stgs_fn <- function(wd) { + f <- function(wd) file.path(wd,"settings.ini") + fv <- Vectorize(f,vectorize.args=c("wd")) + fv(wd) +} + +get_ftable_fn <- function(wd) { + f <- function(wd) file.path(wd,"ftable.csv") + fv <- Vectorize(f,vectorize.args=c("wd")) + fv(wd) +} + +get_inp_stgs_fn<- function(fn_data) { + f <- Vectorize(function(fn_data) { + bnm <- stripext(fn_data) + fn <- paste(bnm,".ini",sep='')}, + vectorize.args="fn_data") + f(fn_data)} + +get_info_dir <- function(wd) { + file.path(wd,"info") +} + +get_info_fn <- function(wd) { + file.path(get_info_dir(wd),"info.csv") +} + +gen_info_dir <- function(wd) { + nm <- get_info_dir(wd) + no_drama_mkdir(nm) + nm +} + + + ##' Generate the RMassBank compound list from the input compound list ##' in CSV file src_fn. The input compound list format is either a ##' Chemical Dashboard csv file with, at least, PREFERRED_ SMILES @@ -67,13 +124,12 @@ mk_combine_file<-function(sett_fns,fname) { ##' SMILES and Names filled. Argument dest_fn is the destination ##' filename. Returns the number of compounds. ##' -##' ##' @title Generate Compound List File ##' @param src_fn The input compound list CSV filename. ##' @param dest_fn The resulting compound list CSV filename. ##' @return Number of compounds. ##' @author Todor Kondić -gen_comp_list<-function(src_fn,dest_fn) { +gen_cmpd_l<-function(src_fn,dest_fn) { df<-read.csv(src_fn) ## Names nms<-if ("PREFERRED_NAME" %in% names(df)) df$PREFERRED_NAME else df$Name @@ -87,43 +143,40 @@ gen_comp_list<-function(src_fn,dest_fn) { casvals<-if ("CASRN" %in% names(df)) df$CASRN else rep(NA,sz) if (is.null(haha)) stop("Unable to read SMILES from the input compound list.") outdf<-data.frame(ID=1:sz,Name=nms,SMILES=haha,CAS=casvals,RT=rep(NA,sz)) - write.csv(outdf,file=dest_fn,row.names=F,na="") + f <- Vectorize(function (dest_fn) { + write.csv(outdf,file=dest_fn,row.names=F,na="") + },vectorize.args="dest_fn",SIMPLIFY=F) + + f(dest_fn) length(nms) } ##' Generates settings file and loads it. ##' -##' ##' @title Generate and Load the RMassBank Settings File -##' @param fn_data The mzML filename. ##' @param stgs Settings named list, or a settings filename. ##' @param wd Directory under which results are archived. ##' @return result of RMassBank::loadRmbSettings ##' @author Todor Kondić -gen_stgs_and_load <- function(fn_data,stgs,wd) { - wd <- normalizePath(wd) - fn_data <- normalizePath(fn_data) +gen_stgs_and_load <- function(stgs,wd) { stgs<-if (is.character(stgs)) yaml::yaml.load_file(stgs) else stgs - sfn<-file.path(wd,paste(basename(fn_data),".ini",sep='')) + sfn<-get_stgs_fn(wd) mk_sett_file(stgs,sfn) RMassBank::loadRmbSettings(sfn) } ##' Generates the RMassBank compound list and loads it. ##' -##' ##' @title Generate and Load the RMassBank Compound List -##' @param fn_data The mzML filename. ##' @param wd Directory under which results are archived. +##' @param fn_cmpdl The input compound list filename. ##' @return Named list. The key `fn_cmpdl` is the path of the ##' generated compound list and the key `n` the number of ##' compounds. ##' @author Todor Kondić -gen_cmpdl_and_load <- function(fn_data,wd,fn_cmpdl) { - wd <- normalizePath(wd) - fn_data <- normalizePath(fn_data) - fn_comp<-file.path(wd,paste(basename(fn_data),".comp.csv",sep='')) - n_cmpd<-gen_comp_list(fn_cmpdl,fn_comp) +gen_cmpdl_and_load <- function(wd,fn_cmpdl) { + fn_comp<-get_cmpd_l_fn(wd) + n_cmpd<-gen_cmpd_l(fn_cmpdl,fn_comp) RMassBank::loadList(fn_comp) list(fn_cmpdl=fn_comp,n=n_cmpd) } @@ -133,225 +186,50 @@ gen_cmpdl_and_load <- function(fn_data,wd,fn_cmpdl) { ##' ##' @title Generate and Load the RMassBank Settings File ##' @param fn_data The mzML filename. -##' @param n_cmpd Number of compounds. ##' @param wd Directory under which results are archived. +##' @param n_cmpd Number of compounds. ##' @return File path of the file table. ##' @author Todor Kondić -gen_file_table <- function(fn_data,n_cmpd,wd) { - wd <- normalizePath(wd) - fn_data <- normalizePath(fn_data) - df_table<-data.frame(Files=rep(fn_data,n_cmpd),ID=1:n_cmpd) - fn_table<-file.path(wd,paste("fn-table.",basename(fn_data),".csv",sep='')) - write.csv(x=df_table,file=fn_table,row.names=F) - fn_table +gen_ftable <- function(fn_data,wd,n_cmpd) { + f <- Vectorize(function(fn_data,wd) { + df_table<-data.frame(Files=rep(fn_data,n_cmpd),ID=1:n_cmpd) + fn_table<-get_ftable_fn(wd) + write.csv(x=df_table,file=fn_table,row.names=F) + fn_table + }, vectorize.args=c("fn_data","wd")) + + f(fn_data,wd) } -##' Wrapper for a single prescreening call. Produces output in the -##' usual mix method places. -##' -##' @title Wrapper for RMB_EIC_Prescreen -##' @param fn_data The mzML filename. -##' @param stgs_alist Settings named list, or a settings filename. -##' @param wd Directory under which results are archived. -##' @param mode RMB mode. -##' @param fn_cmpd_l Filename of the compound list. -##' @param ppm_lim_fine The ppm_limit_fine argument to RMB_EIC_Prescreen -##' @param EIC_limit Passed down to RMB_EIC_Prescreen. -##' @return result of RMB_EIC_Prescreen -##' @author Todor Kondić -##' @export -presc.single <- function(fn_data,stgs_alist,wd,mode,fn_cmpd_l,ppm_lim_fine=10,EIC_limit=0.001) { - no_drama_mkdir(wd) - wd <- normalizePath(wd) - gen_stgs_and_load(fn_data,stgs_alist,wd) - - ## Generate and load the compound list. - x <- gen_cmpdl_and_load(fn_data,wd,fn_cmpd_l) - fn_comp <- x$fn_cmpdl - n_cmpd <- x$n - - ## Generate file table. - fn_table <- gen_file_table(fn_data,n_cmpd,wd) - - #curd <- setwd(wd) - res <-RMB_EIC_prescreen_df(wd=wd,RMB_mode=mode, FileList=fn_table, - cmpd_list=fn_comp, - ppm_limit_fine=ppm_lim_fine, - EIC_limit=EIC_limit) - #setwd(curd) - res +gen_fn_stgs <- function(fn_inp,fn) { + f <- Vectorize(function(fn_inp,fn) { + stgs <- yaml::yaml.load_file(fn_inp) + mk_sett_file(stgs,fn) + fn}, vectorize.args=c("fn_inp","fn")) + f(fn_inp,fn) } -##' Runs a compound mixture workflow on a single mzML file. -##' -##' @title RMassBank Spectral Workflow on a Single Compound Mixture -##' @param fn_data A mzML data file. -##' @param stgs_alist RMassBank settings. It can either be a named -##' list of settings, or a filename of a YAML file. -##' @param wd The name of the work directory. -##' @param fn_cmpd_list The file name of he compound list -##' corresponding to `fn_data`. -##' @param mode Modes as described in the standard workflow vignette -##' of RMassBank. -##' @param readMethod Default read method is "mzR". Consult the -##' documentation of `msmsRead` for details. -##' @param archdir The directory to store R objects created during -##' workflow execution. -##' @param lastStep The last step in the workflow. Default is eight. -##' @return MsmsWorkspace object. -##' @author Todor Kondić -single.sw<-function(fn_data,stgs_alist,wd,fn_cmpd_list,mode,readMethod="mzR",archdir="archive",lastStep=8) { - ## Generate settings file and load. - stgs_alist<-if (is.character(stgs_alist)) yaml::yaml.load_file(stgs_alist) else stgs_alist - sfn<-file.path(wd,paste(fn_data,".ini",sep='')) - mk_sett_file(stgs_alist,sfn) - RMassBank::loadRmbSettings(sfn) - - ## Generate and load the compound list. - fn_comp<-file.path(wd,paste(fn_data,".comp.csv",sep='')) - n_cmpd<-gen_comp_list(fn_cmpd_list,fn_comp) - RMassBank::loadList(fn_comp) - - ## Generate file table. - df_table<-data.frame(Files=rep(fn_data,n_cmpd),ID=1:n_cmpd) - fn_table<-file.path(wd,paste("fn-table.",fn_data,".csv",sep='')) - write.csv(x=df_table,file=fn_table,row.names=F) - - ## Make empty workspace. - w <- RMassBank::newMsmsWorkspace() - ## Run the workflow. - message(paste("Reading in file:",fn_data)) - w <-RMassBank::msmsRead(w,filetable=fn_table,readMethod="mzR",mode=mode) - archdir<-file.path(wd,archdir) - if (!dir.exists(archdir)) dir.create(archdir) - fn_arch<-file.path(archdir,paste(fn_data,".archive",sep='')) - RMassBank::msmsWorkflow(w, mode=mode, steps=2:lastStep,archivename=fn_arch) -} - +conf <- function(fn_data,fn_cmpd_l,dest) { + no_drama_mkdir(dest) + wd <- fn_data2wd(fn_data,dest) + no_drama_mkdir(wd) + fn_inp_stgs <- get_inp_stgs_fn(fn_data) + fn_stgs <- get_stgs_fn(wd) + fn_out_cmpd_l <- get_cmpd_l_fn(wd) -##' Prepare single mbWorkspace object based on the workspace, the -##' infolist name and RMassBank settings. -##' -##' -##' @title Prepare Single mbWorkspace object -##' @param w MsmsWorkspace object. -##' @param fn_info Filename of the infolist to be generated. -##' @param fn_stgs Filename of the RMassBank settings. -##' @return A mbWorkspace object. -##' @author Todor Kondić -mb.prep.single<-function(w,fn_info,fn_stgs) { - RMassBank::loadRmbSettings(fn_stgs) - mb <- RMassBank::newMbWorkspace(w) - RMassBank::resetInfolists(mb) - RMassBank::mbWorkflow(mb,infolist_path=fn_info) + gen_fn_stgs(fn_inp_stgs,fn_stgs) + n_cmpd <- gen_cmpd_l(fn_cmpd_l,fn_out_cmpd_l) + gen_ftable(fn_data,wd,n_cmpd) } -##' Vectorize mb.prep function. -##' -##' -##' @title Vectorized mb.prep function. -##' @param w A sequence of msmsWorkspaces. -##' @param fn_info A sequence of infolist filenams to be generated. -##' @param fn_stgs A sequence of settings associated with each -##' msmsWorkspace object. -##' @return A list of mbWorkspaces. -##' @author Todor Kondić -mb.prep.v<-function(w,fn_info,fn_stgs) { - f<-Vectorize(mb.prep.single,vectorize.args=c("w","fn_info","fn_stgs"),SIMPLIFY=F) - res<-f(w,fn_info,fn_stgs) - names(res)<-names(w) - res -} - -##' Performs a single MassBank workflow after preparation. -##' -##' -##' @title Single MassBank workflow. -##' @param mb A mbWorkspace object. -##' @param infodir Directory containing the infolist. -##' @param fn_stgs The settings associated with the mbWorkspace -##' object. -##' @return A mbWorkflow object. -##' @author Todor Kondić -mb.single<-function(mb,infodir,fn_stgs) { +reconf <- function(wd) {## Load the settings. + fn_stgs <- get_stgs_fn(wd) RMassBank::loadRmbSettings(fn_stgs) - mb <- RMassBank::resetInfolists(mb) - mb <- RMassBank::loadInfolists(mb,infodir) - ## loadInfolists - ## addPeaks - prevd<-setwd(infodir) - res<-RMassBank::mbWorkflow(mb,step=1:8) - setwd(prevd) - res -} - -##' Vectorises presc.single. -##' -##' @title Vectorises presc.single -##' @param fn_data Sequence of mzML filenames. -##' @param fn_cmpd_l Compound list filename. -##' @param mode RMB mode. -##' @param ppm_lim_fine Prescreen fine limit (see ReSOLUTION prescreening function). -##' @param EIC_limit Prescreen EIC limit (see ReSOLUTION prescreening function). -##' @return Nothing useful. -##' @author Todor Kondić -##' @export -presc.v<-function(fn_data,fn_cmpd_l,mode,ppm_lim_fine=10,EIC_limit=0.001) { - idir<-function(n) file.path(".",stripext(n)) - wd <- sapply(fn_data,idir) - stgs_alist <- sapply(wd,function(d) {paste(d,".ini",sep='')}) - f<-Vectorize(presc.single,vectorize.args=c("fn_data","stgs_alist","wd"),SIMPLIFY=F) - f(fn_data,stgs_alist,wd,mode=mode,fn_cmpd_l=fn_cmpd_l,ppm_lim_fine=ppm_lim_fine,EIC_limit=EIC_limit) -} - - -##' Interface to vectorised spectral workflow. -##' -##' -##' @title Vectorised Spectral Workflow. -##' @param fn_data A sequence of mzML input files. -##' @param stgs_alist A list of named list of settings, or a list of -##' filenames of YAML files containing the settings. -##' @param wd The list of working directories. -##' @param fn_cmpd_list The compound list characterising the mixtures. -##' @param mode Same as in msmsRead. -##' @param readMethod Same as in msmsRead. -##' @param archdir Name of the archive. -##' @param lastStep The last step of the spectral workflow. -##' @param combine If TRUE, use combineMultiplicies to merge -##' workspaces corresponding to different collisional energies. -##' @return A named list of spectral workspaces. The names are derived -##' from data filenames. -##' @author Todor Kondić -v<-function(fn_data,stgs_alist,wd,fn_cmpd_list,mode,readMethod="mzR",archdir="archive",lastStep=8,combine=F) { - idir<-function(n) file.path(".",stripext(n)) - f<-Vectorize(single.sw,vectorize.args=c("wd","fn_data","stgs_alist"),SIMPLIFY=F) - rootdir <- getwd() - if (combine) { - z<-f(fn_data,stgs_alist,wd,fn_cmpd_list,mode,readMethod=readMethod,archdir=archdir,lastStep=7) - names(z)<-basename(fn_data) - zz<-RMassBank::combineMultiplicities(z) - - combdir<-"combined" - archdir<-file.path(rootdir,combdir,archdir) - no_drama_mkdir(combdir) - no_drama_mkdir(archdir) - fn_arch<-file.path(archdir,"archive") - fn_comb_stgs <- file.path(rootdir,combdir,paste(combdir,".mzML.ini",sep='')) - ddirs <- sapply(names(z),idir) - stgs_fls <- sapply(ddirs,function(x) file.path(x,paste(x,".mzML.ini",sep=''))) - mk_combine_file(stgs_fls,fn_comb_stgs) - - res<-list(RMassBank::msmsWorkflow(zz, steps=8, mode=mode, archivename = fn_arch)) - names(res)<-paste(combdir,".mzML",sep='') #Clearly a hack. - res - } else { - z<-f(fn_data,stgs_alist,wd,fn_cmpd_list,mode,readMethod=readMethod,archdir=archdir,lastStep=lastStep) - names(z)<-basename(fn_data) - z - } + ## Load the compound list. + fn_cmpd_l <- get_cmpd_l_fn(wd) + RMassBank::loadList(fn_cmpd_l) } ##' Prescreens. Writes data out. Adapted from ReSOLUTION @@ -437,33 +315,6 @@ RMB_EIC_prescreen_df <- function (wd, RMB_mode, FileList, cmpd_list, row.names = F) } - - -##' Parallel version of presc.single. -##' -##' @title Parallel version of presc.single -##' @param cl Cluster object. -##' @param fn_data Sequence of mzML files. -##' @param fn_cmpd_l Filename of the compound list. -##' @param mode RMB mode. -##' @param ppm_lim_fine See ReSOLUTION. -##' @param EIC_limit See ReSOLUTION. -##' @return Nothing useful. -##' @author Todor Kondić -##' @export -presc.p<-function(cl,fn_data,fn_cmpd_l,mode,ppm_lim_fine=10,EIC_limit=0.001) { - idir<-function(n) file.path(".",stripext(n)) - wd <- sapply(fn_data,idir) - stgs_alist <- sapply(wd,function(d) {paste(d,".ini",sep='')}) - - f <- function(fn_data,stgs_alist,wd) presc.single(fn_data=fn_data,stgs_alist=stgs_alist,wd=wd,mode=mode, - fn_cmpd_l=fn_cmpd_l,ppm_lim_fine=ppm_lim_fine,EIC_limit=EIC_limit) - - parallel::clusterMap(cl,fun=f,fn_data,stgs_alist,wd) - -} - - ##' Plot the output of prescreen. ##' ##' @title Plot the Output of Prescreen @@ -484,7 +335,7 @@ presc.plot <- function(wd,out="prescreen.pdf",pal="Dark2",cex=0.75,digits=6) { for (i in seq(length(eics))) { eic <- eics[[i]] maybekid <- maybekids[[i]] - fn_ini <- lapply(wd,function(x) file.path(x,list.files(path=x,patt="*.ini")[[1]])) + fn_ini <- lapply(wd,get_stgs_fn) lbls <- lapply(fn_ini,function(x) {s <- yaml::yaml.load_file(x);s$spectraList[[1]]$ce}) plot.new() @@ -551,65 +402,6 @@ presc.plot <- function(wd,out="prescreen.pdf",pal="Dark2",cex=0.75,digits=6) { } dev.off() } - - - - - -##' Interface to parallel spectral workflow. -##' -##' -##' @title Parallel Spectral Workflow. -##' @param fn_data A sequence of mzML input files. -##' @param stgs_alist A list of named list of settings, or a list of -##' filenames of YAML files containing the settings. -##' @param wd The list of working directories. -##' @param fn_cmpd_list The compound list characterising the mixtures. -##' @param mode Same as in msmsRead. -##' @param readMethod Same as in msmsRead. -##' @param archdir Name of the archive. -##' @param lastStep The last step in spectral workflow. -##' @param combine If TRUE, use combineMultiplicies to merge -##' workspaces corresponding to different collisional energies. -##' @param cl Cluster. -##' @return A named list of spectral workspaces. The names are derived -##' from data filenames. -##' @author Todor Kondić -p.sw<-function(fn_data,stgs_alist,wd,fn_cmpd_list,mode,readMethod="mzR",archdir="archive",lastStep=8,combine=F,cl=NULL) { - idir<-function(n) file.path(".",stripext(n)) - fnocomb<-function(fn,stgs,wd) { - single.sw(fn,stgs,wd,fn_cmpd_list,mode,readMethod,archdir,lastStep=lastStep) - } - fcomb<-function(fn,stgs,wd) { - single.sw(fn,stgs,wd,fn_cmpd_list,mode,readMethod,archdir,lastStep=7) - } - - if (combine) { - rootdir <- getwd() - z<-parallel::clusterMap(cl,fcomb,fn_data,stgs_alist,wd) - names(z)<-basename(fn_data) - zz<-RMassBank::combineMultiplicities(z) - - combdir<-"combined" - archdir<-file.path(rootdir,combdir,archdir) - no_drama_mkdir(combdir) - no_drama_mkdir(archdir) - fn_arch<-file.path(archdir,"archive") - fn_comb_stgs <- file.path(rootdir,combdir,paste(combdir,".mzML.ini",sep='')) - ddirs <- sapply(names(z),idir) - stgs_fls <- sapply(ddirs,function(x) file.path(x,paste(x,".mzML.ini",sep=''))) - mk_combine_file(stgs_fls,fn_comb_stgs) - - res<-list(RMassBank::msmsWorkflow(zz, steps=8, mode=mode, archivename = fn_arch)) - names(res)<-paste(combdir,".yml",sep='') #Clearly a hack. - res - } else { - z<-parallel::clusterMap(cl,fnocomb,fn_data,stgs_alist,wd) - names(z)<-basename(fn_data) - z - } -} - ##' Interface to vectorised Mass Bank workflow. ##' diff --git a/R/run.R b/R/run.R index b4187ccbb4959fe73992fd1e0477095d1d4d82fa..9e95cd40470c9444815ce4219f1b7de86cc4a886 100644 --- a/R/run.R +++ b/R/run.R @@ -13,20 +13,34 @@ attch<-function(...) paste(...,sep='') ##' @title Prescreening on bunch of files. ##' @param fn_data The mzML files. Basis for the out directory name ##' generation. -##' @param fn_cmpd_list The compound list CSV. +##' @param fn_cmpd_l The compound list. ##' @param mode RMB mode. +##' @param dest Destination directory. ##' @param proc Amount of processors, or FALSE. +##' @param fn_cmpd_list The compound list CSV. ##' @return Nothing useful. ##' @author Todor Kondić ##' @export -presc.do<-function(fn_data,fn_cmpd_list,mode,proc=F) { +presc.do<-function(fn_data,fn_cmpd_l,mode,dest=".",proc=F,...) { + conf(fn_data,fn_cmpd_l,dest) + fread <- function(fn_data) { + wd <- fn_data2wd(fn_data,dest) + gen_presc_d(wd) + reconf(wd) + message("Currently processing: ",wd) + fn_ftable <- get_ftable_fn(wd) + fn_cmpd_l <- get_cmpd_l_fn(wd) + RMB_EIC_prescreen_df(wd=wd,RMB_mode=mode,FileList=fn_ftable, + cmpd_list=fn_cmpd_l,...) + } if (proc) { - cl<-parallel::makeCluster(proc,type='FORK') - presc.p(cl=cl,fn_data,fn_cmpd_l=fn_cmpd_list,mode=mode) + cl<-parallel::makeCluster(proc) + parallel::clusterEvalQ(cl,library(rmbmix)) + parallel::clusterMap(cl,fread,fn_data) } else { - presc.v(fn_data,fn_cmpd_l=fn_cmpd_list,mode) + lapply(fn_data,fread) } } @@ -51,28 +65,62 @@ presc.do<-function(fn_data,fn_cmpd_list,mode,proc=F) { ##' ##' @title Perform MassBank Workflow on Multiple Compound Mixtures ##' @param fn_data List of mzML data filenames to be processed. -##' @param fn_cmpd_list Compound list. +##' @param fn_cmpd_l Compound list. ##' @param mode as in msmsRead. -##' @param rdir The root data directory. +##' @param dest The destination data directory. ##' @param combine If TRUE, use combineMultiplicies to merge ##' workspaces corresponding to different collisional energies. ##' @param proc Split work between this amount of processes. If FALSE ##' (or, 1), run sequential. +##' @param split This is the last step before combine ##' @return A named list of msmsWorkspace objects. ##' @author Todor Kondić ##' @export -sw.do<-function(fn_data,fn_cmpd_list,mode,rdir=".",combine=F,proc=F) { - no_drama_mkdir(rdir) - wdirs<-sapply(basename(fn_data),function(nm) file.path(rdir,stripext(nm))) - sapply(wdirs,no_drama_mkdir) - stgs<-sapply(basename(wdirs),function (nm) paste(nm,"yml",sep='.')) - - if (proc) { - cl<-parallel::makeCluster(proc,type='FORK') - p.sw(fn_data,stgs,wdirs,fn_cmpd_list,mode,combine=combine,cl=cl) - } else { - v(fn_data,stgs,wdirs,fn_cmpd_list,mode,combine=combine) +sw.do <- function(fn_data, fn_cmpd_l, mode, dest=".", combine=F, + proc=F,split=3) { + + conf(fn_data,fn_cmpd_l,dest) + fread <- function(fn_data) { + wd <- fn_data2wd(fn_data,dest) + reconf(wd) + w <- RMassBank::newMsmsWorkspace() + RMassBank::msmsRead(w=w,filetable = get_ftable_fn(wd), + mode=mode,readMethod = "mzR") + } + fwork <- Vectorize(function(w,wd,steps) { + archdir <- file.path(wd,"archive") + no_drama_mkdir(archdir) + fn_arch <- file.path(archdir,"archive") + reconf(wd) + RMassBank::msmsWorkflow(w=w,mode=mode,steps=steps,archivename = fn_arch) + }, vectorize.args = c("w","wd"),SIMPLIFY=F) + + w <- if (proc) { + cl=parallel::makeCluster(proc) + parallel::clusterEvalQ(cl,library(rmbmix)) + parallel::clusterMap(cl,fread,fn_data) + } else { + lapply(fn_data,fread) + } + wd <- fn_data2wd(fn_data,dest) + w <- fwork(w,wd,steps=2:split) + if (combine) { + ## Combined workflow is not based on a single file, but the + ## functions that generate config are. Therefore, lets create + ## a fake filename. + fakefile <- "combine.mzML" + cwd <- fn_data2wd(fakefile,dest) + xx <- get_stgs_fn(wd[[1]]) + file.copy(xx,"combine.ini",overwrite = T) + # mk_combine_file(get_stgs_fn(wd),"combine.ini") + conf(fakefile,fn_cmpd_l,dest) + reconf(cwd) + w <- list(RMassBank::combineMultiplicities(w)) + wd <- list(cwd) } + w <- fwork(w,wd,steps=(split+1):8) + names(w) <- wd + w } ##' Creates and prepares mbWorkspace objects before the full workflow @@ -83,17 +131,22 @@ sw.do<-function(fn_data,fn_cmpd_list,mode,rdir=".",combine=F,proc=F) { ##' ##' @title Prepare mbWorkspace objects ##' @param w A list of spectral workspace inputs. -##' @param rdir Data root. -##' @param proc Split work between this amount of processes. If FALSE ##' @return Named list of prepared mbWorkspace objects. ##' @author Todor Kondić ##' @export -mb.prep<-function(w,rdir=".") { - idir<-function(n) file.path(rdir,stripext(n)) - sapply(names(w),function (n) no_drama_mkdir(file.path(idir(n),"info"))) - fn_info<-sapply(names(w),function (n) file.path(idir(n),"info",attch(n,'.info.csv'))) - fn_stgs<-sapply(names(w),function(n) file.path(idir(n),attch(n,'.ini'))) - mb.prep.v(w,fn_info,fn_stgs) +mb.prep<-function(w) { + wd <- names(w) + fwork <- Vectorize(function(w,wd) { + reconf(wd) + idir <- gen_info_dir(wd) + mb <- RMassBank::newMbWorkspace(w) + RMassBank::resetInfolists(mb) + RMassBank::mbWorkflow(mb,infolist_path = get_info_fn(wd)) + },vectorize.args = c("w","wd")) + + mb <- fwork(w,wd) + names(mb) <- wd + mb } @@ -104,21 +157,27 @@ mb.prep<-function(w,rdir=".") { ##' ##' @title Perform the Mass Bank workflow ##' @param mb The list of prepared mbWorkspace objects. -##' @param rdir Root data dir. ##' @param proc Split work between this amount of processes. If FALSE ##' (or, 1), run sequential. ##' @return The named list of processed mbWorkspace objects. ##' @author Todor Kondić ##' @export -mb.do<-function(mb,rdir=".",proc=F) { - idir<-function(n) file.path(rdir,stripext(n)) - infodir<-sapply(names(mb),function(n) file.path(idir(n),"info")) - fn_stgs<-sapply(names(mb),function(n) file.path(idir(n),attch(n,'.ini'))) - +mb.do<-function(mb,proc=F) { + wd <- names(mb) + infodir <- get_info_dir(wd) + fwork <- Vectorize(function(mb,id,wd) { + reconf(wd) + mb <- RMassBank::resetInfolists(mb) + mb <- RMassBank::loadInfolists(mb,id) + dcur <- setwd(id) + mb <- RMassBank::mbWorkflow(mb,step=1:8) + setwd(dcur) + mb},vectorize.args = c("mb","id","wd")) if (proc) { - cl<-parallel::makeCluster(proc,type='FORK') - mb.p(mb,infodir,fn <- stgs,cl=cl) + cl<-parallel::makeCluster(proc) + parallel::clusterEvalQ(cl,library("rmbmix")) + parallel::clusterMap(cl,fwork,mb,infodir,wd) } else { - mb.v(mb,infodir,fn_stgs) + fwork(mb,infodir,wd) } } diff --git a/README.org b/README.org index 964bb466c032c81023c64471f31f8c7a14204ce2..06b9fd789b83db4635a05c0ce3d5c38bdd80740d 100644 --- a/README.org +++ b/README.org @@ -5,7 +5,8 @@ ** Prescreening 1. Generate prescreening data frames, #+BEGIN_SRC R - presc.do(list.files(patt=".*mzML"),"pH",fn_cmpd_list="./Pesticides.info.csv",proc=4) + presc.do(list.files(path="~/ECI_SCRATCH/20190627_HEROBER",patt=".*.mzML",full.names=T), + mode="pH",fn_cmpd_l="./cutlist.csv",proc=F,dest=".") #+END_SRC This is to be carried out inside the directory containing the ~mzML~ files. For sequential execution, leave out the ~proc~ @@ -13,7 +14,7 @@ 2. Plot, #+BEGIN_SRC R require(rmbmix) - presc.plot(normalizePath(list.dirs(".",recursive=F,full.names=F))) + presc.plot(list.files(".",patt="2.*POS",include.dirs=T),cex=0.7,digits=4) #+END_SRC The only argument is a sequence of absolute paths to directories containing the prescreen data. The resulting plot file is going diff --git a/man/gen_comp_list.Rd b/man/gen_cmpd_l.Rd similarity index 90% rename from man/gen_comp_list.Rd rename to man/gen_cmpd_l.Rd index e242bbf272b201a309db9387455829f2d5270455..e2a69af6910b934079dd85aa43e3763f50a6ff5e 100644 --- a/man/gen_comp_list.Rd +++ b/man/gen_cmpd_l.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/mix.R -\name{gen_comp_list} -\alias{gen_comp_list} +\name{gen_cmpd_l} +\alias{gen_cmpd_l} \title{Generate Compound List File} \usage{ -gen_comp_list(src_fn, dest_fn) +gen_cmpd_l(src_fn, dest_fn) } \arguments{ \item{src_fn}{The input compound list CSV filename.} diff --git a/man/gen_cmpdl_and_load.Rd b/man/gen_cmpdl_and_load.Rd index eb28d7831ea21875005dc192d21cba5ec99c1fdc..a782c7bc8c0b4c9138ac02f3733d4e45af5a204a 100644 --- a/man/gen_cmpdl_and_load.Rd +++ b/man/gen_cmpdl_and_load.Rd @@ -4,12 +4,12 @@ \alias{gen_cmpdl_and_load} \title{Generate and Load the RMassBank Compound List} \usage{ -gen_cmpdl_and_load(fn_data, wd, fn_cmpdl) +gen_cmpdl_and_load(wd, fn_cmpdl) } \arguments{ -\item{fn_data}{The mzML filename.} - \item{wd}{Directory under which results are archived.} + +\item{fn_cmpdl}{The input compound list filename.} } \value{ Named list. The key \code{fn_cmpdl} is the path of the diff --git a/man/gen_ftable.Rd b/man/gen_ftable.Rd new file mode 100644 index 0000000000000000000000000000000000000000..de5f27d08217170cf6b89b75d3dcb34741dd56ec --- /dev/null +++ b/man/gen_ftable.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mix.R +\name{gen_ftable} +\alias{gen_ftable} +\title{Generate and Load the RMassBank Settings File} +\usage{ +gen_ftable(fn_data, wd, n_cmpd) +} +\arguments{ +\item{fn_data}{The mzML filename.} + +\item{wd}{Directory under which results are archived.} + +\item{n_cmpd}{Number of compounds.} +} +\value{ +File path of the file table. +} +\description{ +Generates file table. +} +\author{ +Todor Kondić +} diff --git a/man/gen_stgs_and_load.Rd b/man/gen_stgs_and_load.Rd index 53ee0bba8eef9a9aa2d8173a34a1b88b0d7c5ff0..1fb26fcadaa0cd354710dcbf389120cdfd1913d9 100644 --- a/man/gen_stgs_and_load.Rd +++ b/man/gen_stgs_and_load.Rd @@ -4,11 +4,9 @@ \alias{gen_stgs_and_load} \title{Generate and Load the RMassBank Settings File} \usage{ -gen_stgs_and_load(fn_data, stgs, wd) +gen_stgs_and_load(stgs, wd) } \arguments{ -\item{fn_data}{The mzML filename.} - \item{stgs}{Settings named list, or a settings filename.} \item{wd}{Directory under which results are archived.} diff --git a/man/mb.do.Rd b/man/mb.do.Rd index 73e319d502ee46098ba30753221dca5eded59767..d77aec260a82277c5fdf639f315a0d87a37352fa 100644 --- a/man/mb.do.Rd +++ b/man/mb.do.Rd @@ -4,13 +4,11 @@ \alias{mb.do} \title{Perform the Mass Bank workflow} \usage{ -mb.do(mb, rdir = ".", proc = F) +mb.do(mb, proc = F) } \arguments{ \item{mb}{The list of prepared mbWorkspace objects.} -\item{rdir}{Root data dir.} - \item{proc}{Split work between this amount of processes. If FALSE (or, 1), run sequential.} } diff --git a/man/mb.prep.Rd b/man/mb.prep.Rd index 43fdac85f45a3111b019bf504123fda8c389ed39..24634416a375b2d9ef7432a635c44ead15888b2c 100644 --- a/man/mb.prep.Rd +++ b/man/mb.prep.Rd @@ -4,14 +4,10 @@ \alias{mb.prep} \title{Prepare mbWorkspace objects} \usage{ -mb.prep(w, rdir = ".") +mb.prep(w) } \arguments{ \item{w}{A list of spectral workspace inputs.} - -\item{rdir}{Data root.} - -\item{proc}{Split work between this amount of processes. If FALSE} } \value{ Named list of prepared mbWorkspace objects. diff --git a/man/mb.prep.single.Rd b/man/mb.prep.single.Rd deleted file mode 100644 index e0f116c7c0ab702e855be151903763801d5ac311..0000000000000000000000000000000000000000 --- a/man/mb.prep.single.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mix.R -\name{mb.prep.single} -\alias{mb.prep.single} -\title{Prepare Single mbWorkspace object} -\usage{ -mb.prep.single(w, fn_info, fn_stgs) -} -\arguments{ -\item{w}{MsmsWorkspace object.} - -\item{fn_info}{Filename of the infolist to be generated.} - -\item{fn_stgs}{Filename of the RMassBank settings.} -} -\value{ -A mbWorkspace object. -} -\description{ -Prepare single mbWorkspace object based on the workspace, the -infolist name and RMassBank settings. -} -\author{ -Todor Kondić -} diff --git a/man/mb.prep.v.Rd b/man/mb.prep.v.Rd deleted file mode 100644 index 2aba41e44d98c25a3834f6b2d9f6586b0fb03588..0000000000000000000000000000000000000000 --- a/man/mb.prep.v.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mix.R -\name{mb.prep.v} -\alias{mb.prep.v} -\title{Vectorized mb.prep function.} -\usage{ -mb.prep.v(w, fn_info, fn_stgs) -} -\arguments{ -\item{w}{A sequence of msmsWorkspaces.} - -\item{fn_info}{A sequence of infolist filenams to be generated.} - -\item{fn_stgs}{A sequence of settings associated with each -msmsWorkspace object.} -} -\value{ -A list of mbWorkspaces. -} -\description{ -Vectorize mb.prep function. -} -\author{ -Todor Kondić -} diff --git a/man/mb.single.Rd b/man/mb.single.Rd deleted file mode 100644 index 107e606c03ddf66ed3929a7b298560ec953429e6..0000000000000000000000000000000000000000 --- a/man/mb.single.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mix.R -\name{mb.single} -\alias{mb.single} -\title{Single MassBank workflow.} -\usage{ -mb.single(mb, infodir, fn_stgs) -} -\arguments{ -\item{mb}{A mbWorkspace object.} - -\item{infodir}{Directory containing the infolist.} - -\item{fn_stgs}{The settings associated with the mbWorkspace -object.} -} -\value{ -A mbWorkflow object. -} -\description{ -Performs a single MassBank workflow after preparation. -} -\author{ -Todor Kondić -} diff --git a/man/no_drama_mkdir.Rd b/man/no_drama_mkdir.Rd index aa1bc123bbaedb07749b901a4f2f92395fed7c29..c33d17dca9fe4c8aad8577c798f3294c3915f057 100644 --- a/man/no_drama_mkdir.Rd +++ b/man/no_drama_mkdir.Rd @@ -7,7 +7,7 @@ no_drama_mkdir(path) } \arguments{ -\item{path}{Name of the directory.} +\item{path}{Names of the directories.} } \value{ The character string containing the input argument \code{path}. diff --git a/man/p.sw.Rd b/man/p.sw.Rd deleted file mode 100644 index 3f64c0b861bddeeae16283d5dbaccbd25a3082d4..0000000000000000000000000000000000000000 --- a/man/p.sw.Rd +++ /dev/null @@ -1,42 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mix.R -\name{p.sw} -\alias{p.sw} -\title{Parallel Spectral Workflow.} -\usage{ -p.sw(fn_data, stgs_alist, wd, fn_cmpd_list, mode, readMethod = "mzR", - archdir = "archive", lastStep = 8, combine = F, cl = NULL) -} -\arguments{ -\item{fn_data}{A sequence of mzML input files.} - -\item{stgs_alist}{A list of named list of settings, or a list of -filenames of YAML files containing the settings.} - -\item{wd}{The list of working directories.} - -\item{fn_cmpd_list}{The compound list characterising the mixtures.} - -\item{mode}{Same as in msmsRead.} - -\item{readMethod}{Same as in msmsRead.} - -\item{archdir}{Name of the archive.} - -\item{lastStep}{The last step in spectral workflow.} - -\item{combine}{If TRUE, use combineMultiplicies to merge -workspaces corresponding to different collisional energies.} - -\item{cl}{Cluster.} -} -\value{ -A named list of spectral workspaces. The names are derived -from data filenames. -} -\description{ -Interface to parallel spectral workflow. -} -\author{ -Todor Kondić -} diff --git a/man/presc.do.Rd b/man/presc.do.Rd index f001b9bae73f1fd4e24fd0af8d85d96d98f85300..162677c29787fcad7eeb80ecc9a1eb03550f3636 100644 --- a/man/presc.do.Rd +++ b/man/presc.do.Rd @@ -4,17 +4,21 @@ \alias{presc.do} \title{Prescreening on bunch of files.} \usage{ -presc.do(fn_data, fn_cmpd_list, mode, proc = F) +presc.do(fn_data, fn_cmpd_l, mode, dest = ".", proc = F, ...) } \arguments{ \item{fn_data}{The mzML files. Basis for the out directory name generation.} -\item{fn_cmpd_list}{The compound list CSV.} +\item{fn_cmpd_l}{The compound list.} \item{mode}{RMB mode.} +\item{dest}{Destination directory.} + \item{proc}{Amount of processors, or FALSE.} + +\item{fn_cmpd_list}{The compound list CSV.} } \value{ Nothing useful. diff --git a/man/presc.plot.Rd b/man/presc.plot.Rd index b6721d192a3a955582bfdab6f7813857767c99c2..fd6139b74ffce5b6d5dd5dfd15aa91dc9caf3fa3 100644 --- a/man/presc.plot.Rd +++ b/man/presc.plot.Rd @@ -6,9 +6,6 @@ \usage{ presc.plot(wd, out = "prescreen.pdf", pal = "Dark2", cex = 0.75, digits = 6) - -presc.plot(wd, out = "prescreen.pdf", pal = "Accent") - } \arguments{ \item{wd}{Sequence of data dirs containing the prescreen subdir.} @@ -17,7 +14,6 @@ presc.plot(wd, out = "prescreen.pdf", pal = "Accent") \item{pal}{ColorBrewer palette name.} - \item{cex}{As in legend.} \item{digits}{Number of significant digits for peak ret times.} diff --git a/man/single.sw.Rd b/man/single.sw.Rd deleted file mode 100644 index 06fc2a0713a11f0fadaeec4681acd0ab5c95b0de..0000000000000000000000000000000000000000 --- a/man/single.sw.Rd +++ /dev/null @@ -1,40 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mix.R -\name{single.sw} -\alias{single.sw} -\title{RMassBank Spectral Workflow on a Single Compound Mixture} -\usage{ -single.sw(fn_data, stgs_alist, wd, fn_cmpd_list, mode, - readMethod = "mzR", archdir = "archive", lastStep = 8) -} -\arguments{ -\item{fn_data}{A mzML data file.} - -\item{stgs_alist}{RMassBank settings. It can either be a named -list of settings, or a filename of a YAML file.} - -\item{wd}{The name of the work directory.} - -\item{fn_cmpd_list}{The file name of he compound list -corresponding to \code{fn_data}.} - -\item{mode}{Modes as described in the standard workflow vignette -of RMassBank.} - -\item{readMethod}{Default read method is "mzR". Consult the -documentation of \code{msmsRead} for details.} - -\item{archdir}{The directory to store R objects created during -workflow execution.} - -\item{lastStep}{The last step in the workflow. Default is eight.} -} -\value{ -MsmsWorkspace object. -} -\description{ -Runs a compound mixture workflow on a single mzML file. -} -\author{ -Todor Kondić -} diff --git a/man/sw.do.Rd b/man/sw.do.Rd index a025d6c3c00957a17650cd9bd59bf124e82bf9ef..51ff104adb0878fc505d509264019c406da083c7 100644 --- a/man/sw.do.Rd +++ b/man/sw.do.Rd @@ -4,22 +4,25 @@ \alias{sw.do} \title{Perform MassBank Workflow on Multiple Compound Mixtures} \usage{ -sw.do(fn_data, fn_cmpd_list, mode, rdir = ".", combine = F, proc = F) +sw.do(fn_data, fn_cmpd_l, mode, dest = ".", combine = F, proc = F, + split = 3) } \arguments{ \item{fn_data}{List of mzML data filenames to be processed.} -\item{fn_cmpd_list}{Compound list.} +\item{fn_cmpd_l}{Compound list.} \item{mode}{as in msmsRead.} -\item{rdir}{The root data directory.} +\item{dest}{The destination data directory.} \item{combine}{If TRUE, use combineMultiplicies to merge workspaces corresponding to different collisional energies.} \item{proc}{Split work between this amount of processes. If FALSE (or, 1), run sequential.} + +\item{split}{This is the last step before combine} } \value{ A named list of msmsWorkspace objects. diff --git a/man/v.Rd b/man/v.Rd deleted file mode 100644 index 803834482a3f6d1bce3ed638a5fd3fc26afa4c90..0000000000000000000000000000000000000000 --- a/man/v.Rd +++ /dev/null @@ -1,40 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mix.R -\name{v} -\alias{v} -\title{Vectorised Spectral Workflow.} -\usage{ -v(fn_data, stgs_alist, wd, fn_cmpd_list, mode, readMethod = "mzR", - archdir = "archive", lastStep = 8, combine = F) -} -\arguments{ -\item{fn_data}{A sequence of mzML input files.} - -\item{stgs_alist}{A list of named list of settings, or a list of -filenames of YAML files containing the settings.} - -\item{wd}{The list of working directories.} - -\item{fn_cmpd_list}{The compound list characterising the mixtures.} - -\item{mode}{Same as in msmsRead.} - -\item{readMethod}{Same as in msmsRead.} - -\item{archdir}{Name of the archive.} - -\item{lastStep}{The last step of the spectral workflow.} - -\item{combine}{If TRUE, use combineMultiplicies to merge -workspaces corresponding to different collisional energies.} -} -\value{ -A named list of spectral workspaces. The names are derived -from data filenames. -} -\description{ -Interface to vectorised spectral workflow. -} -\author{ -Todor Kondić -}