Skip to content
Snippets Groups Projects
Commit 9bcf7c6c authored by Todor Kondic's avatar Todor Kondic
Browse files

Streamline mix.R and run.R

More sensible (re)loading of the configuration files, encapsulation of
certain operations and other fixes

* mix.R(get_.*): Generate various filenames.

* mix.R(various intermediate wrappers around RMassBank calls):
  removed.

* run.R(presc.do): Adjust to changes in mix.R

* man: Update docs.

* NAMESPACE: Update exports.
parent f1a30085
No related branches found
No related tags found
No related merge requests found
...@@ -3,8 +3,5 @@ ...@@ -3,8 +3,5 @@
export(mb.do) export(mb.do)
export(mb.prep) export(mb.prep)
export(presc.do) export(presc.do)
export(presc.p)
export(presc.plot) export(presc.plot)
export(presc.single)
export(presc.v)
export(sw.do) export(sw.do)
...@@ -70,6 +70,9 @@ fn_data2wd <- function(fn_data,dest) { ...@@ -70,6 +70,9 @@ fn_data2wd <- function(fn_data,dest) {
},vectorize.args="fn_data") },vectorize.args="fn_data")
f(fn_data) f(fn_data)
} }
## Path of the "prescreen" subdirectory located under the work
## directory `wd`.
get_presc_d <- function(wd) {
    presc_d <- file.path(wd, "prescreen")
    presc_d
}
## Create the prescreen subdirectory of the work directory `wd` by
## handing its path (see get_presc_d) to no_drama_mkdir. The value of
## that call is returned.
gen_presc_d <- function(wd) {
    presc_d <- get_presc_d(wd)
    no_drama_mkdir(presc_d)
}
...@@ -86,7 +89,7 @@ get_stgs_fn <- function(wd) { ...@@ -86,7 +89,7 @@ get_stgs_fn <- function(wd) {
} }
get_ftable_fn <- function(wd) { get_ftable_fn <- function(wd) {
f <- function(wd) file.path(wd,"ftable.ini") f <- function(wd) file.path(wd,"ftable.csv")
fv <- Vectorize(f,vectorize.args=c("wd")) fv <- Vectorize(f,vectorize.args=c("wd"))
fv(wd) fv(wd)
} }
...@@ -229,61 +232,6 @@ reconf <- function(wd) {## Load the settings. ...@@ -229,61 +232,6 @@ reconf <- function(wd) {## Load the settings.
RMassBank::loadList(fn_cmpd_l) RMassBank::loadList(fn_cmpd_l)
} }
##' Runs one prescreening pass for a single mzML file. The work
##' directory is created, the settings and the compound list are
##' generated and loaded, a file table is generated, and everything is
##' handed over to RMB_EIC_prescreen_df.
##'
##' @title Wrapper for RMB_EIC_Prescreen
##' @param fn_data The mzML filename.
##' @param stgs_alist Settings named list, or a settings filename.
##' @param wd Directory under which results are archived.
##' @param mode RMB mode.
##' @param fn_cmpd_l Filename of the compound list.
##' @param ppm_lim_fine Forwarded as the ppm_limit_fine argument of
##'     RMB_EIC_prescreen_df.
##' @param EIC_limit Forwarded as the EIC_limit argument of
##'     RMB_EIC_prescreen_df.
##' @return The value returned by RMB_EIC_prescreen_df.
##' @author Todor Kondić
##' @export
presc.single <- function(fn_data, stgs_alist, wd, mode, fn_cmpd_l,
                         ppm_lim_fine = 10, EIC_limit = 0.001) {
    no_drama_mkdir(wd)
    gen_stgs_and_load(stgs_alist, wd)

    ## Generate and load the compound list. The returned list carries
    ## the generated filename (fn_cmpdl) and the compound count (n).
    cmpd <- gen_cmpdl_and_load(wd, fn_cmpd_l)

    ## The file table is generated from the data file, the compound
    ## count and the work directory.
    ftable <- gen_file_table(fn_data, cmpd$n, wd)

    presc <- RMB_EIC_prescreen_df(wd = wd,
                                  RMB_mode = mode,
                                  FileList = ftable,
                                  cmpd_list = cmpd$fn_cmpdl,
                                  ppm_limit_fine = ppm_lim_fine,
                                  EIC_limit = EIC_limit)
    presc
}
##' Vectorises presc.single over a sequence of mzML files. For every
##' data file a work directory \code{file.path(".", stripext(file))}
##' is derived, and the settings file is expected next to it under
##' the same name with ".ini" appended.
##'
##' @title Vectorises presc.single
##' @param fn_data Sequence of mzML filenames.
##' @param fn_cmpd_l Compound list filename.
##' @param mode RMB mode.
##' @param ppm_lim_fine Prescreen fine limit (see ReSOLUTION prescreening function).
##' @param EIC_limit Prescreen EIC limit (see ReSOLUTION prescreening function).
##' @return A list holding one presc.single result per data file.
##' @author Todor Kondić
##' @export
presc.v <- function(fn_data, fn_cmpd_l, mode, ppm_lim_fine = 10, EIC_limit = 0.001) {
    idir <- function(n) file.path(".", stripext(n))

    ## vapply keeps the result a character vector even for empty
    ## input, where sapply would silently return an empty list.
    wd <- vapply(fn_data, idir, character(1))
    stgs_alist <- vapply(wd, function(d) paste0(d, ".ini"), character(1))

    ## SIMPLIFY is spelled out as FALSE: `F` is an ordinary variable
    ## and may be reassigned.
    f <- Vectorize(presc.single,
                   vectorize.args = c("fn_data", "stgs_alist", "wd"),
                   SIMPLIFY = FALSE)
    f(fn_data, stgs_alist, wd,
      mode = mode,
      fn_cmpd_l = fn_cmpd_l,
      ppm_lim_fine = ppm_lim_fine,
      EIC_limit = EIC_limit)
}
##' Prescreens. Writes data out. Adapted from ReSOLUTION ##' Prescreens. Writes data out. Adapted from ReSOLUTION
##' ##'
##' ##'
...@@ -367,33 +315,6 @@ RMB_EIC_prescreen_df <- function (wd, RMB_mode, FileList, cmpd_list, ...@@ -367,33 +315,6 @@ RMB_EIC_prescreen_df <- function (wd, RMB_mode, FileList, cmpd_list,
row.names = F) row.names = F)
} }
##' Parallel counterpart of presc.single: the data files are
##' distributed over the workers of a cluster with
##' parallel::clusterMap, and each worker calls presc.single on its
##' share.
##'
##' @title Parallel version of presc.single
##' @param cl Cluster object.
##' @param fn_data Sequence of mzML files.
##' @param fn_cmpd_l Filename of the compound list.
##' @param mode RMB mode.
##' @param ppm_lim_fine See ReSOLUTION.
##' @param EIC_limit See ReSOLUTION.
##' @return The value of the parallel::clusterMap call.
##' @author Todor Kondić
##' @export
presc.p <- function(cl, fn_data, fn_cmpd_l, mode, ppm_lim_fine = 10, EIC_limit = 0.001) {
    ## One work directory per data file, and one settings file named
    ## after that directory with ".ini" appended.
    dirs <- sapply(fn_data, function(n) file.path(".", stripext(n)))
    stgs <- sapply(dirs, function(d) {
        paste(d, ".ini", sep = '')
    })

    ## The per-file arguments arrive positionally from clusterMap; the
    ## remaining ones are taken from the enclosing call.
    worker <- function(data_file, stgs_file, work_dir) {
        presc.single(fn_data = data_file,
                     stgs_alist = stgs_file,
                     wd = work_dir,
                     mode = mode,
                     fn_cmpd_l = fn_cmpd_l,
                     ppm_lim_fine = ppm_lim_fine,
                     EIC_limit = EIC_limit)
    }

    parallel::clusterMap(cl, fun = worker, fn_data, stgs, dirs)
}
##' Plot the output of prescreen. ##' Plot the output of prescreen.
##' ##'
##' @title Plot the Output of Prescreen ##' @title Plot the Output of Prescreen
...@@ -414,7 +335,7 @@ presc.plot <- function(wd,out="prescreen.pdf",pal="Dark2",cex=0.75,digits=6) { ...@@ -414,7 +335,7 @@ presc.plot <- function(wd,out="prescreen.pdf",pal="Dark2",cex=0.75,digits=6) {
for (i in seq(length(eics))) { for (i in seq(length(eics))) {
eic <- eics[[i]] eic <- eics[[i]]
maybekid <- maybekids[[i]] maybekid <- maybekids[[i]]
fn_ini <- lapply(wd,function(x) file.path(x,list.files(path=x,patt="*.ini")[[1]])) fn_ini <- lapply(wd,get_stgs_fn)
lbls <- lapply(fn_ini,function(x) {s <- yaml::yaml.load_file(x);s$spectraList[[1]]$ce}) lbls <- lapply(fn_ini,function(x) {s <- yaml::yaml.load_file(x);s$spectraList[[1]]$ce})
plot.new() plot.new()
......
...@@ -13,20 +13,34 @@ attch<-function(...) paste(...,sep='') ...@@ -13,20 +13,34 @@ attch<-function(...) paste(...,sep='')
##' @title Prescreening on bunch of files. ##' @title Prescreening on bunch of files.
##' @param fn_data The mzML files. Basis for the out directory name ##' @param fn_data The mzML files. Basis for the out directory name
##' generation. ##' generation.
##' @param fn_cmpd_list The compound list CSV. ##' @param fn_cmpd_l The compound list.
##' @param mode RMB mode. ##' @param mode RMB mode.
##' @param dest Destination directory.
##' @param proc Amount of processors, or FALSE. ##' @param proc Amount of processors, or FALSE.
##' @param ... Further arguments passed on to RMB_EIC_prescreen_df.
##' @return Nothing useful. ##' @return Nothing useful.
##' @author Todor Kondić ##' @author Todor Kondić
##' @export ##' @export
presc.do<-function(fn_data,fn_cmpd_list,mode,proc=F) { presc.do<-function(fn_data,fn_cmpd_l,mode,dest=".",proc=F,...) {
conf(fn_data,fn_cmpd_l,dest)
fread <- function(fn_data) {
wd <- fn_data2wd(fn_data,dest)
gen_presc_d(wd)
reconf(wd)
message("Currently processing: ",wd)
fn_ftable <- get_ftable_fn(wd)
fn_cmpd_l <- get_cmpd_l_fn(wd)
RMB_EIC_prescreen_df(wd=wd,RMB_mode=mode,FileList=fn_ftable,
cmpd_list=fn_cmpd_l,...)
}
if (proc) { if (proc) {
cl<-parallel::makeCluster(proc,type='FORK') cl<-parallel::makeCluster(proc)
presc.p(cl=cl,fn_data,fn_cmpd_l=fn_cmpd_list,mode=mode) parallel::clusterEvalQ(cl,library(rmbmix))
parallel::clusterMap(cl,fread,fn_data)
} else { } else {
presc.v(fn_data,fn_cmpd_l=fn_cmpd_list,mode) lapply(fn_data,fread)
} }
} }
......
% Generated by roxygen2: do not edit by hand % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mix.R % Please edit documentation in R/mix.R
\name{gen_comp_list} \name{gen_cmpd_l}
\alias{gen_comp_list} \alias{gen_cmpd_l}
\title{Generate Compound List File} \title{Generate Compound List File}
\usage{ \usage{
gen_comp_list(src_fn, dest_fn) gen_cmpd_l(src_fn, dest_fn)
} }
\arguments{ \arguments{
\item{src_fn}{The input compound list CSV filename.} \item{src_fn}{The input compound list CSV filename.}
......
...@@ -4,12 +4,12 @@ ...@@ -4,12 +4,12 @@
\alias{gen_cmpdl_and_load} \alias{gen_cmpdl_and_load}
\title{Generate and Load the RMassBank Compound List} \title{Generate and Load the RMassBank Compound List}
\usage{ \usage{
gen_cmpdl_and_load(fn_data, wd, fn_cmpdl) gen_cmpdl_and_load(wd, fn_cmpdl)
} }
\arguments{ \arguments{
\item{fn_data}{The mzML filename.}
\item{wd}{Directory under which results are archived.} \item{wd}{Directory under which results are archived.}
\item{fn_cmpdl}{The input compound list filename.}
} }
\value{ \value{
Named list. The key \code{fn_cmpdl} is the path of the Named list. The key \code{fn_cmpdl} is the path of the
......
% Generated by roxygen2: do not edit by hand % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mix.R % Please edit documentation in R/mix.R
\name{gen_file_table} \name{gen_ftable}
\alias{gen_file_table} \alias{gen_ftable}
\title{Generate and Load the RMassBank Settings File} \title{Generate and Load the RMassBank Settings File}
\usage{ \usage{
gen_file_table(fn_data, n_cmpd, wd) gen_ftable(fn_data, wd, n_cmpd)
} }
\arguments{ \arguments{
\item{fn_data}{The mzML filename.} \item{fn_data}{The mzML filename.}
\item{n_cmpd}{Number of compounds.}
\item{wd}{Directory under which results are archived.} \item{wd}{Directory under which results are archived.}
\item{n_cmpd}{Number of compounds.}
} }
\value{ \value{
File path of the file table. File path of the file table.
......
...@@ -4,11 +4,9 @@ ...@@ -4,11 +4,9 @@
\alias{gen_stgs_and_load} \alias{gen_stgs_and_load}
\title{Generate and Load the RMassBank Settings File} \title{Generate and Load the RMassBank Settings File}
\usage{ \usage{
gen_stgs_and_load(fn_data, stgs, wd) gen_stgs_and_load(stgs, wd)
} }
\arguments{ \arguments{
\item{fn_data}{The mzML filename.}
\item{stgs}{Settings named list, or a settings filename.} \item{stgs}{Settings named list, or a settings filename.}
\item{wd}{Directory under which results are archived.} \item{wd}{Directory under which results are archived.}
......
...@@ -4,13 +4,11 @@ ...@@ -4,13 +4,11 @@
\alias{mb.do} \alias{mb.do}
\title{Perform the Mass Bank workflow} \title{Perform the Mass Bank workflow}
\usage{ \usage{
mb.do(mb, rdir = ".", proc = F) mb.do(mb, proc = F)
} }
\arguments{ \arguments{
\item{mb}{The list of prepared mbWorkspace objects.} \item{mb}{The list of prepared mbWorkspace objects.}
\item{rdir}{Root data dir.}
\item{proc}{Split work between this amount of processes. If FALSE \item{proc}{Split work between this amount of processes. If FALSE
(or, 1), run sequential.} (or, 1), run sequential.}
} }
......
...@@ -4,14 +4,10 @@ ...@@ -4,14 +4,10 @@
\alias{mb.prep} \alias{mb.prep}
\title{Prepare mbWorkspace objects} \title{Prepare mbWorkspace objects}
\usage{ \usage{
mb.prep(w, rdir = ".") mb.prep(w)
} }
\arguments{ \arguments{
\item{w}{A list of spectral workspace inputs.} \item{w}{A list of spectral workspace inputs.}
\item{rdir}{Data root.}
\item{proc}{Split work between this amount of processes. If FALSE}
} }
\value{ \value{
Named list of prepared mbWorkspace objects. Named list of prepared mbWorkspace objects.
......
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mix.R
\name{mb.prep.single}
\alias{mb.prep.single}
\title{Prepare Single mbWorkspace object}
\usage{
mb.prep.single(w, fn_info, fn_stgs)
}
\arguments{
\item{w}{MsmsWorkspace object.}
\item{fn_info}{Filename of the infolist to be generated.}
\item{fn_stgs}{Filename of the RMassBank settings.}
}
\value{
A mbWorkspace object.
}
\description{
Prepare single mbWorkspace object based on the workspace, the
infolist name and RMassBank settings.
}
\author{
Todor Kondić
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mix.R
\name{mb.prep.v}
\alias{mb.prep.v}
\title{Vectorized mb.prep function.}
\usage{
mb.prep.v(w, fn_info, fn_stgs)
}
\arguments{
\item{w}{A sequence of msmsWorkspaces.}
\item{fn_info}{A sequence of infolist filenames to be generated.}
\item{fn_stgs}{A sequence of settings associated with each
msmsWorkspace object.}
}
\value{
A list of mbWorkspaces.
}
\description{
Vectorize mb.prep function.
}
\author{
Todor Kondić
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mix.R
\name{mb.single}
\alias{mb.single}
\title{Single MassBank workflow.}
\usage{
mb.single(mb, infodir, fn_stgs)
}
\arguments{
\item{mb}{A mbWorkspace object.}
\item{infodir}{Directory containing the infolist.}
\item{fn_stgs}{The settings associated with the mbWorkspace
object.}
}
\value{
A mbWorkflow object.
}
\description{
Performs a single MassBank workflow after preparation.
}
\author{
Todor Kondić
}
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
no_drama_mkdir(path) no_drama_mkdir(path)
} }
\arguments{ \arguments{
\item{path}{Name of the directory.} \item{path}{Names of the directories.}
} }
\value{ \value{
The character string containing the input argument \code{path}. The character string containing the input argument \code{path}.
......
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mix.R
\name{p.sw}
\alias{p.sw}
\title{Parallel Spectral Workflow.}
\usage{
p.sw(cl, fn_data, stgs_alist, wd, fn_cmpd_list, mode, readMethod = "mzR",
archdir = "archive", lastStep = 8, combine = F,
combdest = "combined")
}
\arguments{
\item{cl}{Cluster.}
\item{fn_data}{A sequence of mzML input files.}
\item{stgs_alist}{A list of named list of settings, or a list of
filenames of YAML files containing the settings.}
\item{wd}{The list of working directories.}
\item{fn_cmpd_list}{The compound list characterising the mixtures.}
\item{mode}{Same as in msmsRead.}
\item{readMethod}{Same as in msmsRead.}
\item{archdir}{Name of the archive.}
\item{lastStep}{The last step in spectral workflow.}
\item{combine}{If TRUE, use combineMultiplicies to merge
workspaces corresponding to different collisional energies.}
\item{combdest}{Combine destination directory.}
}
\value{
A named list of spectral workspaces. The names are derived
from data filenames.
}
\description{
Interface to parallel spectral workflow.
}
\author{
Todor Kondić
}
...@@ -4,17 +4,21 @@ ...@@ -4,17 +4,21 @@
\alias{presc.do} \alias{presc.do}
\title{Prescreening on bunch of files.} \title{Prescreening on bunch of files.}
\usage{ \usage{
presc.do(fn_data, fn_cmpd_list, mode, proc = F) presc.do(fn_data, fn_cmpd_l, mode, dest = ".", proc = F, ...)
} }
\arguments{ \arguments{
\item{fn_data}{The mzML files. Basis for the out directory name \item{fn_data}{The mzML files. Basis for the out directory name
generation.} generation.}
\item{fn_cmpd_list}{The compound list CSV.} \item{fn_cmpd_l}{The compound list.}
\item{mode}{RMB mode.} \item{mode}{RMB mode.}
\item{dest}{Destination directory.}
\item{proc}{Amount of processors, or FALSE.} \item{proc}{Amount of processors, or FALSE.}
\item{fn_cmpd_list}{The compound list CSV.}
} }
\value{ \value{
Nothing useful. Nothing useful.
......
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mix.R
\name{presc.p}
\alias{presc.p}
\title{Parallel version of presc.single}
\usage{
presc.p(cl, fn_data, fn_cmpd_l, mode, ppm_lim_fine = 10,
EIC_limit = 0.001)
}
\arguments{
\item{cl}{Cluster object.}
\item{fn_data}{Sequence of mzML files.}
\item{fn_cmpd_l}{Filename of the compound list.}
\item{mode}{RMB mode.}
\item{ppm_lim_fine}{See ReSOLUTION.}
\item{EIC_limit}{See ReSOLUTION.}
}
\value{
Nothing useful.
}
\description{
Parallel version of presc.single.
}
\author{
Todor Kondić
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mix.R
\name{presc.single}
\alias{presc.single}
\title{Wrapper for RMB_EIC_Prescreen}
\usage{
presc.single(fn_data, stgs_alist, wd, mode, fn_cmpd_l, ppm_lim_fine = 10,
EIC_limit = 0.001)
}
\arguments{
\item{fn_data}{The mzML filename.}
\item{stgs_alist}{Settings named list, or a settings filename.}
\item{wd}{Directory under which results are archived.}
\item{mode}{RMB mode.}
\item{fn_cmpd_l}{Filename of the compound list.}
\item{ppm_lim_fine}{The ppm_limit_fine argument to RMB_EIC_Prescreen}
\item{EIC_limit}{Passed down to RMB_EIC_Prescreen.}
}
\value{
result of RMB_EIC_Prescreen
}
\description{
Wrapper for a single prescreening call. Produces output in the
usual mix method places.
}
\author{
Todor Kondić
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mix.R
\name{presc.v}
\alias{presc.v}
\title{Vectorises presc.single}
\usage{
presc.v(fn_data, fn_cmpd_l, mode, ppm_lim_fine = 10, EIC_limit = 0.001)
}
\arguments{
\item{fn_data}{Sequence of mzML filenames.}
\item{fn_cmpd_l}{Compound list filename.}
\item{mode}{RMB mode.}
\item{ppm_lim_fine}{Prescreen fine limit (see ReSOLUTION prescreening function).}
\item{EIC_limit}{Prescreen EIC limit (see ReSOLUTION prescreening function).}
}
\value{
Nothing useful.
}
\description{
Vectorises presc.single.
}
\author{
Todor Kondić
}
...@@ -4,12 +4,13 @@ ...@@ -4,12 +4,13 @@
\alias{sw.do} \alias{sw.do}
\title{Perform MassBank Workflow on Multiple Compound Mixtures} \title{Perform MassBank Workflow on Multiple Compound Mixtures}
\usage{ \usage{
sw.do(fn_data, fn_cmpd_list, mode, dest = ".", combine = F, proc = F) sw.do(fn_data, fn_cmpd_l, mode, dest = ".", combine = F, proc = F,
split = 3)
} }
\arguments{ \arguments{
\item{fn_data}{List of mzML data filenames to be processed.} \item{fn_data}{List of mzML data filenames to be processed.}
\item{fn_cmpd_list}{Compound list.} \item{fn_cmpd_l}{Compound list.}
\item{mode}{as in msmsRead.} \item{mode}{as in msmsRead.}
...@@ -20,6 +21,8 @@ workspaces corresponding to different collisional energies.} ...@@ -20,6 +21,8 @@ workspaces corresponding to different collisional energies.}
\item{proc}{Split work between this amount of processes. If FALSE \item{proc}{Split work between this amount of processes. If FALSE
(or, 1), run sequential.} (or, 1), run sequential.}
\item{split}{This is the last step before combine}
} }
\value{ \value{
A named list of msmsWorkspace objects. A named list of msmsWorkspace objects.
......
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mix.R
\name{sw.single.1}
\alias{sw.single.1}
\title{RMassBank Spectral Workflow on a Single Compound Mixture (step 1)}
\usage{
sw.single.1(fn_data, stgs_alist, wd, fn_cmpd_list, mode,
readMethod = "mzR")
}
\arguments{
\item{fn_data}{A mzML data file.}
\item{stgs_alist}{RMassBank settings. It can either be a named
list of settings, or a filename of a YAML file.}
\item{wd}{The name of the work directory.}
\item{fn_cmpd_list}{The file name of the compound list
corresponding to \code{fn_data}.}
\item{mode}{Modes as described in the standard workflow vignette
of RMassBank.}
\item{readMethod}{Default read method is "mzR". Consult the
documentation of \code{msmsRead} for details.}
}
\value{
MsmsWorkspace object.
}
\description{
Runs the first step of the compound mixture workflow on a single mzML file.
}
\author{
Todor Kondić
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment