Skip to content
Snippets Groups Projects
Commit 4912825a authored by Todor Kondic's avatar Todor Kondic
Browse files

Add README.org

* README.org: New file.
parent 4f195e84
No related branches found
No related tags found
No related merge requests found
......@@ -20,4 +20,6 @@ Collate:
'mix.R'
'run.R'
Imports:
RMassBank
RMassBank,
parallel,
yaml
......@@ -4,6 +4,7 @@ stripext<-function(fn) {
##' Create directories without drama.
##'
##' Create directories without drama.
##'
##' @title Create directories without drama
##' @param path Name of the directory.
......@@ -26,19 +27,39 @@ no_drama_mkdir<-function(path) {
##' @param file The name of the YAML specification that will be merged
##' with the template Rmb settings file.
##' @return NULL
##' @author Todor Kondić
mk_sett_file<-function(sett_alist,file) {
    ## Write the stock RMassBank settings template to a scratch file
    ## and make sure the scratch file is removed when we are done.
    tmp<-tempfile()
    on.exit(unlink(tmp),add=TRUE)
    RMassBank::RmbSettingsTemplate(tmp)
    ## Namespace-qualified calls: no need to attach yaml, and a
    ## missing package fails loudly instead of require() returning
    ## FALSE.
    sett<-yaml::yaml.load_file(tmp)
    ## Override the top-level template entries with the user supplied
    ## ones.
    for (nm in names(sett_alist)) {
        sett[[nm]]<-sett_alist[[nm]]
    }
    yaml::write_yaml(x=sett,file=file)
    NULL
}
##' Combine the RMB settings files
##'
##' Combine RMB settings with different collisional energies into one
##' settings file with multiple collisional energy entries. The
##' settings of the first file serve as the base; the n-th entry of
##' its spectraList is replaced by the first spectraList entry of the
##' n-th file.
##'
##' @title Combine RMB Settings With Different Collisional Energies
##' @param sett_fns A list of settings files. Must not be empty.
##' @param fname The name of the combined file.
##' @return fname
##' @author Todor Kondić
mk_combine_file<-function(sett_fns,fname) {
    if (length(sett_fns) == 0) {
        stop("No settings files to combine.")
    }
    all_settings <- lapply(sett_fns,yaml::yaml.load_file)
    comb_settings <- all_settings[[1]]
    ## seq_along is safe for any length, unlike 1:length(...).
    for (n in seq_along(all_settings)) {
        comb_settings$spectraList[[n]] <- all_settings[[n]]$spectraList[[1]]
    }
    yaml::write_yaml(x=comb_settings,file=fname)
    fname
}
##' Generate the RMassBank compound list from the input compound list
##' in CSV file src_fn. The input compound list format is either a
##' Chemical Dashboard csv file with, at least, PREFERRED_ SMILES
......@@ -86,14 +107,12 @@ gen_comp_list<-function(src_fn,dest_fn) {
##' documentation of `msmsRead` for details.
##' @param archdir The directory to store R objects created during
##' workflow execution.
##' @param lastStep The last step in the workflow. Default is eight.
##' @return MsmsWorkspace object.
##' @author Todor Kondić
single.sw<-function(fn_data,stgs_alist,wd,fn_cmpd_list,mode,readMethod="mzR",archdir="archive") {
require(RMassBank)
require(yaml)
single.sw<-function(fn_data,stgs_alist,wd,fn_cmpd_list,mode,readMethod="mzR",archdir="archive",lastStep=8) {
## Generate settings file and load.
stgs_alist<-if (is.character(stgs_alist)) yaml.load_file(stgs_alist) else stgs_alist
stgs_alist<-if (is.character(stgs_alist)) yaml::yaml.load_file(stgs_alist) else stgs_alist
sfn<-file.path(wd,paste(fn_data,".ini",sep=''))
mk_sett_file(stgs_alist,sfn)
RMassBank::loadRmbSettings(sfn)
......@@ -109,14 +128,14 @@ single.sw<-function(fn_data,stgs_alist,wd,fn_cmpd_list,mode,readMethod="mzR",arc
write.csv(x=df_table,file=fn_table,row.names=F)
## Make empty workspace.
w <- newMsmsWorkspace()
w <- RMassBank::newMsmsWorkspace()
## Run the workflow.
message(paste("Reading in file:",fn_data))
w <-msmsRead(w,filetable=fn_table,readMethod="mzR",mode=mode)
w <-RMassBank::msmsRead(w,filetable=fn_table,readMethod="mzR",mode=mode)
archdir<-file.path(wd,archdir)
if (!dir.exists(archdir)) dir.create(archdir)
fn_arch<-file.path(archdir,paste(fn_data,".archive",sep=''))
RMassBank::msmsWorkflow(w, mode=mode, steps=2:8,archivename=fn_arch)
RMassBank::msmsWorkflow(w, mode=mode, steps=2:lastStep,archivename=fn_arch)
}
......@@ -193,14 +212,95 @@ mb.single<-function(mb,infodir,fn_stgs) {
##' @param mode Same as in msmsRead.
##' @param readMethod Same as in msmsRead.
##' @param archdir Name of the archive.
##' @param lastStep The last step of the spectral workflow.
##' @param combine If TRUE, use combineMultiplicities to merge
##'     workspaces corresponding to different collisional energies.
##' @return A named list of spectral workspaces. The names are derived
##' from data filenames.
##' @author Todor Kondić
v<-function(fn_data,stgs_alist,wd,fn_cmpd_list,mode,readMethod="mzR",archdir="archive",lastStep=8,combine=FALSE) {
    ## Per-file output directory, relative to the current directory.
    idir<-function(n) file.path(".",stripext(n))
    ## single.sw vectorised over data files, settings and working
    ## directories; the remaining arguments are shared by all runs.
    f<-Vectorize(single.sw,vectorize.args=c("wd","fn_data","stgs_alist"),SIMPLIFY=FALSE)
    rootdir <- getwd()
    if (combine) {
        ## Run every file up to step 7 only; step 8 is performed once
        ## on the merged workspace below.
        z<-f(fn_data,stgs_alist,wd,fn_cmpd_list,mode,readMethod=readMethod,archdir=archdir,lastStep=7)
        names(z)<-basename(fn_data)
        zz<-RMassBank::combineMultiplicities(z)

        combdir<-"combined"
        archdir<-file.path(rootdir,combdir,archdir)
        no_drama_mkdir(combdir)
        no_drama_mkdir(archdir)
        fn_arch<-file.path(archdir,"archive")

        ## Merge the per-file settings into one settings file for the
        ## combined workspace.
        fn_comb_stgs <- file.path(rootdir,combdir,paste(combdir,".mzML.ini",sep=''))
        ddirs <- sapply(names(z),idir)
        stgs_fls <- sapply(ddirs,function(x) file.path(x,paste(x,".mzML.ini",sep='')))
        mk_combine_file(stgs_fls,fn_comb_stgs)

        res<-list(RMassBank::msmsWorkflow(zz, steps=8, mode=mode, archivename = fn_arch))
        ## The name mimics a data file name so that it matches the
        ## "combined.mzML.ini" settings file written above.
        names(res)<-paste(combdir,".mzML",sep='') #Clearly a hack.
        res
    } else {
        z<-f(fn_data,stgs_alist,wd,fn_cmpd_list,mode,readMethod=readMethod,archdir=archdir,lastStep=lastStep)
        names(z)<-basename(fn_data)
        z
    }
}
##' Interface to parallel spectral workflow.
##'
##'
##' @title Parallel Spectral Workflow.
##' @param fn_data A sequence of mzML input files.
##' @param stgs_alist A list of named list of settings, or a list of
##' filenames of YAML files containing the settings.
##' @param wd The list of working directories.
##' @param fn_cmpd_list The compound list characterising the mixtures.
##' @param mode Same as in msmsRead.
##' @param readMethod Same as in msmsRead.
##' @param archdir Name of the archive.
##' @param lastStep The last step in spectral workflow.
##' @param combine If TRUE, use combineMultiplicities to merge
##'     workspaces corresponding to different collisional energies.
##' @param cl Cluster.
##' @return A named list of spectral workspaces. The names are derived
##' from data filenames.
##' @author Todor Kondić
p.sw<-function(fn_data,stgs_alist,wd,fn_cmpd_list,mode,readMethod="mzR",archdir="archive",lastStep=8,combine=FALSE,cl=NULL) {
    ## Per-file output directory, relative to the current directory.
    idir<-function(n) file.path(".",stripext(n))
    ## Worker for the plain run: the workflow goes up to lastStep.
    fnocomb<-function(fn,stgs,wd) {
        single.sw(fn,stgs,wd,fn_cmpd_list,mode,readMethod,archdir,lastStep=lastStep)
    }
    ## Worker for the combined run: stop at step 7, step 8 is done
    ## once on the merged workspace.
    fcomb<-function(fn,stgs,wd) {
        single.sw(fn,stgs,wd,fn_cmpd_list,mode,readMethod,archdir,lastStep=7)
    }

    if (combine) {
        rootdir <- getwd()
        z<-parallel::clusterMap(cl,fcomb,fn_data,stgs_alist,wd)
        names(z)<-basename(fn_data)
        zz<-RMassBank::combineMultiplicities(z)

        combdir<-"combined"
        archdir<-file.path(rootdir,combdir,archdir)
        no_drama_mkdir(combdir)
        no_drama_mkdir(archdir)
        fn_arch<-file.path(archdir,"archive")

        ## Merge the per-file settings into one settings file for the
        ## combined workspace.
        fn_comb_stgs <- file.path(rootdir,combdir,paste(combdir,".mzML.ini",sep=''))
        ddirs <- sapply(names(z),idir)
        stgs_fls <- sapply(ddirs,function(x) file.path(x,paste(x,".mzML.ini",sep='')))
        mk_combine_file(stgs_fls,fn_comb_stgs)

        res<-list(RMassBank::msmsWorkflow(zz, steps=8, mode=mode, archivename = fn_arch))
        ## The name must mimic a data file name ("combined.mzML"), as
        ## in the sequential interface v, so that it matches the
        ## "combined.mzML.ini" settings file written above.
        names(res)<-paste(combdir,".mzML",sep='') #Clearly a hack.
        res
    } else {
        z<-parallel::clusterMap(cl,fnocomb,fn_data,stgs_alist,wd)
        names(z)<-basename(fn_data)
        z
    }
}
##' Interface to vectorised Mass Bank workflow.
##'
......@@ -217,3 +317,20 @@ mb.v<-function(mb,infodir,fn_stgs) {
x<-f(mb,infodir,fn_stgs)
names(x)<-names(mb)
x}
##' Interface to parallelised Mass Bank workflow.
##'
##'
##' @title Parallel Mass Bank Workflow
##' @param mb List of mass bank workflow objects
##' @param infodir List of subdirs containing info lists.
##' @param fn_stgs List of settings files.
##' @param cl Cluster.
##' @return A named list of mbWorkspace objects. The names are derived
##' from the input mb sequence.
##' @author Todor Kondić
mb.p<-function(mb,infodir,fn_stgs,cl=F) {
    ## Run mb.single on the cluster, one call per
    ## (workspace, info directory, settings file) triple.
    res<-parallel::clusterMap(cl=cl,fun=mb.single,mb,infodir,fn_stgs)
    ## Label the results after the input workspaces.
    names(res)<-names(mb)
    res
}
......@@ -29,16 +29,25 @@ attch<-function(...) paste(...,sep='')
##' @param fn_cmpd_list Compound list.
##' @param mode as in msmsRead.
##' @param rdir The root data directory.
##' @param combine If TRUE, use combineMultiplicities to merge
##'     workspaces corresponding to different collisional energies.
##' @param proc Split work between this number of processes. If FALSE
##'     (or, 1), run sequentially.
##' @return A named list of msmsWorkspace objects.
##' @author Todor Kondić
##' @export
sw.do<-function(fn_data,fn_cmpd_list,mode,rdir=".",combine=FALSE,proc=FALSE) {
    ## One working directory per data file, named after the file
    ## without its extension.
    no_drama_mkdir(rdir)
    wdirs<-sapply(basename(fn_data),function(nm) file.path(rdir,stripext(nm)))
    sapply(wdirs,no_drama_mkdir)
    ## Settings are expected in <name>.yml, <name> being the base name
    ## of the working directory.
    stgs<-sapply(basename(wdirs),function (nm) paste(nm,"yml",sep='.'))

    ## As documented, FALSE or 1 means a sequential run; only spin up
    ## a cluster when more than one process is requested.
    if (is.numeric(proc) && proc > 1) {
        cl<-parallel::makeCluster(proc)
        ## Do not leak worker processes, whether we succeed or fail.
        on.exit(parallel::stopCluster(cl),add=TRUE)
        p.sw(fn_data,stgs,wdirs,fn_cmpd_list,mode,combine=combine,cl=cl)
    } else {
        v(fn_data,stgs,wdirs,fn_cmpd_list,mode,combine=combine)
    }
}
##' Creates and prepares mbWorkspace objects before the full workflow
......@@ -50,6 +59,7 @@ sw.do<-function(fn_data,fn_cmpd_list,mode,rdir=".") {
##' @title Prepare mbWorkspace objects
##' @param w A list of spectral workspace inputs.
##' @param rdir Data root.
##' @param proc Split work between this number of processes. If FALSE
##'     (or, 1), run sequentially.
##' @return Named list of prepared mbWorkspace objects.
##' @author Todor Kondić
##' @export
......@@ -70,12 +80,20 @@ mb.prep<-function(w,rdir=".") {
##' @title Perform the Mass Bank workflow
##' @param mb The list of prepared mbWorkspace objects.
##' @param rdir Root data dir.
##' @param proc Split work between this amount of processes. If FALSE
##' (or, 1), run sequential.
##' @return The named list of processed mbWorkspace objects.
##' @author Todor Kondić
##' @export
mb.do<-function(mb,rdir=".",proc=FALSE) {
    ## Per-workspace data directory, derived from the workspace name.
    idir<-function(n) file.path(rdir,stripext(n))
    infodir<-sapply(names(mb),function(n) file.path(idir(n),"info"))
    fn_stgs<-sapply(names(mb),function(n) file.path(idir(n),attch(n,'.ini')))

    ## As documented, FALSE or 1 means a sequential run; only spin up
    ## a cluster when more than one process is requested.
    if (is.numeric(proc) && proc > 1) {
        cl<-parallel::makeCluster(proc)
        ## Do not leak worker processes, whether we succeed or fail.
        on.exit(parallel::stopCluster(cl),add=TRUE)
        ## Pass the settings file names (fn_stgs); the former
        ## `fn <- stgs` referred to an undefined variable.
        mb.p(mb,infodir,fn_stgs,cl=cl)
    } else {
        mb.v(mb,infodir,fn_stgs)
    }
}
......@@ -4,12 +4,15 @@
\alias{mb.do}
\title{Perform the Mass Bank workflow}
\usage{
mb.do(mb, rdir = ".")
mb.do(mb, rdir = ".", proc = F)
}
\arguments{
\item{mb}{The list of prepared mbWorkspace objects.}
\item{rdir}{Root data dir.}
\item{proc}{Split work between this amount of processes. If FALSE
(or, 1), run sequential.}
}
\value{
The named list of processed mbWorkspace objects.
......
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mix.R
\name{mb.p}
\alias{mb.p}
\title{Parallel Mass Bank Workflow}
\usage{
mb.p(mb, infodir, fn_stgs, cl = F)
}
\arguments{
\item{mb}{List of mass bank workflow objects}
\item{infodir}{List of subdirs containing info lists.}
\item{fn_stgs}{List of settings files.}
\item{cl}{Cluster.}
}
\value{
A named list of mbWorkspace objects. The names are derived
from the input mb sequence.
}
\description{
Interface to parallelised Mass Bank workflow.
}
\author{
Todor Kondić
}
......@@ -10,6 +10,8 @@ mb.prep(w, rdir = ".")
\item{w}{A list of spectral workspace inputs.}
\item{rdir}{Data root.}
\item{proc}{Split work between this amount of processes. If FALSE}
}
\value{
Named list of prepared mbWorkspace objects.
......
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mix.R
\name{mk_combine_file}
\alias{mk_combine_file}
\title{Combine RMB Settings With Different Collisional Energies}
\usage{
mk_combine_file(sett_fns, fname)
}
\arguments{
\item{sett_fns}{A list of settings files.}
\item{fname}{The name of the combined file.}
}
\value{
fname
}
\description{
Combine the RMB settings files
}
\details{
Combine RMB settings with different collisional energies into one
settings file with multiple collisional energy entries.
}
\author{
Todor Kondić
}
......@@ -20,6 +20,3 @@ Produce the Rmb Settings file
Produce the Rmb Settings file based on the customisation file in
YAML format.
}
\author{
Todor Kondić
}
......@@ -15,6 +15,9 @@ The character string containing the input argument \code{path}.
\description{
Create directories without drama.
}
\details{
Create directories without drama.
}
\author{
Todor Kondić
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mix.R
\name{p.sw}
\alias{p.sw}
\title{Parallel Spectral Workflow.}
\usage{
p.sw(fn_data, stgs_alist, wd, fn_cmpd_list, mode, readMethod = "mzR",
archdir = "archive", lastStep = 8, combine = F, cl = NULL)
}
\arguments{
\item{fn_data}{A sequence of mzML input files.}
\item{stgs_alist}{A list of named list of settings, or a list of
filenames of YAML files containing the settings.}
\item{wd}{The list of working directories.}
\item{fn_cmpd_list}{The compound list characterising the mixtures.}
\item{mode}{Same as in msmsRead.}
\item{readMethod}{Same as in msmsRead.}
\item{archdir}{Name of the archive.}
\item{lastStep}{The last step in spectral workflow.}
\item{combine}{If TRUE, use combineMultiplicies to merge
workspaces corresponding to different collisional energies.}
\item{cl}{Cluster.}
}
\value{
A named list of spectral workspaces. The names are derived
from data filenames.
}
\description{
Interface to parallel spectral workflow.
}
\author{
Todor Kondić
}
......@@ -5,7 +5,7 @@
\title{RMassBank Spectral Workflow on a Single Compound Mixture}
\usage{
single.sw(fn_data, stgs_alist, wd, fn_cmpd_list, mode,
readMethod = "mzR", archdir = "archive")
readMethod = "mzR", archdir = "archive", lastStep = 8)
}
\arguments{
\item{fn_data}{A mzML data file.}
......@@ -26,6 +26,8 @@ documentation of \code{msmsRead} for details.}
\item{archdir}{The directory to store R objects created during
workflow execution.}
\item{lastStep}{The last step in the workflow. Default is eight.}
}
\value{
MsmsWorkspace object.
......
......@@ -4,7 +4,7 @@
\alias{sw.do}
\title{Perform MassBank Workflow on Multiple Compound Mixtures}
\usage{
sw.do(fn_data, fn_cmpd_list, mode, rdir = ".")
sw.do(fn_data, fn_cmpd_list, mode, rdir = ".", combine = F, proc = F)
}
\arguments{
\item{fn_data}{List of mzML data filenames to be processed.}
......@@ -14,6 +14,12 @@ sw.do(fn_data, fn_cmpd_list, mode, rdir = ".")
\item{mode}{as in msmsRead.}
\item{rdir}{The root data directory.}
\item{combine}{If TRUE, use combineMultiplicies to merge
workspaces corresponding to different collisional energies.}
\item{proc}{Split work between this amount of processes. If FALSE
(or, 1), run sequential.}
}
\value{
A named list of msmsWorkspace objects.
......
......@@ -5,7 +5,7 @@
\title{Vectorised Spectral Workflow.}
\usage{
v(fn_data, stgs_alist, wd, fn_cmpd_list, mode, readMethod = "mzR",
archdir = "archive")
archdir = "archive", lastStep = 8, combine = F)
}
\arguments{
\item{fn_data}{A sequence of mzML input files.}
......@@ -22,6 +22,11 @@ filenames of YAML files containing the settings.}
\item{readMethod}{Same as in msmsRead.}
\item{archdir}{Name of the archive.}
\item{lastStep}{The last step of the spectral workflow.}
\item{combine}{If TRUE, use combineMultiplicies to merge
workspaces corresponding to different collisional energies.}
}
\value{
A named list of spectral workspaces. The names are derived
......
## Make a fresh scratch directory for files generated on the fly.
## The directory name starts with "rmbmix" and lives under the
## session's temporary directory. Returns the path of the directory.
rmbmix.mkdir<-function() {
    scratch<-tempfile(pattern="rmbmix")
    dir.create(scratch)
    scratch
}
## Produce an RMassBank settings file. The stock RMassBank settings
## template is loaded and its top-level entries are overridden by the
## entries of the named list sett_alist. The result is written, in
## YAML format, to file.
rmbmix.mk_sett_file<-function(sett_alist,file) {
    ## Scratch file for the template; removed when the function exits.
    tmp<-tempfile()
    on.exit(unlink(tmp),add=TRUE)
    ## Namespace-qualified calls: works whether or not the packages
    ## are attached, and a missing package is an error rather than a
    ## silent FALSE from require().
    RMassBank::RmbSettingsTemplate(tmp)
    sett<-yaml::yaml.load_file(tmp)
    for (nm in names(sett_alist)) {
        sett[[nm]]<-sett_alist[[nm]]
    }
    yaml::write_yaml(x=sett,file=file)
}
## Generate the RMassBank compound list from the input compound list
## in CSV file src_fn. The input compound list format is either a
## Chemical Dashboard csv file with, at least, the PREFERRED_NAME and
## SMILES columns _filled_ out, or just an ordinary csv file with
## columns SMILES and Name filled. An optional CASRN column is copied
## to the CAS column of the output; otherwise CAS is left empty.
## Argument dest_fn is the destination filename. Returns the number
## of compounds.
rmbmix.gen_comp_list<-function(src_fn,dest_fn) {
    df<-read.csv(src_fn)
    ## Names. `$` is used on purpose: its partial matching on data
    ## frames also picks up a column called `Names`.
    nms<-if ("PREFERRED_NAME" %in% names(df)) df$PREFERRED_NAME else df$Name
    if (is.null(nms)) stop("Unable to read compound names from the input compound list.")
    ## SMILES
    smiles<-df$SMILES
    if (is.null(smiles)) stop("Unable to read SMILES from the input compound list.")
    sz<-length(smiles)
    ## CAS
    casvals<-if ("CASRN" %in% names(df)) df$CASRN else rep(NA,sz)
    ## seq_len keeps the ID column empty for an empty compound list;
    ## 1:sz would yield c(1,0) and break data.frame().
    outdf<-data.frame(ID=seq_len(sz),Name=nms,SMILES=smiles,CAS=casvals,RT=rep(NA,sz))
    write.csv(outdf,file=dest_fn,row.names=FALSE,na="")
    length(nms)
}
## Perform the compound mixture workflow on the data file called
## fn_data with settings named list called stgs_alist. Alternatively,
## stgs_alist can be a file name which follows the RMassBank settings
## specification, also in YAML format, containing only parts that
## differ from the default. Argument fn_cmpd_list is the compound
## list. Argument wd is the scratch dir to hold generated ini files
## and the like. Arguments mode and readMethod are the same as in
## msmsRead. Argument archdir is the directory receiving the workflow
## archive; it is created if missing. Returns a list with the
## spectral workspace (w) and the Mass Bank workspace (mb).
rmbmix.single<-function(fn_data,stgs_alist,fn_cmpd_list,wd,mode,readMethod="mzR",archdir="archive") {
    ## library() instead of require(): a missing package is an error
    ## here, not a silent FALSE followed by an obscure failure later.
    library(RMassBank)
    library(yaml)
    ## Generate settings file and load.
    stgs_alist<-if (is.character(stgs_alist)) yaml.load_file(stgs_alist) else stgs_alist
    sfn<-file.path(wd,paste(fn_data,".ini",sep=''))
    rmbmix.mk_sett_file(stgs_alist,sfn)
    loadRmbSettings(sfn)

    ## Generate and load the compound list.
    fn_comp<-file.path(wd,paste(fn_data,".comp.csv",sep=''))
    n_cmpd<-rmbmix.gen_comp_list(fn_cmpd_list,fn_comp)
    loadList(fn_comp)

    ## Generate file table: every compound ID is looked for in the
    ## same data file.
    df_table<-data.frame(Files=rep(fn_data,n_cmpd),ID=1:n_cmpd)
    fn_table<-file.path(wd,paste("fn-table.",fn_data,".csv",sep=''))
    write.csv(x=df_table,file=fn_table,row.names=F)

    ## Make empty workspace.
    w <- newMsmsWorkspace()
    ## Run the workflow.
    message(paste("Reading in file:",fn_data))
    ## Honour the readMethod argument (it used to be ignored in favour
    ## of a hard-coded "mzR").
    w <-msmsRead(w,filetable=fn_table,readMethod=readMethod,mode=mode)
    if (!dir.exists(archdir)) dir.create(archdir)
    fn_arch<-file.path(archdir,paste(fn_data,".archive",sep=''))
    w<-msmsWorkflow(w, mode=mode, steps=2:8,archivename=fn_arch)

    mb<-newMbWorkspace(w)
    mb<-resetInfolists(mb)
    ## loadInfolists
    ## addPeaks
    ## The infolist is named after the data file with its last
    ## extension (if any) dropped, and lives in archdir.
    bits<-strsplit(fn_data,split="\\.")[[1]]
    fn_info<-if (length(bits)> 1) paste(head(bits,-1),collapse=".") else fn_data
    fn_info<-paste(archdir,"/",fn_info,".csv",sep='')
    mb<-mbWorkflow(mb,infolist_path=fn_info)
    list(w=w,mb=mb)
}
## Second Mass Bank pass on a single workspace: clear the infolists
## of mb, load them anew from infodir and run the Mass Bank
## workflow. Returns the value of mbWorkflow.
rmbmix.mb2.single<-function(mb,infodir) {
    mbWorkflow(loadInfolists(resetInfolists(mb),infodir))
}
## Vectorised interfaces. Results are always returned as lists
## (SIMPLIFY is off).
## rmbmix.mb2: rmbmix.mb2.single over workspaces and info dirs.
rmbmix.mb2<-Vectorize(FUN=rmbmix.mb2.single,
                      vectorize.args=c("mb","infodir"),
                      SIMPLIFY=FALSE)
## rmbmix: rmbmix.single over data files and settings.
rmbmix<-Vectorize(FUN=rmbmix.single,
                  vectorize.args=c("fn_data","stgs_alist"),
                  SIMPLIFY=FALSE)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment