Newer
Older
##' Remove the last dot-separated component (the extension) from a
##' file name.
##'
##' Only the first element of `fn` is split on dots.
##'
##' @title Strip the file extension
##' @param fn The file name.
##' @return The file name without its last extension, or `fn` itself
##'     when splitting on dots yields at most one piece.
stripext <- function(fn) {
  pieces <- strsplit(fn, split = "\\.")[[1]]
  n_pieces <- length(pieces)
  ## Nothing to strip: no dot at all (or only a trailing one).
  if (n_pieces <= 1) {
    return(fn)
  }
  ## Drop the final piece and glue the remainder back together.
  paste(pieces[-n_pieces], collapse = ".")
}
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
##' Create directories without drama.
##'
##' Directories that already exist are left alone. Missing parent
##' directories are created as well, and `path` may name several
##' directories at once.
##'
##' @title Create directories without drama
##' @param path Name(s) of the directory (or directories) to create.
##' @return The character string containing the input argument `path`.
##' @author Todor Kondić
no_drama_mkdir <- function(path) {
  for (d in path) {
    ## Checked per element, right before creating: an earlier
    ## recursive creation may already have produced this directory.
    if (!dir.exists(d)) {
      ## recursive = TRUE so a nested path does not fail just because
      ## its parent is missing.
      dir.create(d, recursive = TRUE)
    }
  }
  path
}
##' Produce the Rmb Settings file
##'
##' Produce the Rmb Settings file based on the customisation given as
##' a named list. The RMassBank settings template is written to a
##' temporary file, read back, its top-level entries are overridden
##' with the entries of `sett_alist`, and the result is written to
##' `file` in YAML format.
##'
##' @title Generate RMassBank settings file.
##' @param sett_alist The named list of settings that are different
##'     from the RMassBank defaults. Entries replace the template
##'     entries of the same name as a whole (no deep merge).
##' @param file The name of the settings file to be written.
##' @return NULL
##' @author Todor Kondić
mk_sett_file <- function(sett_alist, file) {
  tmp <- tempfile()
  ## The template copy is only needed until it has been parsed.
  on.exit(unlink(tmp), add = TRUE)
  RMassBank::RmbSettingsTemplate(tmp)
  ## Namespace-qualified calls: a missing yaml package raises an
  ## error here, whereas require() would only return FALSE.
  sett <- yaml::yaml.load_file(tmp)
  for (nm in names(sett_alist)) {
    sett[[nm]] <- sett_alist[[nm]]
  }
  yaml::write_yaml(x = sett, file = file)
  NULL
}
##' Generate the RMassBank compound list from the input compound list
##' in CSV file src_fn. The input compound list format is either a
##' Chemical Dashboard csv file with, at least, PREFERRED_NAME and
##' SMILES columns _filled_ out, or just an ordinary CSV file with
##' columns SMILES and Names filled. An optional CASRN column is
##' copied to the CAS column of the output; the RT column is left
##' empty. Argument dest_fn is the destination filename. Returns the
##' number of compounds.
##'
##'
##' @title Generate Compound List File
##' @param src_fn The input compound list CSV filename.
##' @param dest_fn The resulting compound list CSV filename.
##' @return Number of compounds.
##' @author Todor Kondić
gen_comp_list <- function(src_fn, dest_fn) {
  df <- read.csv(src_fn)

  ## Names
  ## `$` is kept for the fallback on purpose: its partial matching
  ## also picks up a column called "Names".
  nms <- if ("PREFERRED_NAME" %in% names(df)) df$PREFERRED_NAME else df$Name
  if (is.null(nms)) {
    stop("Unable to read compound names from the input compound list.")
  }

  ## SMILES (validated before its length is used)
  smiles <- df$SMILES
  if (is.null(smiles)) {
    stop("Unable to read SMILES from the input compound list.")
  }
  sz <- length(smiles)

  ## CAS
  casvals <- if ("CASRN" %in% names(df)) df$CASRN else rep(NA, sz)

  ## seq_len() instead of 1:sz, which would give c(1, 0) for an empty
  ## compound list and make data.frame() fail.
  outdf <- data.frame(ID = seq_len(sz),
                      Name = nms,
                      SMILES = smiles,
                      CAS = casvals,
                      RT = rep(NA, sz))
  write.csv(outdf, file = dest_fn, row.names = FALSE, na = "")
  length(nms)
}
##' Runs a compound mixture workflow on a single mzML file.
##'
##' The function writes three helper files into `wd`, all named after
##' `fn_data`: the RMassBank settings file (`<fn_data>.ini`), the
##' compound list (`<fn_data>.comp.csv`) and the file table
##' (`fn-table.<fn_data>.csv`). It then reads the data with
##' `msmsRead` and runs `msmsWorkflow` steps 2 to `lastStep`,
##' archiving under `<wd>/<archdir>/<fn_data>.archive`.
##'
##' @title RMassBank Spectral Workflow on a Single Compound Mixture
##' @param fn_data A mzML data file.
##' @param stgs_alist RMassBank settings. It can either be a named
##'     list of settings, or a filename of a YAML file.
##' @param wd The name of the work directory.
##' @param fn_cmpd_list The file name of the compound list
##'     corresponding to `fn_data`.
##' @param mode Modes as described in the standard workflow vignette
##'     of RMassBank.
##' @param readMethod Default read method is "mzR". Consult the
##'     documentation of `msmsRead` for details.
##' @param archdir The directory to store R objects created during
##'     workflow execution. It is created inside `wd` if missing.
##' @param lastStep The last step in the workflow. Default is eight.
##' @return MsmsWorkspace object.
##' @author Todor Kondić
single.sw <- function(fn_data, stgs_alist, wd, fn_cmpd_list, mode,
                      readMethod = "mzR", archdir = "archive",
                      lastStep = 8) {
  ## library() rather than require(): stop right here when a package
  ## is missing instead of failing later on an unknown function.
  library(RMassBank)
  library(yaml)

  ## Generate settings file and load.
  stgs_alist <- if (is.character(stgs_alist)) {
    yaml::yaml.load_file(stgs_alist)
  } else {
    stgs_alist
  }
  sfn <- file.path(wd, paste0(fn_data, ".ini"))
  mk_sett_file(stgs_alist, sfn)
  RMassBank::loadRmbSettings(sfn)

  ## Generate and load the compound list.
  fn_comp <- file.path(wd, paste0(fn_data, ".comp.csv"))
  n_cmpd <- gen_comp_list(fn_cmpd_list, fn_comp)
  RMassBank::loadList(fn_comp)

  ## Generate file table: the same data file is listed once for each
  ## compound ID.
  df_table <- data.frame(Files = rep(fn_data, n_cmpd),
                         ID = seq_len(n_cmpd))
  fn_table <- file.path(wd, paste0("fn-table.", fn_data, ".csv"))
  write.csv(x = df_table, file = fn_table, row.names = FALSE)

  ## Make empty workspace.
  w <- RMassBank::newMsmsWorkspace()

  ## Run the workflow.
  message(paste("Reading in file:", fn_data))
  ## Pass the caller's readMethod on instead of a fixed "mzR".
  w <- RMassBank::msmsRead(w, filetable = fn_table,
                           readMethod = readMethod, mode = mode)
  archdir <- file.path(wd, archdir)
  if (!dir.exists(archdir)) dir.create(archdir)
  fn_arch <- file.path(archdir, paste0(fn_data, ".archive"))

  ## Must stay the last expression: its value is the function result.
  RMassBank::msmsWorkflow(w, mode = mode, steps = 2:lastStep,
                          archivename = fn_arch)
}
##' Prepare single mbWorkspace object based on the workspace, the
##' infolist name and RMassBank settings.
##'
##' The settings in `fn_stgs` are loaded first, then a new mbWorkspace
##' is created from `w`, its infolists are reset and `mbWorkflow` is
##' run with `fn_info` as the infolist path.
##'
##' @title Prepare Single mbWorkspace object
##' @param w MsmsWorkspace object.
##' @param fn_info Filename of the infolist to be generated.
##' @param fn_stgs Filename of the RMassBank settings.
##' @return A mbWorkspace object.
##' @author Todor Kondić
mb.prep.single <- function(w, fn_info, fn_stgs) {
  RMassBank::loadRmbSettings(fn_stgs)
  mb <- RMassBank::newMbWorkspace(w)
  ## Keep the value returned by resetInfolists(), as mb.single does;
  ## calling it without assigning the result leaves `mb` as it was.
  mb <- RMassBank::resetInfolists(mb)
  RMassBank::mbWorkflow(mb, infolist_path = fn_info)
}
##' Vectorize mb.prep function.
##'
##' Calls `mb.prep.single` once for every parallel triple of elements
##' of `w`, `fn_info` and `fn_stgs`.
##'
##' @title Vectorized mb.prep function.
##' @param w A sequence of msmsWorkspaces.
##' @param fn_info A sequence of infolist filenames to be generated.
##' @param fn_stgs A sequence of settings associated with each
##'     msmsWorkspace object.
##' @return A list of mbWorkspaces, named after `w`.
##' @author Todor Kondić
mb.prep.v <- function(w, fn_info, fn_stgs) {
  ## Map() always returns a list, so the result no longer depends on
  ## the reassignable shorthand `F`.
  res <- Map(mb.prep.single, w, fn_info, fn_stgs)
  names(res) <- names(w)
  res
}
##' Performs a single MassBank workflow after preparation.
##'
##' The settings in `fn_stgs` are loaded, the infolists of `mb` are
##' reset and reloaded from `infodir`, and `mbWorkflow` steps 1 to 8
##' are run with `infodir` as the working directory. The previous
##' working directory is restored on exit, also when the workflow
##' fails.
##'
##' @title Single MassBank workflow.
##' @param mb A mbWorkspace object.
##' @param infodir Directory containing the infolist.
##' @param fn_stgs The settings associated with the mbWorkspace
##'     object.
##' @return The value returned by `RMassBank::mbWorkflow`
##'     (presumably the updated mbWorkspace object).
##' @author Todor Kondić
mb.single <- function(mb, infodir, fn_stgs) {
  RMassBank::loadRmbSettings(fn_stgs)
  mb <- RMassBank::resetInfolists(mb)
  mb <- RMassBank::loadInfolists(mb, infodir)
  ## The workflow is run from inside infodir.
  ## NOTE(review): presumably so that its output lands there -- confirm.
  prevd <- setwd(infodir)
  on.exit(setwd(prevd), add = TRUE)
  ## `steps` spelled out in full rather than relying on partial
  ## matching of `step`.
  RMassBank::mbWorkflow(mb, steps = 1:8)
}
##' Interface to vectorised spectral workflow.
##'
##' Calls `single.sw` once for every parallel triple of elements of
##' `fn_data`, `stgs_alist` and `wd`. The remaining arguments are
##' passed unchanged to every call. `lastStep` is not forwarded, so
##' `single.sw` uses its default.
##'
##' @title Vectorised Spectral Workflow.
##' @param fn_data A sequence of mzML input files.
##' @param stgs_alist A list of named list of settings, or a list of
##'     filenames of YAML files containing the settings.
##' @param wd The list of working directories.
##' @param fn_cmpd_list The compound list characterising the mixtures.
##' @param mode Same as in msmsRead.
##' @param readMethod Same as in msmsRead.
##' @param archdir Name of the archive.
##' @return A named list of spectral workspaces. The names are derived
##'     from data filenames.
##' @author Todor Kondić
v <- function(fn_data, stgs_alist, wd, fn_cmpd_list, mode,
              readMethod = "mzR", archdir = "archive") {
  ## Arguments shared by every single.sw call.
  shared <- list(fn_cmpd_list = fn_cmpd_list,
                 mode = mode,
                 readMethod = readMethod,
                 archdir = archdir)
  ## SIMPLIFY = FALSE (not the reassignable `F`) keeps the result a
  ## list whatever single.sw returns.
  x <- mapply(single.sw,
              fn_data = fn_data,
              stgs_alist = stgs_alist,
              wd = wd,
              MoreArgs = shared,
              SIMPLIFY = FALSE)
  names(x) <- basename(fn_data)
  x
}
##' Interface to vectorised Mass Bank workflow.
##'
##' Calls `mb.single` once for every parallel triple of elements of
##' `mb`, `infodir` and `fn_stgs`.
##'
##' @title Vectorised Mass Bank Workflow
##' @param mb List of mass bank workflow objects
##' @param infodir List of subdirs containing info lists.
##' @param fn_stgs List of settings files.
##' @return A named list of mbWorkspace objects. The names are derived
##'     from the input mb sequence.
##' @author Todor Kondić
mb.v <- function(mb, infodir, fn_stgs) {
  ## Map() always returns a list, so the result no longer depends on
  ## the reassignable shorthand `F`.
  x <- Map(mb.single, mb, infodir, fn_stgs)
  names(x) <- names(mb)
  x
}