Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
shinyscreen
Manage
Activity
Members
Labels
Plan
Wiki
External wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Environmental Cheminformatics
shinyscreen
Commits
b97e2d51
Commit
b97e2d51
authored
5 years ago
by
Todor Kondic
Browse files
Options
Downloads
Patches
Plain Diff
Major reorg
parent
787e47f1
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
R/mix.R
+115
-272
115 additions, 272 deletions
R/mix.R
R/run.R
+58
-22
58 additions, 22 deletions
R/run.R
with
173 additions
and
294 deletions
R/mix.R
+
115
−
272
View file @
b97e2d51
...
...
@@ -7,13 +7,14 @@ stripext<-function(fn) {
##' Create directories without drama.
##'
##' @title Create directories without drama
##' @param path Name of the director
y
.
##' @param path Name
s
of the director
ies
.
##' @return The character string containing the input argument `path`.
##' @author Todor Kondić
no_drama_mkdir
<-
function
(
path
)
{
if
(
!
dir.exists
(
path
))
dir.create
(
path
)
path
f
<-
Vectorize
(
function
(
path
)
{
if
(
!
dir.exists
(
path
))
dir.create
(
path
)
path
},
vectorize.args
=
"path"
)
f
(
path
)
}
##' Produce the Rmb Settings file
...
...
@@ -60,6 +61,59 @@ mk_combine_file<-function(sett_fns,fname) {
fname
}
fn_data2wd
<-
function
(
fn_data
,
dest
)
{
f
<-
Vectorize
(
function
(
fn_data
)
{
noext
<-
stripext
(
fn_data
)
file.path
(
dest
,
basename
(
noext
))
},
vectorize.args
=
"fn_data"
)
f
(
fn_data
)
}
get_cmpd_l_fn
<-
function
(
wd
)
{
f
<-
function
(
wd
)
file.path
(
wd
,
"compounds.csv"
)
fv
<-
Vectorize
(
f
,
vectorize.args
=
c
(
"wd"
))
fv
(
wd
)
}
get_stgs_fn
<-
function
(
wd
)
{
f
<-
function
(
wd
)
file.path
(
wd
,
"settings.ini"
)
fv
<-
Vectorize
(
f
,
vectorize.args
=
c
(
"wd"
))
fv
(
wd
)
}
get_ftable_fn
<-
function
(
wd
)
{
f
<-
function
(
wd
)
file.path
(
wd
,
"ftable.ini"
)
fv
<-
Vectorize
(
f
,
vectorize.args
=
c
(
"wd"
))
fv
(
wd
)
}
get_inp_stgs_fn
<-
function
(
fn_data
)
{
f
<-
Vectorize
(
function
(
fn_data
)
{
bnm
<-
stripext
(
fn_data
)
fn
<-
paste
(
bnm
,
".ini"
,
sep
=
''
)},
vectorize.args
=
"fn_data"
)
f
(
fn_data
)}
get_info_dir
<-
function
(
wd
)
{
file.path
(
wd
,
"info"
)
}
get_info_fn
<-
function
(
wd
)
{
file.path
(
get_info_dir
(
wd
),
"info.csv"
)
}
gen_info_dir
<-
function
(
wd
)
{
nm
<-
get_info_dir
(
wd
)
no_drama_mkdir
(
nm
)
nm
}
##' Generate the RMassBank compound list from the input compound list
##' in CSV file src_fn. The input compound list format is either a
##' Chemical Dashboard csv file with, at least, PREFERRED_ SMILES
...
...
@@ -67,13 +121,12 @@ mk_combine_file<-function(sett_fns,fname) {
##' SMILES and Names filled. Argument dest_fn is the destination
##' filename. Returns the number of compounds.
##'
##'
##' @title Generate Compound List File
##' @param src_fn The input compound list CSV filename.
##' @param dest_fn The resulting compound list CSV filename.
##' @return Number of compounds.
##' @author Todor Kondić
gen_c
o
mp_l
ist
<-
function
(
src_fn
,
dest_fn
)
{
gen_cmp
d
_l
<-
function
(
src_fn
,
dest_fn
)
{
df
<-
read.csv
(
src_fn
)
## Names
nms
<-
if
(
"PREFERRED_NAME"
%in%
names
(
df
))
df
$
PREFERRED_NAME
else
df
$
Name
...
...
@@ -87,41 +140,40 @@ gen_comp_list<-function(src_fn,dest_fn) {
casvals
<-
if
(
"CASRN"
%in%
names
(
df
))
df
$
CASRN
else
rep
(
NA
,
sz
)
if
(
is.null
(
haha
))
stop
(
"Unable to read SMILES from the input compound list."
)
outdf
<-
data.frame
(
ID
=
1
:
sz
,
Name
=
nms
,
SMILES
=
haha
,
CAS
=
casvals
,
RT
=
rep
(
NA
,
sz
))
write.csv
(
outdf
,
file
=
dest_fn
,
row.names
=
F
,
na
=
""
)
f
<-
Vectorize
(
function
(
dest_fn
)
{
write.csv
(
outdf
,
file
=
dest_fn
,
row.names
=
F
,
na
=
""
)
},
vectorize.args
=
"dest_fn"
,
SIMPLIFY
=
F
)
f
(
dest_fn
)
length
(
nms
)
}
##' Generates settings file and loads it.
##'
##'
##' @title Generate and Load the RMassBank Settings File
##' @param fn_data The mzML filename.
##' @param stgs Settings named list, or a settings filename.
##' @param wd Directory under which results are archived.
##' @return result of RMassBank::loadRmbSettings
##' @author Todor Kondić
gen_stgs_and_load
<-
function
(
fn_data
,
stgs
,
wd
)
{
gen_stgs_and_load
<-
function
(
stgs
,
wd
)
{
stgs
<-
if
(
is.character
(
stgs
))
yaml
::
yaml.load_file
(
stgs
)
else
stgs
sfn
<-
file.path
(
wd
,
paste
(
basename
(
fn_data
),
".ini"
,
sep
=
''
)
)
sfn
<-
get_stgs_fn
(
wd
)
mk_sett_file
(
stgs
,
sfn
)
RMassBank
::
loadRmbSettings
(
sfn
)
}
##' Generates the RMassBank compound list and loads it.
##'
##'
##' @title Generate and Load the RMassBank Compound List
##' @param fn_data The mzML filename.
##' @param wd Directory under which results are archived.
##' @param fn_cmpdl The input compound list filename.
##' @return Named list. The key `fn_cmpdl` is the path of the
##' generated compound list and the key `n` the number of
##' compounds.
##' @author Todor Kondić
gen_cmpdl_and_load
<-
function
(
fn_data
,
wd
,
fn_cmpdl
)
{
wd
<-
normalizePath
(
wd
)
fn_data
<-
normalizePath
(
fn_data
)
fn_comp
<-
file.path
(
wd
,
paste
(
basename
(
fn_data
),
".comp.csv"
,
sep
=
''
))
n_cmpd
<-
gen_comp_list
(
fn_cmpdl
,
fn_comp
)
gen_cmpdl_and_load
<-
function
(
wd
,
fn_cmpdl
)
{
fn_comp
<-
get_cmpd_l_fn
(
wd
)
n_cmpd
<-
gen_cmpd_l
(
fn_cmpdl
,
fn_comp
)
RMassBank
::
loadList
(
fn_comp
)
list
(
fn_cmpdl
=
fn_comp
,
n
=
n_cmpd
)
}
...
...
@@ -131,17 +183,50 @@ gen_cmpdl_and_load <- function(fn_data,wd,fn_cmpdl) {
##'
##' @title Generate and Load the RMassBank Settings File
##' @param fn_data The mzML filename.
##' @param n_cmpd Number of compounds.
##' @param wd Directory under which results are archived.
##' @param n_cmpd Number of compounds.
##' @return File path of the file table.
##' @author Todor Kondić
gen_file_table
<-
function
(
fn_data
,
n_cmpd
,
wd
)
{
wd
<-
normalizePath
(
wd
)
fn_data
<-
normalizePath
(
fn_data
)
df_table
<-
data.frame
(
Files
=
rep
(
fn_data
,
n_cmpd
),
ID
=
1
:
n_cmpd
)
fn_table
<-
file.path
(
wd
,
paste
(
"fn-table."
,
basename
(
fn_data
),
".csv"
,
sep
=
''
))
write.csv
(
x
=
df_table
,
file
=
fn_table
,
row.names
=
F
)
fn_table
gen_ftable
<-
function
(
fn_data
,
wd
,
n_cmpd
)
{
f
<-
Vectorize
(
function
(
fn_data
,
wd
)
{
df_table
<-
data.frame
(
Files
=
rep
(
fn_data
,
n_cmpd
),
ID
=
1
:
n_cmpd
)
fn_table
<-
get_ftable_fn
(
wd
)
write.csv
(
x
=
df_table
,
file
=
fn_table
,
row.names
=
F
)
fn_table
},
vectorize.args
=
c
(
"fn_data"
,
"wd"
))
f
(
fn_data
,
wd
)
}
gen_fn_stgs
<-
function
(
fn_inp
,
fn
)
{
f
<-
Vectorize
(
function
(
fn_inp
,
fn
)
{
stgs
<-
yaml
::
yaml.load_file
(
fn_inp
)
mk_sett_file
(
stgs
,
fn
)
fn
},
vectorize.args
=
c
(
"fn_inp"
,
"fn"
))
f
(
fn_inp
,
fn
)
}
conf
<-
function
(
fn_data
,
fn_cmpd_l
,
dest
)
{
no_drama_mkdir
(
dest
)
wd
<-
fn_data2wd
(
fn_data
,
dest
)
no_drama_mkdir
(
wd
)
fn_inp_stgs
<-
get_inp_stgs_fn
(
fn_data
)
fn_stgs
<-
get_stgs_fn
(
wd
)
fn_out_cmpd_l
<-
get_cmpd_l_fn
(
wd
)
gen_fn_stgs
(
fn_inp_stgs
,
fn_stgs
)
n_cmpd
<-
gen_cmpd_l
(
fn_cmpd_l
,
fn_out_cmpd_l
)
gen_ftable
(
fn_data
,
wd
,
n_cmpd
)
}
reconf
<-
function
(
wd
)
{
## Load the settings.
fn_stgs
<-
get_stgs_fn
(
wd
)
RMassBank
::
loadRmbSettings
(
fn_stgs
)
## Load the compound list.
fn_cmpd_l
<-
get_cmpd_l_fn
(
wd
)
RMassBank
::
loadList
(
fn_cmpd_l
)
}
##' Wrapper for a single prescreening call. Produces output in the
...
...
@@ -160,12 +245,11 @@ gen_file_table <- function(fn_data,n_cmpd,wd) {
##' @export
presc.single
<-
function
(
fn_data
,
stgs_alist
,
wd
,
mode
,
fn_cmpd_l
,
ppm_lim_fine
=
10
,
EIC_limit
=
0.001
)
{
no_drama_mkdir
(
wd
)
wd
<-
normalizePath
(
wd
)
gen_stgs_and_load
(
fn_data
,
stgs_alist
,
wd
)
gen_stgs_and_load
(
stgs_alist
,
wd
)
## Generate and load the compound list.
x
<-
gen_cmpdl_and_load
(
fn_data
,
wd
,
fn_cmpd_l
)
fn_c
o
mp
<-
x
$
fn_cmpdl
x
<-
gen_cmpdl_and_load
(
wd
,
fn_cmpd_l
)
fn_cmp
d_l
<-
x
$
fn_cmpdl
n_cmpd
<-
x
$
n
## Generate file table.
...
...
@@ -173,7 +257,7 @@ presc.single <- function(fn_data,stgs_alist,wd,mode,fn_cmpd_l,ppm_lim_fine=10,EI
#curd <- setwd(wd)
res
<-
RMB_EIC_prescreen_df
(
wd
=
wd
,
RMB_mode
=
mode
,
FileList
=
fn_table
,
cmpd_list
=
fn_c
o
mp
,
cmpd_list
=
fn_cmp
d_l
,
ppm_limit_fine
=
ppm_lim_fine
,
EIC_limit
=
EIC_limit
)
#setwd(curd)
...
...
@@ -181,146 +265,9 @@ presc.single <- function(fn_data,stgs_alist,wd,mode,fn_cmpd_l,ppm_lim_fine=10,EI
}
##' Runs a compound mixture workflow on a single mzML file.
##'
##' @title RMassBank Spectral Workflow on a Single Compound Mixture
##' @param fn_data A mzML data file.
##' @param stgs_alist RMassBank settings. It can either be a named
##' list of settings, or a filename of a YAML file.
##' @param wd The name of the work directory.
##' @param fn_cmpd_list The file name of he compound list
##' corresponding to `fn_data`.
##' @param mode Modes as described in the standard workflow vignette
##' of RMassBank.
##' @param readMethod Default read method is "mzR". Consult the
##' documentation of `msmsRead` for details.
##' @param archdir The directory to store R objects created during
##' workflow execution.
##' @param lastStep The last step in the workflow. Default is eight.
##' @return MsmsWorkspace object.
##' @author Todor Kondić
sw.single
<-
function
(
fn_data
,
stgs_alist
,
wd
,
fn_cmpd_list
,
mode
,
readMethod
=
"mzR"
,
archdir
=
"archive"
,
lastStep
=
8
)
{
## Generate settings file and load.
no_drama_mkdir
(
wd
)
wd
<-
normalizePath
(
wd
)
gen_stgs_and_load
(
fn_data
,
stgs_alist
,
wd
)
## Generate and load the compound list.
x
<-
gen_cmpdl_and_load
(
fn_data
,
wd
,
fn_cmpd_list
)
fn_comp
<-
x
$
fn_cmpdl
n_cmpd
<-
x
$
n
## Generate file table.
fn_table
<-
gen_file_table
(
fn_data
,
n_cmpd
,
wd
)
## Make empty workspace.
w
<-
RMassBank
::
newMsmsWorkspace
()
## Run the workflow.
message
(
paste
(
"Reading in file:"
,
fn_data
))
w
<-
RMassBank
::
msmsRead
(
w
,
filetable
=
fn_table
,
readMethod
=
"mzR"
,
mode
=
mode
)
archdir
<-
file.path
(
wd
,
archdir
)
no_drama_mkdir
(
archdir
)
fn_arch
<-
file.path
(
archdir
,
paste
(
basename
(
fn_data
),
".archive"
,
sep
=
''
))
RMassBank
::
msmsWorkflow
(
w
,
mode
=
mode
,
steps
=
2
:
lastStep
,
archivename
=
fn_arch
)
}
##' Runs the first step of the compound mixture workflow on a single mzML file.
##'
##' @title RMassBank Spectral Workflow on a Single Compound Mixture (step 1)
##' @param fn_data A mzML data file.
##' @param stgs_alist RMassBank settings. It can either be a named
##' list of settings, or a filename of a YAML file.
##' @param wd The name of the work directory.
##' @param fn_cmpd_list The file name of he compound list
##' corresponding to `fn_data`.
##' @param mode Modes as described in the standard workflow vignette
##' of RMassBank.
##' @param readMethod Default read method is "mzR". Consult the
##' documentation of `msmsRead` for details.
##' @return MsmsWorkspace object.
##' @author Todor Kondić
sw.single.1
<-
function
(
fn_data
,
stgs_alist
,
wd
,
fn_cmpd_list
,
mode
,
readMethod
=
"mzR"
)
{
## Generate settings file and load.
no_drama_mkdir
(
wd
)
wd
<-
normalizePath
(
wd
)
gen_stgs_and_load
(
fn_data
,
stgs_alist
,
wd
)
## Generate and load the compound list.
x
<-
gen_cmpdl_and_load
(
fn_data
,
wd
,
fn_cmpd_list
)
fn_comp
<-
x
$
fn_cmpdl
n_cmpd
<-
x
$
n
## Generate file table.
fn_table
<-
gen_file_table
(
fn_data
,
n_cmpd
,
wd
)
## Make empty workspace.
w
<-
RMassBank
::
newMsmsWorkspace
()
## Run the workflow.
message
(
paste
(
"Reading in file:"
,
fn_data
))
RMassBank
::
msmsRead
(
w
,
filetable
=
fn_table
,
readMethod
=
"mzR"
,
mode
=
mode
)}
##' Runs steps after sw.single.1 of compound mixture workflow on a
##' single mzML file.
##'
##' @title RMassBank Spectral Workflow on a Single Compound Mixture (after step 1)
##' @param w The msmsWorkspace object that was processed by
##' sw.single.1.
##' @param wd The current working dir.
##' @param archdir The directory to store R objects created during
##' workflow execution.
##' @param lastStep The last step in the workflow. Default is eight.
##' @return MsmsWorkspace object.
##' @author Todor Kondić
sw.single.next
<-
function
(
w
,
wd
,
archdir
=
"archive"
,
lastStep
=
8
)
{
archdir
<-
file.path
(
wd
,
archdir
)
no_drama_mkdir
(
archdir
)
fn_arch
<-
file.path
(
archdir
,
paste
(
wd
,
".archive"
,
sep
=
''
))
RMassBank
::
loadRmbSettings
(
file.path
(
wd
,
basename
(
wd
),
".mzML.ini"
))
RMassBank
::
loadList
(
file.path
(
wd
,
basename
(
wd
),
".mzML.comp.csv"
))
RMassBank
::
msmsWorkflow
(
w
,
mode
=
mode
,
steps
=
2
:
lastStep
,
archivename
=
fn_arch
)
}
##' Prepare single mbWorkspace object based on the workspace, the
##' infolist name and RMassBank settings.
##'
##'
##' @title Prepare Single mbWorkspace object
##' @param w MsmsWorkspace object.
##' @param fn_info Filename of the infolist to be generated.
##' @param fn_stgs Filename of the RMassBank settings.
##' @return A mbWorkspace object.
##' @author Todor Kondić
mb.prep.single
<-
function
(
w
,
fn_info
,
fn_stgs
)
{
RMassBank
::
loadRmbSettings
(
fn_stgs
)
mb
<-
RMassBank
::
newMbWorkspace
(
w
)
RMassBank
::
resetInfolists
(
mb
)
RMassBank
::
mbWorkflow
(
mb
,
infolist_path
=
fn_info
)
}
##' Vectorize mb.prep function.
##'
##'
##' @title Vectorized mb.prep function.
##' @param w A sequence of msmsWorkspaces.
##' @param fn_info A sequence of infolist filenams to be generated.
##' @param fn_stgs A sequence of settings associated with each
##' msmsWorkspace object.
##' @return A list of mbWorkspaces.
##' @author Todor Kondić
mb.prep.v
<-
function
(
w
,
fn_info
,
fn_stgs
)
{
f
<-
Vectorize
(
mb.prep.single
,
vectorize.args
=
c
(
"w"
,
"fn_info"
,
"fn_stgs"
),
SIMPLIFY
=
F
)
res
<-
f
(
w
,
fn_info
,
fn_stgs
)
names
(
res
)
<-
names
(
w
)
res
}
##' Performs a single MassBank workflow after preparation.
##'
##'
##' @title Single MassBank workflow.
##' @param mb A mbWorkspace object.
##' @param infodir Directory containing the infolist.
...
...
@@ -360,54 +307,6 @@ presc.v<-function(fn_data,fn_cmpd_l,mode,ppm_lim_fine=10,EIC_limit=0.001) {
f
(
fn_data
,
stgs_alist
,
wd
,
mode
=
mode
,
fn_cmpd_l
=
fn_cmpd_l
,
ppm_lim_fine
=
ppm_lim_fine
,
EIC_limit
=
EIC_limit
)
}
##' Interface to vectorised spectral workflow.
##'
##'
##' @title Vectorised Spectral Workflow.
##' @param fn_data A sequence of mzML input files.
##' @param stgs_alist A list of named list of settings, or a list of
##' filenames of YAML files containing the settings.
##' @param wd The list of working directories.
##' @param fn_cmpd_list The compound list characterising the mixtures.
##' @param mode Same as in msmsRead.
##' @param readMethod Same as in msmsRead.
##' @param archdir Name of the archive.
##' @param lastStep The last step of the spectral workflow.
##' @param combine If TRUE, use combineMultiplicies to merge
##' workspaces corresponding to different collisional energies.
##' @return A named list of spectral workspaces. The names are derived
##' from data filenames.
##' @author Todor Kondić
v
<-
function
(
fn_data
,
stgs_alist
,
wd
,
fn_cmpd_list
,
mode
,
readMethod
=
"mzR"
,
archdir
=
"archive"
,
lastStep
=
8
,
combine
=
F
)
{
idir
<-
function
(
n
)
file.path
(
"."
,
stripext
(
n
))
f
<-
Vectorize
(
sw.single
,
vectorize.args
=
c
(
"wd"
,
"fn_data"
,
"stgs_alist"
),
SIMPLIFY
=
F
)
rootdir
<-
getwd
()
if
(
combine
)
{
z
<-
f
(
fn_data
,
stgs_alist
,
wd
,
fn_cmpd_list
,
mode
,
readMethod
=
readMethod
,
archdir
=
archdir
,
lastStep
=
7
)
names
(
z
)
<-
basename
(
fn_data
)
zz
<-
RMassBank
::
combineMultiplicities
(
z
)
combdir
<-
"combined"
archdir
<-
file.path
(
rootdir
,
combdir
,
archdir
)
no_drama_mkdir
(
combdir
)
no_drama_mkdir
(
archdir
)
fn_arch
<-
file.path
(
archdir
,
"archive"
)
fn_comb_stgs
<-
file.path
(
rootdir
,
combdir
,
paste
(
combdir
,
".mzML.ini"
,
sep
=
''
))
ddirs
<-
sapply
(
names
(
z
),
idir
)
stgs_fls
<-
sapply
(
ddirs
,
function
(
x
)
file.path
(
x
,
paste
(
x
,
".mzML.ini"
,
sep
=
''
)))
mk_combine_file
(
stgs_fls
,
fn_comb_stgs
)
res
<-
list
(
RMassBank
::
msmsWorkflow
(
zz
,
steps
=
8
,
mode
=
mode
,
archivename
=
fn_arch
))
names
(
res
)
<-
paste
(
combdir
,
".mzML"
,
sep
=
''
)
#Clearly a hack.
res
}
else
{
z
<-
f
(
fn_data
,
stgs_alist
,
wd
,
fn_cmpd_list
,
mode
,
readMethod
=
readMethod
,
archdir
=
archdir
,
lastStep
=
lastStep
)
names
(
z
)
<-
basename
(
fn_data
)
z
}
}
##' Prescreens. Writes data out. Adapted from ReSOLUTION
##'
##'
...
...
@@ -605,62 +504,6 @@ presc.plot <- function(wd,out="prescreen.pdf",pal="Dark2",cex=0.75,digits=6) {
}
dev.off
()
}
##' Interface to parallel spectral workflow.
##'
##'
##' @title Parallel Spectral Workflow.
##' @param cl Cluster.
##' @param fn_data A sequence of mzML input files.
##' @param stgs_alist A list of named list of settings, or a list of
##' filenames of YAML files containing the settings.
##' @param wd The list of working directories.
##' @param fn_cmpd_list The compound list characterising the mixtures.
##' @param mode Same as in msmsRead.
##' @param readMethod Same as in msmsRead.
##' @param archdir Name of the archive.
##' @param lastStep The last step in spectral workflow.
##' @param combine If TRUE, use combineMultiplicies to merge
##' workspaces corresponding to different collisional energies.
##' @param combdest Combine destination directory.
##' @return A named list of spectral workspaces. The names are derived
##' from data filenames.
##' @author Todor Kondić
p.sw
<-
function
(
cl
,
fn_data
,
stgs_alist
,
wd
,
fn_cmpd_list
,
mode
,
readMethod
=
"mzR"
,
archdir
=
"archive"
,
lastStep
=
8
,
combine
=
F
,
combdest
=
"combined"
)
{
idir
<-
function
(
n
)
file.path
(
"."
,
stripext
(
n
))
fnocomb
<-
function
(
fn
,
stgs
,
wd
)
{
sw.single
(
fn
,
stgs
,
wd
,
fn_cmpd_list
,
mode
,
readMethod
,
archdir
,
lastStep
=
lastStep
)
}
fcomb
<-
function
(
fn
,
stgs
,
wd
)
sw.single
(
fn
,
stgs
,
wd
,
fn_cmpd_list
,
mode
,
readMethod
,
archdir
,
lastStep
=
7
)
if
(
combine
)
{
z
<-
parallel
::
clusterMap
(
cl
,
fcomb
,
fn_data
,
stgs_alist
,
wd
)
names
(
z
)
<-
basename
(
fn_data
)
zz
<-
RMassBank
::
combineMultiplicities
(
z
)
archdir
<-
file.path
(
combdest
,
archdir
)
no_drama_mkdir
(
combdest
)
no_drama_mkdir
(
archdir
)
fn_arch
<-
file.path
(
archdir
,
"archive"
)
fn_comb_stgs
<-
file.path
(
combdest
,
paste
(
basename
(
combdest
),
".mzML.ini"
,
sep
=
''
))
## ddirs <- sapply(fn_data,idir)
## stgs_fls <- sapply(ddirs,function(x) file.path(x,paste(basename(x),".mzML.ini",sep='')))
mk_combine_file
(
stgs_alist
,
fn_comb_stgs
)
anycpdlist
<-
file.path
(
wd
[[
1
]],
paste
(
basename
(
wd
[[
1
]]),
".mzML.comp.csv"
,
sep
=
''
))
RMassBank
::
loadRmbSettings
(
fn_comb_stgs
)
RMassBank
::
loadList
(
anycpdlist
)
res
<-
list
(
RMassBank
::
msmsWorkflow
(
zz
,
steps
=
8
,
mode
=
mode
,
archivename
=
fn_arch
))
names
(
res
)
<-
paste
(
basename
(
combdest
),
".ini"
,
sep
=
''
)
#Clearly a hack.
res
}
else
{
z
<-
parallel
::
clusterMap
(
cl
,
fnocomb
,
fn_data
,
stgs_alist
,
wd
)
names
(
z
)
<-
basename
(
fn_data
)
z
}
}
##' Interface to vectorised Mass Bank workflow.
##'
...
...
This diff is collapsed.
Click to expand it.
R/run.R
+
58
−
22
View file @
b97e2d51
...
...
@@ -51,30 +51,61 @@ presc.do<-function(fn_data,fn_cmpd_list,mode,proc=F) {
##'
##' @title Perform MassBank Workflow on Multiple Compound Mixtures
##' @param fn_data List of mzML data filenames to be processed.
##' @param fn_cmpd_l
ist
Compound list.
##' @param fn_cmpd_l Compound list.
##' @param mode as in msmsRead.
##' @param dest The destination data directory.
##' @param combine If TRUE, use combineMultiplicies to merge
##' workspaces corresponding to different collisional energies.
##' @param proc Split work between this amount of processes. If FALSE
##' (or, 1), run sequential.
##' @param split This is the last step before combine
##' @return A named list of msmsWorkspace objects.
##' @author Todor Kondić
##' @export
sw.do
<-
function
(
fn_data
,
fn_cmpd_list
,
mode
,
dest
=
"."
,
combine
=
F
,
proc
=
F
)
{
dest
<-
normalizePath
(
dest
)
no_drama_mkdir
(
dest
)
fn_data
<-
normalizePath
(
fn_data
)
wdirs
<-
sapply
(
basename
(
fn_data
),
function
(
nm
)
file.path
(
dest
,
stripext
(
nm
)))
stgs
<-
sapply
(
fn_data
,
function
(
nm
)
file.path
(
paste
(
stripext
(
nm
),
"ini"
,
sep
=
'.'
)))
if
(
proc
)
{
cl
<-
parallel
::
makeCluster
(
proc
)
parallel
::
clusterEvalQ
(
cl
,
library
(
"rmbmix"
))
p.sw
(
cl
,
fn_data
,
stgs
,
wdirs
,
fn_cmpd_list
,
mode
,
combine
=
combine
)
}
else
{
v
(
fn_data
,
stgs
,
wdirs
,
fn_cmpd_list
,
mode
,
combine
=
combine
)
sw.do
<-
function
(
fn_data
,
fn_cmpd_l
,
mode
,
dest
=
"."
,
combine
=
F
,
proc
=
F
,
split
=
3
)
{
conf
(
fn_data
,
fn_cmpd_l
,
dest
)
fread
<-
function
(
fn_data
)
{
wd
<-
fn_data2wd
(
fn_data
,
dest
)
reconf
(
wd
)
w
<-
RMassBank
::
newMsmsWorkspace
()
RMassBank
::
msmsRead
(
w
=
w
,
filetable
=
get_ftable_fn
(
wd
),
mode
=
mode
,
readMethod
=
"mzR"
)
}
fwork
<-
Vectorize
(
function
(
w
,
wd
,
steps
)
{
archdir
<-
file.path
(
wd
,
"archive"
)
no_drama_mkdir
(
archdir
)
fn_arch
<-
file.path
(
archdir
,
"archive"
)
reconf
(
wd
)
RMassBank
::
msmsWorkflow
(
w
=
w
,
mode
=
mode
,
steps
=
steps
,
archivename
=
fn_arch
)
},
vectorize.args
=
c
(
"w"
,
"wd"
),
SIMPLIFY
=
F
)
w
<-
if
(
proc
)
{
cl
=
parallel
::
makeCluster
(
proc
)
parallel
::
clusterEvalQ
(
cl
,
library
(
rmbmix
))
parallel
::
clusterMap
(
cl
,
fread
,
fn_data
)
}
else
{
lapply
(
fn_data
,
fread
)
}
wd
<-
fn_data2wd
(
fn_data
,
dest
)
w
<-
fwork
(
w
,
wd
,
steps
=
2
:
split
)
if
(
combine
)
{
## Combined workflow is not based on a single file, but the
## functions that generate config are. Therefore, lets create
## a fake filename.
fakefile
<-
"combine.mzML"
cwd
<-
fn_data2wd
(
fakefile
,
dest
)
xx
<-
get_stgs_fn
(
wd
[[
1
]])
file.copy
(
xx
,
"combine.ini"
,
overwrite
=
T
)
# mk_combine_file(get_stgs_fn(wd),"combine.ini")
conf
(
fakefile
,
fn_cmpd_l
,
dest
)
reconf
(
cwd
)
w
<-
list
(
RMassBank
::
combineMultiplicities
(
w
))
wd
<-
list
(
cwd
)
}
w
<-
fwork
(
w
,
wd
,
steps
=
(
split
+1
)
:
8
)
names
(
w
)
<-
wd
}
##' Creates and prepares mbWorkspace objects before the full workflow
...
...
@@ -85,17 +116,22 @@ sw.do<-function(fn_data,fn_cmpd_list,mode,dest=".",combine=F,proc=F) {
##'
##' @title Prepare mbWorkspace objects
##' @param w A list of spectral workspace inputs.
##' @param rdir Data root.
##' @param proc Split work between this amount of processes. If FALSE
##' @return Named list of prepared mbWorkspace objects.
##' @author Todor Kondić
##' @export
mb.prep
<-
function
(
w
,
rdir
=
"."
)
{
idir
<-
function
(
n
)
file.path
(
rdir
,
stripext
(
n
))
sapply
(
names
(
w
),
function
(
n
)
no_drama_mkdir
(
file.path
(
idir
(
n
),
"info"
)))
fn_info
<-
sapply
(
names
(
w
),
function
(
n
)
file.path
(
idir
(
n
),
"info"
,
attch
(
n
,
'.info.csv'
)))
fn_stgs
<-
sapply
(
names
(
w
),
function
(
n
)
file.path
(
idir
(
n
),
attch
(
n
,
'.ini'
)))
mb.prep.v
(
w
,
fn_info
,
fn_stgs
)
mb.prep
<-
function
(
w
)
{
wd
<-
names
(
w
)
fwork
<-
Vectorize
(
function
(
w
,
wd
)
{
reconf
(
wd
)
idir
<-
gen_info_dir
(
wd
)
mb
<-
RMassBank
::
newMbWorkspace
(
w
)
RMassBank
::
resetInfolists
(
mb
)
RMassBank
::
mbWorkflow
(
mb
,
infolist_path
=
get_info_fn
(
wd
))
},
vectorize.args
=
c
(
"w"
,
"wd"
))
mb
<-
fwork
(
w
,
wd
)
names
(
mb
)
<-
wd
mb
}
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment