From ba39c9204b2f9c559cfdb0c25f167ec87b1656a3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Todor=20Kondi=C4=87?= <todor.kondic@uni.lu>
Date: Mon, 17 Jun 2019 15:32:43 +0200
Subject: [PATCH] Rework combine procedure

* R/mix.R(single.sw): Hard-coded the workflow steps to 2:7, so the
  lastStep argument is ignored. Need to revert in the next commit.
  (v,p.sw): Added combine capability.

* R/run.R(sw.do): Extended to support combine.

* man(p.sw.Rd,sw.do.Rd,v.Rd): Updated documentation.
---
 R/mix.R      | 51 ++++++++++++++++++++++++++++++++++++++-------------
 R/run.R      |  6 ++++--
 man/p.sw.Rd  |  5 ++++-
 man/sw.do.Rd |  5 ++++-
 man/v.Rd     |  5 ++++-
 5 files changed, 54 insertions(+), 18 deletions(-)

diff --git a/R/mix.R b/R/mix.R
index 1b91299..6f652a1 100644
--- a/R/mix.R
+++ b/R/mix.R
@@ -117,7 +117,7 @@ single.sw<-function(fn_data,stgs_alist,wd,fn_cmpd_list,mode,readMethod="mzR",arc
     archdir<-file.path(wd,archdir)
     if (!dir.exists(archdir)) dir.create(archdir)
     fn_arch<-file.path(archdir,paste(fn_data,".archive",sep=''))
-    RMassBank::msmsWorkflow(w, mode=mode, steps=2:lastStep,archivename=fn_arch)
+    RMassBank::msmsWorkflow(w, mode=mode, steps=2:7,archivename=fn_arch)
 }
 
 
@@ -194,15 +194,26 @@ mb.single<-function(mb,infodir,fn_stgs) {
 ##' @param mode Same as in msmsRead.
 ##' @param readMethod Same as in msmsRead.
 ##' @param archdir Name of the archive.
-##' @param lastStep The last step of the spectral workflow. 
+##' @param lastStep The last step of the spectral workflow.
+##' @param combine If TRUE, use combineMultiplicities to merge
+##'     workspaces corresponding to different collisional energies.
 ##' @return A named list of spectral workspaces. The names are derived
 ##'     from data filenames.
 ##' @author Todor Kondić
-v<-function(fn_data,stgs_alist,wd,fn_cmpd_list,mode,readMethod="mzR",archdir="archive",lastStep=8) {
+v<-function(fn_data,stgs_alist,wd,fn_cmpd_list,mode,readMethod="mzR",archdir="archive",lastStep=8,combine=F) {
     f<-Vectorize(single.sw,vectorize.args=c("wd","fn_data","stgs_alist"),SIMPLIFY=F)
-    x<-f(fn_data,stgs_alist,wd,fn_cmpd_list,mode,readMethod=readMethod,archdir=archdir)
-    names(x)<-basename(fn_data)
-    x}
+    if (combine) {
+        z<-f(fn_data,stgs_alist,wd,fn_cmpd_list,mode,readMethod=readMethod,archdir=archdir,lastStep=7)
+        names(z)<-basename(fn_data)
+        zz<-RMassBank::combineMultiplicities(z)
+        fn_arch<-file.path(archdir,paste(fn_data,".archive",sep=''))
+        RMassBank::msmsWorkflow(zz, steps=8, mode=mode, archivename = fn_arch)
+    } else {
+        z<-f(fn_data,stgs_alist,wd,fn_cmpd_list,mode,readMethod=readMethod,archdir=archdir,lastStep=lastStep)
+        names(z)<-basename(fn_data)
+        z
+    }
+}
 
 ##' Interface to parallel spectral workflow.
 ##'
@@ -217,18 +228,32 @@ v<-function(fn_data,stgs_alist,wd,fn_cmpd_list,mode,readMethod="mzR",archdir="ar
 ##' @param readMethod Same as in msmsRead.
 ##' @param archdir Name of the archive.
 ##' @param lastStep The last step in spectral workflow.
+##' @param combine If TRUE, use combineMultiplicities to merge
+##'     workspaces corresponding to different collisional energies.
 ##' @param cl Cluster.
 ##' @return A named list of spectral workspaces. The names are derived
 ##'     from data filenames.
 ##' @author Todor Kondić
-p.sw<-function(fn_data,stgs_alist,wd,fn_cmpd_list,mode,readMethod="mzR",archdir="archive",lastStep=8,cl=NULL) {
-    f<-function(fn,stgs,wd) {
-        single.sw(fn,stgs,wd,fn_cmpd_list,mode,readMethod,archdir,lastStep)
+p.sw<-function(fn_data,stgs_alist,wd,fn_cmpd_list,mode,readMethod="mzR",archdir="archive",lastStep=8,combine=F,cl=NULL) {
+    fnocomb<-function(fn,stgs,wd) {
+        single.sw(fn,stgs,wd,fn_cmpd_list,mode,readMethod,archdir,lastStep=lastStep)
     }
-        
-    x<-parallel::clusterMap(cl,f,fn_data,stgs_alist,wd)
-    names(x)<-basename(fn_data)
-    x}
+    fcomb<-function(fn,stgs,wd) {
+        single.sw(fn,stgs,wd,fn_cmpd_list,mode,readMethod,archdir,lastStep=7)
+    }
+
+    if (combine) {
+        z<-parallel::clusterMap(cl,fcomb,fn_data,stgs_alist,wd)
+        names(z)<-basename(fn_data)
+        zz<-RMassBank::combineMultiplicities(z)
+        fn_arch<-file.path(archdir,paste(fn_data,".archive",sep=''))
+        RMassBank::msmsWorkflow(zz, steps=8, mode=mode, archivename = fn_arch)
+    } else {
+        z<-parallel::clusterMap(cl,fnocomb,fn_data,stgs_alist,wd)
+        names(z)<-basename(fn_data)
+        z
+    }
+}
 
     
 ##' Interface to vectorised Mass Bank workflow.
diff --git a/R/run.R b/R/run.R
index b503cc5..755d4e7 100644
--- a/R/run.R
+++ b/R/run.R
@@ -29,16 +29,18 @@ attch<-function(...) paste(...,sep='')
 ##' @param fn_cmpd_list Compound list.
 ##' @param mode as in msmsRead.
 ##' @param rdir The root data directory.
+##' @param combine If TRUE, use combineMultiplicities to merge
+##'     workspaces corresponding to different collisional energies.
 ##' @return A named list of msmsWorkspace objects.
 ##' @author Todor Kondić
 ##' @export
-sw.do<-function(fn_data,fn_cmpd_list,mode,rdir=".") {
+sw.do<-function(fn_data,fn_cmpd_list,mode,rdir=".",combine=F) {
  
     no_drama_mkdir(rdir)
     wdirs<-sapply(basename(fn_data),function(nm) file.path(rdir,stripext(nm)))
     sapply(wdirs,no_drama_mkdir)
     stgs<-sapply(basename(wdirs),function (nm) paste(nm,"yml",sep='.'))
-    v(fn_data,stgs,wdirs,fn_cmpd_list,mode)
+    v(fn_data,stgs,wdirs,fn_cmpd_list,mode,combine=combine)
 }
 
 ##' Creates and prepares mbWorkspace objects before the full workflow
diff --git a/man/p.sw.Rd b/man/p.sw.Rd
index c5608c9..3f64c0b 100644
--- a/man/p.sw.Rd
+++ b/man/p.sw.Rd
@@ -5,7 +5,7 @@
 \title{Parallel Spectral Workflow.}
 \usage{
 p.sw(fn_data, stgs_alist, wd, fn_cmpd_list, mode, readMethod = "mzR",
-  archdir = "archive", lastStep = 8, cl = NULL)
+  archdir = "archive", lastStep = 8, combine = F, cl = NULL)
 }
 \arguments{
 \item{fn_data}{A sequence of mzML input files.}
@@ -25,6 +25,9 @@ filenames of YAML files containing the settings.}
 
 \item{lastStep}{The last step in spectral workflow.}
 
+\item{combine}{If TRUE, use combineMultiplicities to merge
+workspaces corresponding to different collisional energies.}
+
 \item{cl}{Cluster.}
 }
 \value{
diff --git a/man/sw.do.Rd b/man/sw.do.Rd
index 28cf794..bb2a049 100644
--- a/man/sw.do.Rd
+++ b/man/sw.do.Rd
@@ -4,7 +4,7 @@
 \alias{sw.do}
 \title{Perform MassBank Workflow on Multiple Compound Mixtures}
 \usage{
-sw.do(fn_data, fn_cmpd_list, mode, rdir = ".")
+sw.do(fn_data, fn_cmpd_list, mode, rdir = ".", combine = F)
 }
 \arguments{
 \item{fn_data}{List of mzML data filenames to be processed.}
@@ -14,6 +14,9 @@ sw.do(fn_data, fn_cmpd_list, mode, rdir = ".")
 \item{mode}{as in msmsRead.}
 
 \item{rdir}{The root data directory.}
+
+\item{combine}{If TRUE, use combineMultiplicities to merge
+workspaces corresponding to different collisional energies.}
 }
 \value{
 A named list of msmsWorkspace objects.
diff --git a/man/v.Rd b/man/v.Rd
index 29e840e..8038344 100644
--- a/man/v.Rd
+++ b/man/v.Rd
@@ -5,7 +5,7 @@
 \title{Vectorised Spectral Workflow.}
 \usage{
 v(fn_data, stgs_alist, wd, fn_cmpd_list, mode, readMethod = "mzR",
-  archdir = "archive", lastStep = 8)
+  archdir = "archive", lastStep = 8, combine = F)
 }
 \arguments{
 \item{fn_data}{A sequence of mzML input files.}
@@ -24,6 +24,9 @@ filenames of YAML files containing the settings.}
 \item{archdir}{Name of the archive.}
 
 \item{lastStep}{The last step of the spectral workflow.}
+
+\item{combine}{If TRUE, use combineMultiplicities to merge
+workspaces corresponding to different collisional energies.}
 }
 \value{
 A named list of spectral workspaces. The names are derived
-- 
GitLab