Skip to content
Snippets Groups Projects
mix.R 7.58 KiB
Newer Older
Todor Kondic's avatar
Todor Kondic committed
##' Strip the last extension from file names.
##'
##' Removes the trailing `.ext` of the last path component, so
##' `"a.b.c"` becomes `"a.b"`. Names without an extension are
##' returned unchanged, and dots that occur in directory components
##' (e.g. `"my.dir/file"`) are not mistaken for an extension
##' separator.
##'
##' @title Strip File Extension
##' @param fn A character vector of file names or paths.
##' @return A character vector of the same length as `fn` with the
##'     last extension removed from each element.
stripext <- function(fn) {
    ## The extension is a dot followed by at least one character that
    ## is neither a dot nor a path separator, anchored at the end of
    ## the string. sub() is vectorised, so every element is handled.
    sub("\\.[^./\\\\]+$", "", fn, perl = TRUE)
}

##' Create directories without drama.
##'
##' Creates the directory `path` if it does not exist yet, including
##' any missing parent directories. Nothing is done if the directory
##' is already there.
##'
##' @title Create directories without drama
##' @param path Name of the directory.
##' @return The character string containing the input argument `path`.
##' @author Todor Kondić
no_drama_mkdir <- function(path) {
    ## recursive = TRUE: a nested path whose parents are missing would
    ## otherwise only raise a warning and leave nothing created.
    if (!dir.exists(path)) dir.create(path, recursive = TRUE)
    path
}

##' Produce the Rmb Settings file
##'
##' Writes the RMassBank settings template to a temporary file, reads
##' it back as a list, overrides the top-level entries named in
##' `sett_alist` and writes the merged settings to `file` in YAML
##' format.
##'
##' @title Generate RMassBank settings file.
##' @param sett_alist The named list of settings that are different
##'     from the RMassBank defaults. Each named entry replaces the
##'     top-level template entry of the same name as a whole.
##' @param file The name of the settings file to be written.
##' @return NULL
##' @author Todor Kondić
mk_sett_file <- function(sett_alist, file) {
    tmp <- tempfile()
    ## The template is only an intermediate; do not leave it behind.
    on.exit(unlink(tmp), add = TRUE)
    RMassBank::RmbSettingsTemplate(tmp)

    ## Namespace-qualified calls fail loudly if yaml is not installed.
    sett <- yaml::yaml.load_file(tmp)
    for (nm in names(sett_alist)) {
        sett[[nm]] <- sett_alist[[nm]]
    }
    yaml::write_yaml(x = sett, file = file)
    NULL
}

##' Generate the RMassBank compound list from the input compound list
##' in CSV file `src_fn`. Compound names are taken from the
##' PREFERRED_NAME column if it is present, otherwise from the Name
##' column. The SMILES column is required. CAS numbers are taken from
##' the CASRN column if it is present, otherwise they are left empty.
##' The resulting table, with columns ID, Name, SMILES, CAS and RT (RT
##' is left empty), is written to `dest_fn`.
##'
##'
##' @title Generate Compound List File
##' @param src_fn The input compound list CSV filename.
##' @param dest_fn The resulting compound list CSV filename.
##' @return Number of compounds.
##' @author Todor Kondić
gen_comp_list <- function(src_fn, dest_fn) {
    df <- read.csv(src_fn)

    ## Names. `$` is kept on purpose: on a data frame it matches
    ## column names partially, so a column called "Names" is accepted
    ## as well.
    nms <- if ("PREFERRED_NAME" %in% names(df)) df$PREFERRED_NAME else df$Name
    if (is.null(nms)) stop("Unable to read compound names from the input compound list.")

    ## SMILES. Validate before the column is used to size the output.
    smiles <- df$SMILES
    if (is.null(smiles)) stop("Unable to read SMILES from the input compound list.")
    sz <- length(smiles)

    ## CAS
    casvals <- if ("CASRN" %in% names(df)) df$CASRN else rep(NA, sz)

    ## seq_len() instead of 1:sz, so an empty compound list yields an
    ## empty table rather than the IDs c(1, 0).
    outdf <- data.frame(ID = seq_len(sz),
                        Name = nms,
                        SMILES = smiles,
                        CAS = casvals,
                        RT = rep(NA, sz))
    write.csv(outdf, file = dest_fn, row.names = FALSE, na = "")
    length(nms)
}

Todor Kondic's avatar
Todor Kondic committed
##' Runs a compound mixture workflow on a single mzML file.
##'
##' The settings file, the compound list and the file table are
##' generated inside `wd`, the data file is read with `msmsRead` and
##' the remaining steps of `msmsWorkflow` (from step 2 up to
##' `lastStep`) are run on the result.
##'
##' @title RMassBank Spectral Workflow on a Single Compound Mixture
##' @param fn_data A mzML data file.
##' @param stgs_alist RMassBank settings. It can either be a named
##'     list of settings, or a filename of a YAML file.
##' @param wd The name of the work directory.
##' @param fn_cmpd_list The file name of the compound list
##'     corresponding to `fn_data`.
##' @param mode Modes as described in the standard workflow vignette
##'     of RMassBank.
##' @param readMethod Default read method is "mzR". Consult the
##'     documentation of `msmsRead` for details.
##' @param archdir The directory to store R objects created during
##'     workflow execution. It is created below `wd`.
##' @param lastStep The last step in the workflow. Default is eight.
##' @return MsmsWorkspace object.
##' @author Todor Kondić
single.sw <- function(fn_data, stgs_alist, wd, fn_cmpd_list, mode, readMethod = "mzR", archdir = "archive", lastStep = 8) {

    ## Attach the packages as before, but with library(), which raises
    ## an error when a package is missing instead of returning FALSE.
    library(RMassBank)
    library(yaml)

    ## Generate settings file and load.
    ## NOTE(review): fn_data is used verbatim as the stem of the files
    ## created under wd; if it contains directory components, these
    ## paths point into sub-directories of wd. Confirm against callers.
    stgs_alist <- if (is.character(stgs_alist)) yaml::yaml.load_file(stgs_alist) else stgs_alist
    sfn <- file.path(wd, paste0(fn_data, ".ini"))
    mk_sett_file(stgs_alist, sfn)
    RMassBank::loadRmbSettings(sfn)

    ## Generate and load the compound list.
    fn_comp <- file.path(wd, paste0(fn_data, ".comp.csv"))
    n_cmpd <- gen_comp_list(fn_cmpd_list, fn_comp)
    RMassBank::loadList(fn_comp)

    ## Generate file table: every compound ID points to the same data
    ## file. seq_len() keeps the table empty for an empty compound
    ## list.
    df_table <- data.frame(Files = rep(fn_data, n_cmpd), ID = seq_len(n_cmpd))
    fn_table <- file.path(wd, paste0("fn-table.", fn_data, ".csv"))
    write.csv(x = df_table, file = fn_table, row.names = FALSE)

    ## Make empty workspace.
    w <- RMassBank::newMsmsWorkspace()

    ## Run the workflow. The caller's readMethod is passed on; it used
    ## to be ignored in favour of a hard-coded "mzR".
    message(paste("Reading in file:", fn_data))
    w <- RMassBank::msmsRead(w, filetable = fn_table, readMethod = readMethod, mode = mode)
    archdir <- file.path(wd, archdir)
    if (!dir.exists(archdir)) dir.create(archdir)
    fn_arch <- file.path(archdir, paste0(fn_data, ".archive"))
    RMassBank::msmsWorkflow(w, mode = mode, steps = 2:lastStep, archivename = fn_arch)
}


##' Prepare single mbWorkspace object based on the workspace, the
##' infolist name and RMassBank settings.
##'
##' Loads the settings from `fn_stgs`, creates a new mbWorkspace from
##' `w`, resets its infolists and calls `mbWorkflow` with
##' `infolist_path` set to `fn_info`.
##'
##' @title Prepare Single mbWorkspace object
##' @param w MsmsWorkspace object.
##' @param fn_info Filename of the infolist to be generated.
##' @param fn_stgs Filename of the RMassBank settings.
##' @return A mbWorkspace object.
##' @author Todor Kondić
mb.prep.single <- function(w, fn_info, fn_stgs) {
    RMassBank::loadRmbSettings(fn_stgs)
    mb <- RMassBank::newMbWorkspace(w)
    ## resetInfolists() returns the modified workspace; without the
    ## assignment the reset is silently dropped.
    mb <- RMassBank::resetInfolists(mb)
    RMassBank::mbWorkflow(mb, infolist_path = fn_info)
}
##' Apply mb.prep.single element-wise.
##'
##' The three arguments are walked in parallel and `mb.prep.single`
##' is called once per position.
##'
##' @title Vectorized mb.prep function.
##' @param w A sequence of msmsWorkspaces.
##' @param fn_info A sequence of infolist filenames to be generated.
##' @param fn_stgs A sequence of settings associated with each
##'     msmsWorkspace object.
##' @return A list of mbWorkspaces, named after `w`.
##' @author Todor Kondić
mb.prep.v <- function(w, fn_info, fn_stgs) {
    prepared <- mapply(mb.prep.single,
                       w = w,
                       fn_info = fn_info,
                       fn_stgs = fn_stgs,
                       SIMPLIFY = FALSE)
    ## The result always carries the names of the input workspaces.
    names(prepared) <- names(w)
    prepared
}


##' Performs a single MassBank workflow after preparation.
##'
##' Loads the settings from `fn_stgs`, resets the infolists of `mb`,
##' loads the infolists found in `infodir` and runs steps 1 to 8 of
##' `mbWorkflow` with `infodir` as the working directory.
##'
##' @title Single MassBank workflow.
##' @param mb A mbWorkspace object.
##' @param infodir Directory containing the infolist.
##' @param fn_stgs The settings associated with the mbWorkspace
##'     object.
##' @return The value returned by `RMassBank::mbWorkflow`.
##' @author Todor Kondić
mb.single <- function(mb, infodir, fn_stgs) {
    RMassBank::loadRmbSettings(fn_stgs)

    mb <- RMassBank::resetInfolists(mb)
    mb <- RMassBank::loadInfolists(mb, infodir)

    ## The workflow is run from inside infodir. on.exit() restores the
    ## previous working directory even if mbWorkflow fails.
    prevd <- setwd(infodir)
    on.exit(setwd(prevd), add = TRUE)
    RMassBank::mbWorkflow(mb, steps = 1:8)
}




##' Interface to vectorised spectral workflow.
##'
##' Calls `single.sw` once per element of `fn_data`, `stgs_alist` and
##' `wd`, which are walked in parallel. All other arguments are passed
##' unchanged to every call.
##'
##' @title Vectorised Spectral Workflow.
##' @param fn_data A sequence of mzML input files.
##' @param stgs_alist A list of named list of settings, or a list of
##'     filenames of YAML files containing the settings.
##' @param wd The list of working directories.
##' @param fn_cmpd_list The compound list characterising the mixtures.
##' @param mode Same as in msmsRead.
##' @param readMethod Same as in msmsRead.
##' @param archdir Name of the archive.
##' @param lastStep The last step of the spectral workflow, passed on
##'     to `single.sw`. Default is eight.
##' @return A named list of spectral workspaces. The names are derived
##'     from data filenames.
##' @author Todor Kondić
v <- function(fn_data, stgs_alist, wd, fn_cmpd_list, mode, readMethod = "mzR", archdir = "archive", lastStep = 8) {
    x <- mapply(single.sw,
                fn_data = fn_data,
                stgs_alist = stgs_alist,
                wd = wd,
                MoreArgs = list(fn_cmpd_list = fn_cmpd_list,
                                mode = mode,
                                readMethod = readMethod,
                                archdir = archdir,
                                lastStep = lastStep),
                SIMPLIFY = FALSE)
    names(x) <- basename(fn_data)
    x
}
    
##' Interface to vectorised Mass Bank workflow.
##'
##' Calls `mb.single` once per position of the three arguments, which
##' are walked in parallel.
##'
##' @title Vectorised Mass Bank Workflow
##' @param mb List of mass bank workflow objects
##' @param infodir List of subdirs containing info lists.
##' @param fn_stgs List of settings files.
##' @return A named list of mbWorkspace objects. The names are derived
##'     from the input mb sequence.
##' @author Todor Kondić
mb.v <- function(mb, infodir, fn_stgs) {
    done <- mapply(mb.single,
                   mb = mb,
                   infodir = infodir,
                   fn_stgs = fn_stgs,
                   SIMPLIFY = FALSE)
    ## Results are labelled with the names of the input sequence.
    names(done) <- names(mb)
    done
}