Skip to content
Snippets Groups Projects
api.R 8.42 KiB
Newer Older
Todor Kondic's avatar
Todor Kondic committed
## Copyright (C) 2020 by University of Luxembourg

## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at

##     http://www.apache.org/licenses/LICENSE-2.0

## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.


Todor Kondic's avatar
Todor Kondic committed
##' @export
    m <- new_state(fn_conf=fn_conf,
                   GUI=F)    
    dir.create(m$conf$project,
               showWarnings = F,
               recursive = T)
Todor Kondic's avatar
Todor Kondic committed
    m <- withr::with_dir(new=conf$project,code = run_in_dir(m))
    return(invisible(m))
Todor Kondic's avatar
Todor Kondic committed

Todor Kondic's avatar
Todor Kondic committed
##' @export
Todor Kondic's avatar
Todor Kondic committed
run_in_dir <- function(m) {
Todor Kondic's avatar
Todor Kondic committed
    m <- mk_tol_funcs(m)
Todor Kondic's avatar
Todor Kondic committed
    m <- load_inputs(m)
Todor Kondic's avatar
Todor Kondic committed
    m <- concurrency(m)
Todor Kondic's avatar
Todor Kondic committed
    m <- mk_comp_tab(m)
Todor Kondic's avatar
Todor Kondic committed
    
Todor Kondic's avatar
Todor Kondic committed
    invisible(m)
Todor Kondic's avatar
Todor Kondic committed
##' @export
load_compound_input <- function(m) {
Todor Kondic's avatar
Todor Kondic committed

    coll <- list()
    fields <- colnames(EMPTY_CMPD_LIST)
    fns <- m$conf$compounds$lists
    for (l in 1:length(fns)) {
        fn <- fns[[l]]
        fnfields <- colnames(fn)
        dt <- file2tab(fn, colClasses=c(ID="character",
                                        SMILES="character",
                                        Formula="character",
                                        Name="character",
                                        RT="numeric",
                                        mz="numeric"))
        verify_cmpd_l(dt=dt,fn=fn)
        nonexist <- setdiff(fnfields,fields)
        coll[[l]] <- if (length(nonexist)==0) dt else dt[,(nonexist) := NULL]
    cmpds <- if (length(fns)>0) rbindlist(l=c(list(EMPTY_CMPD_LIST), coll), use.names = T, fill = T) else EMPTY_CMPD_LIST

    dups <- duplicated(cmpds$ID)
    dups <- dups | duplicated(cmpds$ID,fromLast = T)
    dupIDs <- cmpds$ID[dups]
    dupfns <- cmpds$ORIG[dups]

    msg <- ""
    for (fn in unique(dupfns)) {
        inds <- which(dupfns %in% fn)
        fndupID <- paste(dupIDs[inds], collapse = ',')
        msg <- paste(paste('Duplicate IDs', fndupID,'found in',fn),msg,sep = '\n')
    }

    assert(all(!dups), msg = msg)
    
    cmpds[,("known"):=.(the_ifelse(!is.na(SMILES),"structure",the_ifelse(!is.na(Formula),"formula","mz")))]
    m$input$tab$cmpds <- cmpds
Todor Kondic's avatar
Todor Kondic committed
    m$input$tab$setid <- read_setid(m$conf$compounds$sets,
                                    m$input$tab$cmpds)
Todor Kondic's avatar
Todor Kondic committed
##' @export
load_data_input <- function(m) {
    m$input$tab$mzml <- file2tab(m$conf$data)
    m
}

##' @export
load_inputs <- function(m) {
Todor Kondic's avatar
Todor Kondic committed
    m <- load_compound_input(m)
    m <- load_data_input(m)
    m
}

##' @export
Todor Kondic's avatar
Todor Kondic committed
mk_comp_tab <- function(m) {
    setid <- m$input$tab$setid
Todor Kondic's avatar
Todor Kondic committed
    setkey(setid,set)
Todor Kondic's avatar
Todor Kondic committed
    mzml<- m$input$tab$mzml
    setkey(mzml,set)
    cmpds<-m$input$tab$cmpds
    setkey(cmpds,ID)
Todor Kondic's avatar
Todor Kondic committed
    mzml[,`:=`(wd=sapply(Files,add_wd_to_mzml,m$conf$project))]
    assert(nrow(cmpds)>0,msg="No compound lists have been provided.")
    message("Begin generation of the comprehensive table.")
   
    comp <- cmpds[setid,on="ID"][mzml,.(tag,adduct,ID,RT,set,Name,Files,wd,SMILES,Formula,mz,known),on="set",allow.cartesian=T]
    tab2file(tab=comp,file=paste0("setidmerge",".csv"))
    setkey(comp,known,set,ID)

    ## Known structure.
    ## comp[,`:=`(mz=mapply(calc_mz_from_smiles,SMILES,adduct,ID,USE.NAMES = F))]
    comp[known=="structure",`:=`(mz=calc_mz_from_smiles(SMILES,adduct,ID))]

    ## Known formula.
    comp[known=="formula",`:=`(mz=calc_mz_from_formula(Formula,adduct,ID))]
    setnames(comp,names(COMP_NAME_MAP),
Todor Kondic's avatar
Todor Kondic committed
             function(o) COMP_NAME_MAP[[o]])
    setcolorder(comp,COMP_NAME_FIRST)
Todor Kondic's avatar
Todor Kondic committed
    fn_out <- file.path(m$conf$project,FN_COMP_TAB)
    tab2file(tab=comp,file=fn_out)
Todor Kondic's avatar
Todor Kondic committed
    message("Generation of comp table finished.")
    setkeyv(comp,c("set","tag","mz"))
    m$out$tab$comp <- comp
Todor Kondic's avatar
Todor Kondic committed

verify_compounds <- function(conf) {
    ## * Existence of input files
Todor Kondic's avatar
Todor Kondic committed

    fn_cmpd_sets <- conf$compounds$sets

    ## ** Compound lists and sets
    assert(isThingFile(fn_cmpd_sets),
                            msg=paste("Cannot find the compound sets file:",fn_cmpd_sets))
    for (fn in fns_cmpds) {
        assert(isThingFile(fn), msg=paste("Cannot find compound list:",fn))
    }
    df_sets <- file2tab(fn_cmpd_sets)
    all_sets<-unique(df_sets$set)

    return(list(conf=conf,all_sets=all_sets))
}

verify_data_df <- function(mzml,all_sets) {
    no_files <- which(mzml[,!file.exists(Files)])
    no_adducts <- which(mzml[,!(adduct %in% names(ADDUCTMAP))])
    no_sets <- which(mzml[,!(set %in% all_sets)])
    assert(length(no_files)==0,msg = paste("Non-existent data files at rows:",paste(no_files,collapse = ',')))
    assert(length(no_adducts)==0,msg = paste("Unrecognised adducts at rows:",paste(no_adducts,collapse = ',')))
    assert(length(no_sets)==0,msg = paste("Unknown sets at rows:",paste(no_sets,collapse = ',')))
}

verify_data <- function(conf,all_sets) {
    ## * Existence of input files
    fn_data <- conf$data
    assert(isThingFile(fn_data),msg=paste("Data table does not exist:",fn_data))
    mzml <- file2tab(fn_data)
    verify_data_df(mzml=mzml,all_sets)
Todor Kondic's avatar
Todor Kondic committed
    return(conf)
}
## @export
concurrency <- function(m) {
    m$conf$workers <- if (!is.null(m$conf$workers)) m$conf$workers else NO_WORKERS
    
    future::plan("multiprocess",workers=m$conf$workers)
    message("workers: ",m$conf$workers)
    m
}
mk_tol_funcs <- function(m) {
Todor Kondic's avatar
Todor Kondic committed
    ## Depending on units given when the user specified the errors,
    ## generate functions that calculate errors given the concrete
    ## mass.

    ## Mass errors can be either in ppm, or Da.
    ## Time errors in min, or s.

    ## The mass error calculation functions and the retention time
    ## error in minutes are in m$extr$tol.
    
    grab <- function(entry,unit) {
        what <- paste0("\\<",unit,"\\>$")
        entry <- trimws(entry,which="both")
        if (grepl(what,entry))
            as.numeric(sub(paste0("^(.*)",unit),"\\1",entry)) else NA_real_
    }

    asgn_mz_err <- function (entry, msg) {
        eppm <- grab(entry,"ppm")
        eda <- grab(entry,"Da")
        shinyscreen:::assert(xor(is.na(eda), is.na(eppm)), msg = msg)
        if (is.na(eda)) function(mz) eppm*1e-6*mz else function(mz) eda
    }

    asgn_t_err <- function (entry, msg) {
        em <- grab(entry,"min")
        es <- grab(entry,"s")
        shinyscreen:::assert(xor(is.na(em), is.na(es)), msg = msg)
        if (is.na(em)) es/60. else em
        
        
    }
    m$extr$tol$coarse <- asgn_mz_err(m$conf$tolerance[["ms1 coarse"]], msg = "ms1 coarse error: Only ppm, or Da units allowed.")
    m$extr$tol$fine <- asgn_mz_err(m$conf$tolerance[["ms1 fine"]], msg = "ms1 fine error: Only ppm, or Da units allowed.")
    m$extr$tol$eic <- asgn_mz_err(m$conf$tolerance$eic, msg = "eic error: Only ppm, or Da units allowed.")
    m$extr$tol$rt <- asgn_t_err(m$conf$tolerance$rt, msg = "rt error: Only s(econds), or min(utes) allowed.")

    m
    
}
##' @export
extr_data <- function(m) {

    ## Reduce the comp table to only unique masses (this is because
    ## different sets can have same masses).
    m$out$tab$data <- m$out$tab$comp[,head(.SD,1),by=c('adduct','tag','ID')]
    files <- m$out$tab$data[,unique(Files)]
    allCEs <- do.call(c,args=lapply(files,function(fn) {
        z <- MSnbase::readMSData(files=fn,msLevel = c(1,2),mode="onDisk")
        unique(MSnbase::collisionEnergy(z),fromLast=T)
        
    }))
    allCEs <- unique(allCEs)
    allCEs <- allCEs[!is.na(allCEs)]
    cols <-paste('CE',allCEs,sep = '')
    vals <- rep(NA,length(cols))
    m$out$tab$data[,(cols) := .(rep(NA,.N))]
    files <- m$out$tab$data[,unique(Files)]
    m$extr$tmp <- lapply(files,function(fn) future::future(extract(fn=fn,
                                                                   tab=m$out$tab$data[,.(Files,mz,rt,ID)],
                                                                   err_ms1_eic=m$extr$tol$eic,
                                                                   err_coarse_fun=m$extr$tol$coarse,
                                                                   err_fine_fun=m$extr$tol$fine,
                                                                   err_rt=m$extr$tol$rt),
                                                           lazy = T))