Newer
Older
## Copyright (C) 2020 by University of Luxembourg
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at
## http://www.apache.org/licenses/LICENSE-2.0
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
run <- function(fn_conf) {
conf <- read_conf(fn_conf)
dir.create(conf$project,
showWarnings = F,
recursive = T)
m <- withr::with_dir(new=conf$project,code = run_in_dir(m))
return(invisible(m))
run_in_dir <- function(m) {
m <- load_inputs(m)
load_compound_input <- function(m) {
m$input$tab$known <- if (shiny::isTruthy(m$conf$compounds$known))
file2tab(m$conf$compounds$known) else EMPTY_KNOWN
m$input$tab$unknown <- if (shiny::isTruthy(m$conf$compounds$unknown))
file2tab(m$conf$compounds$unknown) else EMPTY_UNKNOWN
m$input$tab$setid <- read_setid(m$conf$compounds$sets,
m$input$tab$known,
m$input$tab$unknown)
load_data_input <- function(m) {
m$input$tab$mzml <- file2tab(m$conf$data)
m
}
##' @export
load_inputs <- function(m) {
m <- load_data_input(m)
m
}
mk_comp_tab <- function(m) {
message("Started assembling the lists of knowns and unknowns into the `comprehensive' table.")
setid <- m$input$tab$setid
setkey(known,ID)
mzml[,`:=`(wd=sapply(Files,add_wd_to_mzml,m$conf$project))]
assert(xor(nrow(unk)==0,nrow(known)==0),msg="No compound lists have been provided. At least one of the known, or unknown compound lists is required.")
message("Begin generation of comp table.")
## knowns
setidKnown<- mzml[setid[origin %in% "known"],.(tag,mode,ID,set,Files,wd),on="set",allow.cartesian=T]
tab2file(tab=setidKnown,file="setidKnown.csv")
compKnown <- known[setidKnown,on=c("ID"),allow.cartesian=T]
setkey(compKnown,set,ID)
tab2file(tab=compKnown,file="compKnown.csv")
compKnown[,`:=`(mz=mapply(get_mz_from_smiles,SMILES,mode,USE.NAMES = F))]
message("Generation of comp table: knowns done.")
setidUnk<-mzml[setid[origin %in% "unknown"],.(tag,mode,ID,set,Files,wd),on="set",allow.cartesian=T]
compUnk <- unk[setidUnk,on="ID"]
message("Generation of comp table: unknowns done.")
df<-rbindlist(l=list(compKnown, compUnk),fill = T)
setnames(df,names(COMP_NAME_MAP),
function(o) COMP_NAME_MAP[[o]])
tab2file(tab=df,file=fn_out)
message("Generation of comp table finished.")
m$out$tab$comp <- df
m
}
##' @export
read_conf <- function(fn) {
yaml::yaml.load_file(fn)
## assert(isThingFile(fn_conf),msg=paste("Unable to read the configuration file:", fn_conf))
## conf <- yaml::yaml.load_file(fn_conf)
## conf <- vrfy_conf(conf)
## conf
## }
verify_compounds <- function(conf) {
## * Existence of input files
fn_cmpd_known <- conf$compounds$known
fn_cmpd_unk <- conf$compounds$unknown
fn_cmpd_sets <- conf$compounds$sets
msg=paste("Cannot find the compound sets file:",fn_cmpd_sets))
## if (!is.null(fn_cmpd_known)) assert(isThingFile(fn_cmpd_known),
## msg=paste("Cannot find known compounds file:",fn_cmpd_known))
## if (!is.null(fn_cmpd_unk)) assert(isThingFile(fn_cmpd_unk),
## msg=paste("Cannot find unknown compounds file:",fn_cmpd_unk))
assert(xor(!isThingFile(fn_cmpd_known),!isThingFile(fn_cmpd_unk)),msg=paste("Both known and unknown compounds lists are missing."))
df_sets <- file2tab(fn_cmpd_sets)
all_sets<-unique(df_sets$set)
## ** Knowns
if (isThingFile(fn_cmpd_unk)) {
df_k <- file2tab(fn_cmpd_known)
are_knowns_OK <- shiny::isTruthy(vald_comp_tab(df_k,fn_cmpd_known, checkSMILES=T, checkNames=T))
assert(are_knowns_OK,msg='Aborted because known compounds table contained errors.')
}
df_u <- file2tab(fn_cmpd_unk)
are_unknowns_OK <- shiny::isTruthy(vald_comp_tab(df_u,fn_cmpd_unk, checkSMILES=F, checkMz=T))
assert(are_unknowns_OK, msg='Aborted because unknown compounds table contained errors.')
return(list(conf=conf,all_sets=all_sets))
}
verify_data_df <- function(mzml,all_sets) {
no_files <- which(mzml[,!file.exists(Files)])
no_modes <- which(mzml[,!(mode %in% names(MODEMAP))])
no_sets <- which(mzml[,!(set %in% all_sets)])
assert(length(no_files)==0,msg = paste("Non-existent data files at rows:",paste(no_files,collapse = ',')))
assert(length(no_modes)==0,msg = paste("Unrecognised modes at rows:",paste(no_modes,collapse = ',')))
assert(length(no_sets)==0,msg = paste("Unknown sets at rows:",paste(no_sets,collapse = ',')))
}
verify_data <- function(conf,all_sets) {
## * Existence of input files
fn_data <- conf$data
assert(isThingFile(fn_data),msg=paste("Data table does not exist:",fn_data))
mzml <- file2tab(fn_data)
verify_data_df(mzml=mzml,all_sets)
##' @export
vrfy_conf <- function(conf) {
## * Existence of input files
z <- verify_compounds(conf)
conf <- z$conf
all_sets <- z$all_sets
verify_data(conf=conf,all_sets=all_sets)
return(conf)
}
## @export
concurrency <- function(m) {
m$conf$workers <- if (!is.null(m$conf$workers)) m$conf$workers else NO_WORKERS
future::plan("multiprocess",workers=m$conf$workers)
message("workers: ",m$conf$workers)
m
}
mk_tol_funcs <- function(m) {
grab <- function(entry,unit) {
what <- paste0("\\<",unit,"\\>$")
entry <- trimws(entry,which="both")
if (grepl(what,entry))
as.numeric(sub(paste0("^(.*)",unit),"\\1",entry)) else NA_real_
}
asgn_mz_err <- function (entry, msg) {
eppm <- grab(entry,"ppm")
eda <- grab(entry,"Da")
shinyscreen:::assert(xor(is.na(eda), is.na(eppm)), msg = msg)
if (is.na(eda)) function(mz) eppm*1e-6*mz else function(mz) eda
}
asgn_t_err <- function (entry, msg) {
em <- grab(entry,"min")
es <- grab(entry,"s")
shinyscreen:::assert(xor(is.na(em), is.na(es)), msg = msg)
if (is.na(em)) es/60. else em
}
m$extr$tol$coarse <- asgn_mz_err(m$conf$tolerance[["ms1 coarse"]], msg = "ms1 coarse error: Only ppm, or Da units allowed.")
m$extr$tol$fine <- asgn_mz_err(m$conf$tolerance[["ms1 fine"]], msg = "ms1 fine error: Only ppm, or Da units allowed.")
m$extr$tol$eic <- asgn_mz_err(m$conf$tolerance$eic, msg = "eic error: Only ppm, or Da units allowed.")
m$extr$tol$rt <- asgn_t_err(m$conf$tolerance$rt, msg = "rt error: Only s(econds), or min(utes) allowed.")
m
}