diff --git a/DESCRIPTION b/DESCRIPTION index 9b8b892f56214a248981074500bbd8e45cd4ea6d..5ed7ee28da358cf2fd2a5ae4942c08ef25346307 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: shinyscreen Title: Pre-screening of Mass Spectrometry Data -Version: 0.9.0 +Version: 0.9.3 Author: Todor Kondić Maintainer: Todor Kondić <todor.kondic@uni.lu> Authors@R: @@ -20,19 +20,19 @@ Authors@R: person(given = "Hiba Mohammed", family = "Taha", role = c("ctb"), - email = "hiba.mohammed-taha@ext.uni.lu"), + email = "hiba.mohammed-taha@uni.lu"), person(given = "Jessy", family = "Krier", role = c("ctb"), - email = "jessy.krier@ext.uni.lu"), + email = "jessy.krier@uni.lu"), person(given = "Mira", family = "Narayanan", role = c("ctb"), - email = "mira.narayanan@ext.uni.lu"), + email = "mira.narayanan@uni.lu"), person(given = "Anjana", family = "Elapavalore", role = c("ctb"), - email = "anjana.elapavalore@ext.uni.lu"), + email = "anjana.elapavalore@uni.lu"), person(given = "Marc", family = "Warmoes", role = c("ctb"), @@ -41,31 +41,34 @@ Description: Pre-screening of Mass Spectrometry Data. License: Apache License (>= 2.0) Encoding: UTF-8 LazyData: true -RoxygenNote: 6.1.1 +RoxygenNote: 7.1.1 Roxygen: list(markdown = TRUE) Collate: - 'resources.R' 'base.R' + 'resources.R' 'mix.R' 'extraction.R' - 'run.R' - 'shinyUI.R' + 'api.R' + 'shiny-ui-base.R' Depends: RMassBank, RChemMass Imports: tools, scales, - parallel, + future, yaml, mzR, MSnbase, + data.table, + assertthat, + withr, ggplot2, cowplot, RColorBrewer, grid, curl, shiny, - shinydashboard, - shinyFiles, - rhandsontable + rhandsontable, + DT, + tcltk diff --git a/NAMESPACE b/NAMESPACE index 3dff728265df969457d46f13debab4cd01b06f51..bc5cd3df79d187b2252c0de32825ba072d1bb3e4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,3 +1,45 @@ # Generated by roxygen2: do not edit by hand -export(launch) +export(app) +export(concurrency) +export(conf_trans) +export(create_plots) +export(extr_data) +export(extract) +export(gen_key_plot_tab) +export(gen_struct_plots) +export(get_fn_comp) +export(get_fn_conf) +export(get_fn_extr) +export(get_fn_ftab) +export(get_fn_summ) +export(grab_unit) +export(list2rev) +export(load_compound_input) +export(load_data_input) +export(load_inputs) +export(merge2rev) +export(mk_comp_tab) +export(mk_tol_funcs) +export(mz_input) +export(new_rv_state) +export(new_state) +export(new_state_fn_conf) +export(plot_ms1_chr) +export(plot_ms2_chr) +export(plot_ms2_spec) +export(plot_struct) +export(plot_struct_nowrap) +export(prescreen) +export(report) +export(rev2list) +export(rt_input) +export(run) +export(run_in_dir) +export(save_plots) +export(setup_phase) +export(sort_spectra) +export(subset_summary) +export(tk_save_file) +import(data.table) +importFrom(shiny,validate) diff --git a/R/api.R b/R/api.R new file mode 100644 index 0000000000000000000000000000000000000000..4878a9afd12a4c5c2b1dd1b46b72b06466dab098 --- /dev/null +++ b/R/api.R @@ -0,0 +1,801 @@ +## Copyright (C) 2020 by University of Luxembourg + +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at + +## http://www.apache.org/licenses/LICENSE-2.0 + +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. + + +##' @export +new_state <- function() { + m <- new_conf() + init_state(m) +} + +##' @export +new_rv_state <- function() react_v(m=list2rev(new_state())) + +##' @export +new_state_fn_conf <- function(fn_conf) { + m <- new_state() + m$conf <- read_conf(fn_conf) + init_state(m) +} + +##' @export +run <- function(fn_conf="",m=NULL,phases=NULL,help=F) { + all_phases=list(setup=setup_phase, + comptab=mk_comp_tab, + extract=extr_data, + prescreen=prescreen, + sort=sort_spectra, + subset=subset_summary, + plot=create_plots, + saveplot=save_plots) + + if (help) { + message("(run): You can run some of the following, or all the phases:") + message(paste(paste0("(run): ",names(all_phases)),collapse = "\n")) + return(invisible(NULL)) + } + the_phases <- if (is.null(phases)) all_phases else { + x <- setdiff(phases,names(all_phases)) + if (length(x)>0) { + message("(run): Error. Unknown phases:") + message(paste(paste0("(run): ",x),collapse = "\n")) + stop("Aborting.") + } + all_phases[phases] + } + + m <- if (nchar(fn_conf)!=0) new_state_fn_conf(fn_conf) else if (!is.null(m)) m else stop("(run): Either the YAML config file (fn_conf),\n or the starting state (m) must be provided\n as the argument to the run function.") + + dir.create(m$conf$project, + showWarnings = F, + recursive = T) + m <- withr::with_dir(new=m$conf$project,code = Reduce(function (prev,f) f(prev), + x = the_phases, + init = m)) + return(invisible(m)) +} + + +##' @export +setup_phase <- function(m) { + m <- mk_tol_funcs(m) + m <- load_inputs(m) + m <- concurrency(m) + m +} + +##' @export +run_in_dir <- function(m) { + m <- setup_phase(m) + m <- mk_comp_tab(m) + m <- extr_data(m) + m <- prescreen(m) + m <- sort_spectra(m) + m <- subset_summary(m) + m <- create_plots(m) + m <- save_plots(m) + invisible(m) + +} + + + + + +##' @export +load_compound_input <- function(m) { + + coll <- list() + fields <- colnames(EMPTY_CMPD_LIST) + fns <- m$conf$compounds$lists + coltypes <- c(ID="character", + SMILES="character", + Formula="character", + Name="character", + RT="numeric", + mz="numeric") + for (l in 1:length(fns)) { + fn <- fns[[l]] + + ## Figure out column headers. + nms <- colnames(file2tab(fn,nrows=0)) + + ## Read the table. Knowing column headers prevents unnecessary + ## warnings. + dt <- file2tab(fn, colClasses=coltypes[nms]) + verify_cmpd_l(dt=dt,fn=fn) + # nonexist <- setdiff(fnfields,fields) + coll[[l]] <- dt #if (length(nonexist)==0) dt else dt[,(nonexist) := NULL] + coll[[l]]$ORIG <- fn + + } + cmpds <- if (length(fns)>0) rbindlist(l=c(list(EMPTY_CMPD_LIST), coll), use.names = T, fill = T) else EMPTY_CMPD_LIST + + dups <- duplicated(cmpds$ID) + dups <- dups | duplicated(cmpds$ID,fromLast = T) + dupIDs <- cmpds$ID[dups] + dupfns <- cmpds$ORIG[dups] + + msg <- "" + for (fn in unique(dupfns)) { + inds <- which(dupfns %in% fn) + fndupID <- paste(dupIDs[inds], collapse = ',') + msg <- paste(paste('Duplicate IDs', fndupID,'found in',fn),msg,sep = '\n') + } + + ## TODO: Should we just kick out the duplicates, instead of + ## erroring? + + assert(all(!dups), msg = msg) + + cmpds[,("known"):=.(the_ifelse(!is.na(SMILES),"structure",the_ifelse(!is.na(Formula),"formula","mz")))] + m$input$tab$cmpds <- cmpds + m$input$tab$setid <- read_setid(m$conf$compounds$sets, + m$input$tab$cmpds) + m +} + +##' @export +load_data_input <- function(m) { + m$input$tab$mzml <- file2tab(m$conf$data) + assert(all(unique(m$input$tab$mzml[,.N,by=c("adduct","tag")]$N)<=1),msg="Some rows in the data table contain multiple entries with same tag and adduct fields.") + m + +} + +##' @export +load_inputs <- function(m) { + m <- load_compound_input(m) + m <- load_data_input(m) + m +} + +##' @export +mk_comp_tab <- function(m) { + setid <- m$input$tab$setid + setkey(setid,set) + mzml<- m$input$tab$mzml + setkey(mzml,set) + cmpds<-m$input$tab$cmpds + setkey(cmpds,ID) + ## mzml[,`:=`(wd=sapply(file,add_wd_to_mzml,m$conf$project))] + assert(nrow(cmpds)>0,msg="No compound lists have been provided.") + message("Begin generation of the comprehensive table.") + + comp <- cmpds[setid,on="ID"][mzml,.(tag,adduct,ID,RT,set,Name,file,SMILES,Formula,mz,known),on="set",allow.cartesian=T] + tab2file(tab=comp,file=paste0("setidmerge",".csv")) + setkey(comp,known,set,ID) + + ## Known structure. + ## comp[,`:=`(mz=mapply(calc_mz_from_smiles,SMILES,adduct,ID,USE.NAMES = F))] + comp[known=="structure",`:=`(mz=calc_mz_from_smiles(SMILES,adduct,ID))] + + ## Known formula. + comp[known=="formula",`:=`(mz=calc_mz_from_formula(Formula,adduct,ID))] + setnames(comp,names(COMP_NAME_MAP), + function(o) COMP_NAME_MAP[[o]]) + setcolorder(comp,COMP_NAME_FIRST) + fn_out <- get_fn_comp(m) + tab2file(tab=comp,file=fn_out) + message("Generation of comp table finished.") + setkeyv(comp,c("set","tag","mz")) + m$out$tab$comp <- comp + m +} + + +verify_compounds <- function(conf) { + ## * Existence of input files + + fns_cmpds <- conf$compounds$lists + fn_cmpd_sets <- conf$compounds$sets + + ## ** Compound lists and sets + + assert(isThingFile(fn_cmpd_sets), + msg=paste("Cannot find the compound sets file:",fn_cmpd_sets)) + + for (fn in fns_cmpds) { + assert(isThingFile(fn), msg=paste("Cannot find compound list:",fn)) + } + + ## * Data files + df_sets <- file2tab(fn_cmpd_sets) + all_sets<-unique(df_sets$set) + + return(list(conf=conf,all_sets=all_sets)) +} + +verify_data_df <- function(mzml,all_sets) { + no_file <- which(mzml[,!file.exists(file)]) + no_adducts <- which(mzml[,!(adduct %in% names(ADDUCTMAP))]) + no_sets <- which(mzml[,!(set %in% all_sets)]) + assert(length(no_file)==0,msg = paste("Non-existent data files at rows:",paste(no_file,collapse = ','))) + assert(length(no_adducts)==0,msg = paste("Unrecognised adducts at rows:",paste(no_adducts,collapse = ','))) + assert(length(no_sets)==0,msg = paste("Unknown sets at rows:",paste(no_sets,collapse = ','))) +} + +verify_data <- function(conf,all_sets) { + ## * Existence of input files + fn_data <- conf$data + assert(isThingFile(fn_data),msg=paste("Data table does not exist:",fn_data)) + mzml <- file2tab(fn_data) + verify_data_df(mzml=mzml,all_sets) + return(conf) +} + +#' @export +concurrency <- function(m) { + ## Reads the concurrency entry in the config. It is optional, if + ## not given, then it is up to the user to define the plan of the + ## futures package. If present, it contains at least the `plan' + ## specification. It can also contain `workers` entry specifying + ## the number of workers. If that entry is absent, the default + ## number of workers is NO_WORKERS from the resources.R. + + ## TODO: Needs a rework to be useful. But, this is not a problem, + ## because the user controls concurrency settings from the outside + ## using future::plan. + + ## workers <- m$conf$concurrency$workers + ## plan <- m$conf$concurrency$plan + ## if (!is.null(plan) && plan!=user) { + ## n <- if (!is.null(workers)) workers else NO_WORKERS + ## if (!is.na(n)) future::plan(plan,workers=workers) else future::plan(plan) + ## m$conf$concurrency$workers <- n + + ## } else { + ## m$conf$concurrency$workers <- NA + ## m$conf$concurrency$plan <- "user" + ## } + ## message("plan: ",m$conf$concurrency$plan) + ## message("workers: ",m$conf$concurrency$workers) + + ## So we can actually debug. + m$future <- if (!m$conf$debug) + future::future + else { + message("Debug: futures evaluate as identity") + function(x,...) identity(x) + } + m +} + +#' @export +mk_tol_funcs <- function(m) { + ## Depending on units given when the user specified the errors, + ## generate functions that calculate errors given the concrete + ## mass. + + ## Mass errors can be either in ppm, or Da. + ## Time errors in min, or s. + + ## The mass error calculation functions and the retention time + ## error in minutes are in m$extr$tol. + + ## TODO make these things compatible with futures. + + + + m$extr$tol$coarse <- gen_mz_err_f(m$conf$tolerance[["ms1 coarse"]], + "ms1 coarse error: Only ppm, or Da units allowed." + ) + + + m$extr$tol$fine <- gen_mz_err_f(m$conf$tolerance[["ms1 fine"]], + "ms1 fine error: Only ppm, or Da units allowed.") + + m$extr$tol$eic <- gen_mz_err_f(m$conf$tolerance$eic, + "eic error: Only ppm, or Da units allowed.") + + + m$extr$tol$rt <- gen_rt_err(m$conf$tolerance$rt, + "rt error: Only s(econds), or min(utes) allowed.") + + m + +} + + +##' @export +extr_data <- function(m) { + + ## Reduce the comp table to only unique masses (this is because + ## different sets can have same masses). + + m$out$tab$data <- m$out$tab$comp[,head(.SD,1),by=BASE_KEY] + m$out$tab$data[,set:=NULL] #This column is meaningless now. + file <- m$out$tab$data[,unique(file)] + allCEs <- do.call(c,args=lapply(file,function(fn) { + z <- MSnbase::readMSData(files=fn,msLevel = c(1,2),mode="onDisk") + unique(MSnbase::collisionEnergy(z),fromLast=T) + + })) + allCEs <- unique(allCEs) + allCEs <- allCEs[!is.na(allCEs)] + cols <-paste('CE',allCEs,sep = '') + vals <- rep(NA,length(cols)) + m$out$tab$data[,(cols) := .(rep(NA,.N))] + file <- m$out$tab$data[,unique(file)] + ftags <- m$out$tab$data[,.(tag=unique(tag)),by=file] + futuref <- m$future + tmp <- lapply(1:nrow(ftags),function(ii) { + fn <- ftags[ii,file] + the_tag <- ftags[ii,tag] + message("(extract): Commencing extraction for tag: ", the_tag, "; file: ",fn) + tab <- as.data.frame(data.table::copy(m$out$tab$data[tag==the_tag,.(file,tag,adduct,mz,rt,ID)])) + ## err_ms1_eic <- m$extr$tol$eic + ## err_coarse_fun <- m$extr$tol$coarse + ## err_fine_fun <- m$extr$tol$fine + ## err_rt <- m$extr$tol$rt + + err_coarse <- m$conf$tolerance[["ms1 coarse"]] + + + err_fine <- m$conf$tolerance[["ms1 fine"]] + + + err_ms1_eic <- m$conf$tolerance$eic + + + err_rt <- m$conf$tolerance$rt + + x <- futuref(extract(fn=fn, + tag=the_tag, + tab=tab, + err_ms1_eic=err_ms1_eic, + err_coarse = err_coarse, + err_fine= err_fine, + err_rt= err_rt), + lazy = F) + + x + + }) + + msk <- sapply(tmp,future::resolved) + curr_done <- which(msk) + + for (x in curr_done) { + message("Done extraction for ", unique(future::value(tmp[[x]])$ms1$tag)) + } + while (!all(msk)) { + msk <- sapply(tmp,future::resolved) + newly_done <- which(msk) + for (x in setdiff(newly_done,curr_done)) { + message("Done extraction for ", unique(future::value(tmp[[x]])$file)) + } + Sys.sleep(0.5) + curr_done <- newly_done + } + + ztmp <- lapply(tmp,future::value) + m$extr$ms1 <- data.table::rbindlist(lapply(ztmp,function(x) x$ms1)) + m$extr$ms2 <- data.table::rbindlist(lapply(ztmp,function(x) x$ms2)) + data.table::setkeyv(m$extr$ms1,BASE_KEY) + data.table::setkeyv(m$extr$ms2,c(BASE_KEY,"CE")) + + fn_ex <- get_fn_extr(m) + timetag <- format(Sys.time(), "%Y%m%d_%H%M%S") + saveRDS(object = m, file = file.path(m$conf$project, + paste0(timetag,"_",FN_EXTR_STATE))) + m + +} + +##' @export +conf_trans <- function(conf) { + conf$prescreen <- conf_trans_pres(conf$prescreen) + conf +} + +##' @export +prescreen <- function(m) { + ## Top-level auto prescreening function. + message("(prescreen): Start.") + confpres <- conf_trans_pres(m$conf$prescreen) + + m$qa <- create_qa_table(m$extr,confpres) + m1 <- assess_ms1(m) + m <- assess_ms2(m1) + m$out$tab$summ <- gen_summ(m$out$tab$comp,m$qa$ms1,m$qa$ms2) + message("(prescreen): End.") + m +} + + + + +##' Sets the key specified by DEF_KEY_SUMM and adds second indices, +##' either from DEF_INDEX_SUMM, or user-specified in +##' conf[["summary table"]]$order. The order entry is a list of +##' strings with names of columns in summ, optionally prefixed with a +##' minus(-) sign. Columns prefixed with the minus are going to be in +##' ascending order. +##' +##' @title Sort the Summary Table +##' @param m +##' @return m +##' @author Todor Kondić +##' @export +sort_spectra <- function(m) { + ## Sorts the summary table (summ) in order specified either in + ## `order spectra` sublist of m$conf, or if that is null, the + ## DEF_INDEX_SUMM. + + ## Now, add secondary indexing. + cols <- if (!is.null(m$conf[["summary table"]]$order)) m$conf[["summary table"]]$order else DEF_INDEX_SUMM + + idx <- gsub("^\\s*-\\s*","",cols) #We need only column names for + #now, so remove minuses where + #needed. + assertthat::assert_that(all(idx %in% colnames(m$out$tab$summ)),msg = "Some column(s) in order key in conf file does not exist in the summary table.") + + data.table::setindexv(m$out$tab$summ,idx) + + ## Now we order based on either summary table order subkey, or + ## DEF_ORDER_SUMM + + tmp <- quote(data.table::setorder()) + tmp[[2]] <- quote(m$out$tab$summ) + for (n in 1:length(cols)) tmp[[2+n]] <- parse(text=cols[[n]])[[1]] + message("Ordering expression: \n",deparse(tmp)) + eval(tmp) #Execute the setorder call + m +} + +##' Subsets the summary table by applying conditions set out in the +##' filter subkey of summary table key of the config. Each member of +##' filter is an expression that and all of them are chained together +##' using AND logical operation and applied to the summary table. +##' +##' +##' @title Subset the Summary Table +##' @param m +##' @return m +##' @author Todor Kondić +##' @export +subset_summary <- function(m) { + filt <- m$conf[["summary table"]]$filter + m$out$tab$flt_summ <- if (!is.null(filt)) { + tmp <- lapply(filt, function (x) parse(text = x)[[1]]) + expr <- Reduce(function (x,y) {z<-call("&");z[[2]]<-x;z[[3]]<-y;z},x=tmp) + message("Filtering with: ",deparse(bquote(m$out$tab$summ[.(expr)]))) + eval(bquote(m$out$tab$summ[.(expr)])) + + + } else m$out$tab$summ + + m +} + + +##' @export +gen_struct_plots <- function(m) { + ## Generate structure plots. + comp <- m$out$tab$comp + + res <- if (NROW(comp)>0) { + structtab <- m$out$tab$comp[known=="structure",unique(.SD),.SDcols=c("ID","SMILES")] + message("Start generating structures.") + structtab[,img:=.({tmp <- lapply(SMILES,function (sm) smiles2img(sm,width = 500,height = 500, zoom = 4.5)) + tmp})] + message("Done generating structures.") + structtab + } else { + dtable(ID=character(0),SMILES=character(0),img=list()) + } + + m$out$tab$structfig <- res + + m +} + + +#' @export +create_plots <- function(m) { + ## Produce plots of EICs and spectra and group them acording to + ## conf$figures$grouping. + + ## Select the data nedeed for plotting. + flt_summ <- m$out$tab$flt_summ + + group_data <- m$conf$figures$grouping + plot_group <- if (!shiny::isTruthy(group_data$group)) FIG_DEF_CONF$grouping$group else group_data$group + plot_plot <- if (!shiny::isTruthy(group_data$plot)) FIG_DEF_CONF$grouping$plot else group_data$plot + plot_ms1_label <- if (!shiny::isTruthy(group_data$plot)) FIG_DEF_CONF$grouping$label else group_data$label + plot_ms2_label <- "CE" + + plot_index <- c(plot_group,plot_plot) + + ## All the possible curve labels. + all_ms1_labels <- flt_summ[,unique(.SD),.SDcols=plot_ms1_label][[plot_ms1_label]] + all_ms1_labels <- sort(all_ms1_labels[!is.na(all_ms1_labels)]) + all_ms2_ce_labels <- flt_summ[,unique(CE)] + all_ms2_ce_labels <- sort(all_ms2_ce_labels[!is.na(all_ms2_ce_labels)]) + + ## Plot styling. + style_eic_ms1 <- plot_decor(m,m$conf$logaxes$ms1_eic_int, + all_ms1_labels=all_ms1_labels, + legend_name_ms1=plot_ms1_label) + style_eic_ms2 <- plot_decor(m,m$conf$logaxes$ms2_eic_int, + all_ms1_labels = all_ms1_labels, + all_ms2_labels = all_ms2_ce_labels, + legend_name_ms1 = plot_ms1_label, + legend_name_ms2 = "CE") + style_spec_ms2 <- plot_decor(m,m$conf$logaxes$ms2_spec_int, + all_ms1_labels = all_ms1_labels, + all_ms2_labels = all_ms2_ce_labels, + legend_name_ms1 = plot_ms1_label, + legend_name_ms2 = "CE") + + style_ms2_leg <- plot_decor(m,m$conf$logaxes$ms2_spec_int, + all_ms1_labels = all_ms1_labels, + all_ms2_labels = all_ms2_ce_labels, + legend_name_ms1 = plot_ms1_label, + legend_name_ms2 = "CE", + ms1_legend_info = F) + + + plot_key <- gen_key_plot_tab(m) + + topdir <- FIG_TOPDIR + dir.create(topdir,showWarnings = F) + + my_theme <- function(...) plot_theme(legend.position = "left", + legend.direction = "vertical") + + theme_full <- my_theme() + theme_noleg <- plot_theme(legend.position = "none") + + + clean_range<-function(def,rng) { + x1 <- rng[1] + x2 <- rng[2] + if (is.na(x1) || x1 == 0) x1 <- def[1] + if (is.na(x2) || x2 == 0) x2 <- def[2] + c(x1,x2) + } + + ## If structures do not exist, generate them. + if (is.null(m$out$tab$structfig)) m <- gen_struct_plots(m) + + plot_key[,mapply(function(gv,pv) { + + key <- c(gv,pv) + names(key) <- plot_index + + + + p_chr_ms1 <- plot_ms1_chr(m, plot_index = key) + p_chr_ms2 <- plot_ms2_chr(m, plot_index = key) + p_spec_ms2 <- plot_ms2_spec(m, plot_index = key) + p_struct <- plot_struct_nowrap(m, plot_index = key) + + + ## Produce the filename. + fn <- paste0(paste(..plot_group,gv,..plot_plot,pv,sep = "_"),".pdf") + fn <- gsub("\\[","",fn) + fn <- gsub("\\]","",fn) + fn <- gsub("\\+","p",fn) + fn <- gsub("-","m",fn) + fn <- if (!is.null(topdir)) file.path(topdir,fn) else fn + + + rt_int <- get_rt_interval(p_chr_ms1$data, p_chr_ms2$data, m$conf$figures) + my_coord <- ggplot2::coord_cartesian(xlim = rt_int) + + p_chr_ms1 <- p_chr_ms1 + my_coord + theme_full + p_chr_ms2 <- p_chr_ms2 + my_coord + theme_full + leg1 <- cowplot::get_legend(p_chr_ms1) + leg2 <- cowplot::get_legend(p_chr_ms2) + p_spec_ms2 <- p_spec_ms2 + theme_full + + + ## Plot labels. + labels <- c(paste0("EIC (MS1) ",..plot_group,": ",gv,", ",..plot_plot,": ",pv), + NA, + paste0("EIC (MS2) ",..plot_group,": ",gv,", ",..plot_plot,": ",pv), + NA, + paste0("MS2 Spectra ",..plot_group,": ",gv,", ",..plot_plot,": ",pv), + NA) + + big_fig <- cowplot::plot_grid(p_chr_ms1+theme_noleg, + p_struct, + p_chr_ms2+theme_noleg, + leg2, + p_spec_ms2+theme_noleg, + leg1, + align = "hv", + axis='l', + ncol = 2, + nrow = 3, + labels = labels, + rel_widths = c(2,1)) + + message("Plotting: ",paste(key,collapse = ", ")," to: ",fn) + ggplot2::ggsave(plot=big_fig,width = 21, height = 29.7, units = "cm", filename = fn) + + }, + get(..plot_group), + get(..plot_plot))] + m +} + +#' @export +save_plots <- function(m) { + topdir <- FIG_TOPDIR + dir.create(topdir,showWarnings = F) + + my_theme <- function(...) plot_theme(legend.position = "left", + legend.direction = "vertical") + + + clean_range<-function(def,rng) { + x1 <- rng[1] + x2 <- rng[2] + if (is.na(x1) || x1 == 0) x1 <- def[1] + if (is.na(x2) || x2 == 0) x2 <- def[2] + c(x1,x2) + } + + get_ms2_leg <- m$aux$get_ms2_leg + + grouping <- m$conf$figures$grouping + plot_group <- grouping$group + plot_plot <- grouping$plot + plot_ms1_label <- grouping$label + plot_ms2_label <- "CE" + + doplot <- function(eic_ms1,eic_ms2,spec_ms2,leg_ms2,struct,group,plot,t_group="",t_plot="",print_labs=T) { + ## Produce the filename. + fn <- paste0(paste(t_group,group,t_plot,plot,sep = "_"),".pdf") + fn <- gsub("\\[","",fn) + fn <- gsub("\\]","",fn) + fn <- gsub("\\+","p",fn) + fn <- gsub("-","m",fn) + fn <- if (!is.null(topdir)) file.path(topdir,fn) else fn + + ## Create an empty figure. + + xxdf <- eic_ms1$data[,.(rt=rt,intensity=intensity)] + empty_fig <- ggplot2::ggplot(xxdf,ggplot2::aes(x=rt,y=intensity)) + + ggplot2::geom_blank() + + ggplot2::theme_void() + leg1 <- cowplot::get_legend(eic_ms1) + leg2 <- empty_fig + if (NROW(eic_ms2$data) == 0) + eic_ms2 <- empty_fig else { + leg2 <- leg_ms2 + } + if (NROW(spec_ms2$data) == 0) spec_ms2 <- empty_fig + if (is.na(struct)) struct <- empty_fig + + ## Plot labels. + labels <- if (print_labs) { + c(paste0("EIC (MS1) ",t_group,": ",group,", ",t_plot,": ",plot), + NA, + paste0("EIC (MS2) ",t_group,": ",group,", ",t_plot,": ",plot), + NA, + paste0("MS2 Spectra ",t_group,": ",group,", ",t_plot,": ",plot), + NA) + } else { + rep(NA,6) + + } + + ## Interval + rt_int <- get_rt_interval(eic_ms1$data, eic_ms2$data, m$conf$figures) + my_coord <- ggplot2::coord_cartesian(xlim = rt_int) + big_fig <- cowplot::plot_grid(eic_ms1+my_coord+my_theme(), + struct, + eic_ms2+my_coord+my_theme(), + leg2, + spec_ms2 + my_theme(), + leg1, + align = "hv", + axis='l', + ncol = 2, + nrow = 3, + labels = labels, + rel_widths = c(2,1)) + + message("Saving plot: ",group,", ",plot," to ",fn) + ggplot2::ggsave(plot=big_fig,width = 21, height = 29.7, units = "cm", filename = fn) + + + + } + + + m$out$tab$ms2_plot[m$out$tab$ms1_plot, + Map(function (ms1eic, + ms2eic, + ms2spec, + leg, + struct, + grp, + plt) + doplot(ms1eic,ms2eic,ms2spec, + leg,struct,grp,plt, + t_group=m$conf$figures$grouping$group, + t_plot=m$conf$figures$grouping$plot), + i.fig_eic, + x.fig_eic, + x.fig_spec, + x.fig_leg, + i.fig_struct, + .SD[[1]], + .SD[[2]]), + on=c(plot_group,plot_plot), + .SDcols=c(plot_group,plot_plot)] + m +} + +#' @export +report <- function(m) { + figtopdir <- FIG_TOPDIR #file.path(m$conf$project,FIG_TOPDIR) + pander::evalsOptions("graph.output","pdf") + author <- if (!is.null(m$conf$report$author)) m$conf$report$author else REPORT_AUTHOR + title <- if (!is.null(m$conf$report$title)) m$conf$report$title else REPORT_TITLE + doc <- pander::Pandoc$new(author,title) + doc$add(pander::pandoc.header.return("Plots",level = 1)) + sets <- m$out$tab$flt_summ[,unique(set)] + rep_theme <- ggplot2::labs(title = NULL) + for (s in sets) { + doc$add(pander::pandoc.header.return(paste('Set', s), level = 2)) + sdf <- m$out$tab$flt_summ[set==s,] + group <- sdf[,unique(adduct)] + for (g in group) { + asdf <- sdf[adduct==g,] + ids <- asdf[,unique(ID)] + for (id in ids) { + message("Image ","set: ",s," group: ", g, " id: ",id) + doc$add(pander::pandoc.header.return(paste('ID',id),level = 3)) + tab <- asdf[ID==id,.(tag,ms1_int,ms1_rt,adduct,mz,file)] + ms2info <- m$out$tab$ms2_spec[adduct==g & ID==id,.(tag,ID,rt,ms2_max_int,file)] + tab2 <- tab[ms2info,on="file"][,.(tag,mz,adduct,"$RT_{ms1}$[min]"=ms1_rt,"$RT_{ms2}$[min]"=rt,"$I{ms1}$"=formatC(ms1_int, format="e",digits = 2), "$I(ms2)$"= formatC(ms2_max_int, format="e",digits = 2))] + data.table::setorderv(tab2,c("$I{ms1}$","$I(ms2)$"),c(-1,-1)) + doc$add.paragraph("") + figpath <- fig_path(top=figtopdir,set=s,group=g,id=id,suff="all",ext="pdf") + doc$add(pander::pandoc.image.return(img=paste0("file:",figpath))) + doc$add.paragraph("") + message("Adding table.") + doc$add.paragraph(pander::pandoc.table.return(tab2)) + message("Done adding table.") + ## doc$add(print(tab)) + doc$add.paragraph("") + + } + + } + } + doc$add(pander::pandoc.header.return("Appendix", level = 1)) + doc$add(pander::pandoc.header.return("Configuration",level = 2)) + doc$add(m$conf) + doc$add(pander::pandoc.header.return("R Session Info",level = 2)) + doc$add(sessionInfo()) + m$out$report <- doc + m$out$report$export('report.pdf') + m +} + + +#' @export +app <- function() { + unlink(list.files(pattern = "app_run.*html$")) + unlink(list.files(pattern = "app_run.*Rmd$")) + file.copy(system.file(file.path("rmd","app.Rmd"),package = "shinyscreen"),"app_run.Rmd") + rmarkdown::run(file = "app_run.Rmd") +} diff --git a/R/base.R b/R/base.R index 9e3b28f7c2ab7fe6080e09d400f02169c5037c6f..abdebdd511ecaa0d09e1077f12f3ec07aee23671 100644 --- a/R/base.R +++ b/R/base.R @@ -12,16 +12,18 @@ ## See the License for the specific language governing permissions and ## limitations under the License. + +##' @import data.table +## Redirections +the_ifelse <- data.table::fifelse +dtable <- data.table::data.table + tab2file<-function(tab,file,...) { - write.csv(x=tab,file=file,row.names=F,...) + data.table::fwrite(x=tab,file=file,...) } -file2tab<-function(file,stringsAsFactors=F,comment.char='',sep=',',...) { - read.csv(file=file, - header=T, - stringsAsFactors=stringsAsFactors, - comment.char=comment.char, - na.strings=c("","NA"),...) +file2tab<-function(file,na.strings=c("","NA","\"\""),...) { + data.table::fread(file=file,na.strings = na.strings, ...) } isThingFile<-function(fn) { @@ -35,3 +37,19 @@ split_path <- function(path) { if (dirname(path) %in% c(".", path)) return(basename(path)) return(c(basename(path), split_path(dirname(path)))) } + + +print_table <- function (df) { + paste(apply(df,1,function (row) paste(row,collapse=',')),collapse = "\n") +} + +assert <- function(expr,msg) shiny::validate(shiny::need(expr,message=msg)) + + +gen_uniq_lab <- function(prev,pref='',suff='') { + l <- length(prev) + gen <- function() paste0(pref,as.integer(runif(1,min=l,max=2L*l+1L)),suff) + cand <- gen() + while (cand %in% prev) cand <- gen() + c(prev,cand) +} diff --git a/R/extraction.R b/R/extraction.R index e3ba5207691988d8aec309fb41593525bc6a7ee0..a1624e3ec8cc17e1feac99f2ceeef0c3d5bd2fb5 100644 --- a/R/extraction.R +++ b/R/extraction.R @@ -19,6 +19,30 @@ load_raw_data<-function(fn,mode="inMemory") { } +centroided1 <- function(ms) { + if (all(MSnbase::centroided(ms)) == T) + return(T) else { + state <- MSnbase::isCentroided(ms) + N <- length(state) + fls <-length(which(state == F)) + if (fls/(1.*N) < 0.01) T else F + } + +} + +centroided <- function(msvec) { + if (is.vector(msvec)) { + f <- list() + for (i in 1:length(msvec)) { + f[[i]] <- future::future(centroided1(msvec[[i]])) + } + lapply(f, FUN = future::value) + } else { + centroided1(msvec) + } + +} + acq_mz<-function(tabFn) { df<-read.csv(tabFn, stringsAsFactors=F, @@ -33,17 +57,15 @@ id2name<-function(id) {paste("ID:",id,sep='')} ppm2dev<-function(m,ppm) 1e-6*ppm*m - - gen_mz_range<-function(mz,err) { - mat<-matrix(data=numeric(1),nrow=length(mz),ncol=2,dimnames=list(as.character(names(mz)))) + mat<-matrix(data=numeric(1),nrow=length(mz),ncol=2) mat[,1]<-mz - err mat[,2]<-mz + err mat } gen_rt_range<-function(rt,err) { - mat<-matrix(data=numeric(1),nrow=length(rt),ncol=2,dimnames=list(as.character(names(rt)))) + mat<-matrix(data=numeric(1),nrow=length(rt),ncol=2) rV<-which(!is.na(rt)) rNA<-which(is.na(rt)) mat[rV,1]<-(rt[rV] - err)*60 @@ -53,10 +75,7 @@ gen_rt_range<-function(rt,err) { mat } -filt_ms2_by_prcs <- function(ms2,mz,errCoarse) { - - mzRng<-gen_mz_range(mz,err=errCoarse) - ids<-rownames(mzRng) +filt_ms2_by_prcs <- function(ms2,mzrng,ids,adduct) { pre<-MSnbase::precursorMz(ms2) psn<-MSnbase::precScanNum(ms2) acN<-MSnbase::acquisitionNum(ms2) @@ -64,37 +83,41 @@ filt_ms2_by_prcs <- function(ms2,mz,errCoarse) { inRange<-function(i) { mp<-pre[[i]] - x<-mzRng[,1]<mp & mp<mzRng[,2] - mRows<-which(x) - sids<-ids[mRows] - sids + x<-mzrng[,1]<mp & mp<mzrng[,2] + ind<-which(x) + sids <- ids[ind] + add <- adduct[ind] + dtable(ID=sids,adduct=add) } - lst<-lapply(1:nR,function(i) list(n=i,prec_scan=psn[[i]],aN=acN[[i]],ids=inRange(i))) + lst<-lapply(1:nR,function(i) { + dt <- inRange(i) + list(n=i,prec_scan=psn[[i]],aN=acN[[i]],ids=dt$ID,adduct=dt$adduct) + }) nemp<-sapply(lst,function(m) length(m$ids)>0) wrk<-lst[nemp] dfL<-sum(sapply(wrk,function(w) length(w$ids))) - df<-data.frame(ID=character(dfL), - prec_scan=integer(dfL), - aN=integer(dfL), - OK=logical(dfL), - stringsAsFactors=F) + df<-dtable(ID=character(dfL), + adduct=character(dfL), + prec_scan=integer(dfL), + aN=integer(dfL), + OK=logical(dfL)) df$OK<-T #TODO Introduced for testing, be careful. offD<-0 for (m in wrk) { l<-length(m$ids) rng<-(offD+1):(offD+l) - df[rng,"ID"]<-m$ids - df[rng,"prec_scan"]=m$prec_scan - df[rng,"aN"]<-m$aN + df[rng,"ID"] <- m$ids + df[rng,"prec_scan"] <- m$prec_scan + df[rng,"aN"] <- m$aN + df[rng,"adduct"] <- m$adduct offD<-offD+l } df[order(df$aN),] } -filt_ms2_by_prcs_ht<-function(ms2,mz,errCoarse) { - lgnd<-filt_ms2_by_prcs(ms2,mz,errCoarse) - +filt_ms2_by_prcs_ht<-function(ms2,mzrng,ids,adduct) { + lgnd<-filt_ms2_by_prcs(ms2,mzrng=mzrng,ids=ids,adduct=adduct) scans<-unique(lgnd$aN) ns<-which(MSnbase::acquisitionNum(ms2) %in% scans) sms2<-ms2[ns] @@ -105,29 +128,37 @@ filt_ms2_by_prcs_ht<-function(ms2,mz,errCoarse) { pick_unique_precScans<-function(idx) { ps<-unique(idx$prec_scan) mind<-match(ps,idx$prec_scan) - ids<-idx$ID[mind] - data.frame(prec_scan=idx$prec_scan[mind],ID=ids,stringsAsFactors=F) + data.frame(prec_scan=idx$prec_scan[mind], + ID=idx$ID[mind], + adduct=idx$adduct[mind], + stringsAsFactors=F) } pick_uniq_pscan<-function(leg) { - ids<-unique(leg$ID) - x<-lapply(ids,function(id) {ups<-unique(leg[id==leg$ID,"prec_scan"]);data.frame(ID=rep(id,length(ups)),prec_scan=ups,stringsAsFactors = F)}) - res<-do.call(rbind,c(x,list(stringsAsFactors=F))) - res[order(res$prec_scan),] + res <- leg[,.(prec_scan=unique(prec_scan)),by=c("ID","adduct")] + res[order(prec_scan),] + ## ids<-unique(leg$ID) + ## x<-lapply(ids,function(id) {ups<-unique(leg[id==leg$ID,"prec_scan"]);data.frame(ID=rep(id,length(ups)),prec_scan=ups,stringsAsFactors = F)}) + ## res<-do.call(rbind,c(x,list(stringsAsFactors=F))) + ## res[order(res$prec_scan),] } -verif_prec_fine_ht<-function(preLeg,ms1,mz,errFinePPM) { - mzRng<-gen_mz_range(mz,err=ppm2dev(mz,errFinePPM)) +verif_prec_fine_ht<-function(preLeg,ms1,mz,mzrng,ids,adduct) { + ## TODO FIXME TESTPHASE Something goes wrong here, all mapply results are + ## not OK. More testing needed. df<-preLeg - df$mz<-mz[df$ID] - mz1<-mzRng[df$ID,1] - mz2<-mzRng[df$ID,2] - ipns<-match(df$prec_scan,MSnbase::acquisitionNum(ms1)) - rms1<-ms1[ipns] - mzsp<-MSnbase::mz(rms1) - df$OK<-mapply(function(m1,sp,m2) any((m1<sp) & (sp<m2)),mz1,mzsp,mz2) - df[df$OK,] + xx <- dtable(adduct=adduct,ID=ids,mz=mz,mz1=mzrng[,1],mz2=mzrng[,2]) + df <- preLeg[xx,on=c("ID","adduct")] + df$ipns<-match(df$prec_scan,MSnbase::acquisitionNum(ms1)) + df[, ("mzsp") := .(lapply(ipns,function (ip) if (!is.na(ip)) MSnbase::mz(ms1[[ip]]) else NA_real_))] + df$OK<-mapply(function(m1,sp,m2) any((m1<sp) & (sp<m2)),df$mz1,df$mzsp,df$mz2) + res<-df[df$OK,] + res$ipns<-NULL + res$mz1<-NULL + res$mz2<-NULL + res$mzsp<-NULL + res } filt_ms2<-function(ms1,ms2,mz,errCoarse,errFinePPM) { @@ -150,6 +181,60 @@ filt_ms2<-function(ms1,ms2,mz,errCoarse,errFinePPM) { names(res)<-uids res } +filt_ms2_fine <- function(ms1,ms2,mz,ids,adduct,err_coarse_fun,err_fine_fun) { + ## This function is supposed to extract only those MS2 spectra for + ## which it is proven that the precursor exists within the fine + ## error range. + mzrng_c <- gen_mz_range(mz,err_coarse_fun(mz)) + mzrng_f <- gen_mz_range(mz,err_fine_fun(mz)) + + tmp<-filt_ms2_by_prcs_ht(ms2,mzrng=mzrng_c,ids=ids,adduct=adduct) + legMS2<-tmp$leg + legPcs<-pick_uniq_pscan(legMS2) + legPcs<-verif_prec_fine_ht(legPcs,ms1=ms1,mz=mz,mzrng=mzrng_f,ids=ids,adduct=adduct) + ## x<-Map(function (id,psn,a) {legMS2[id==legMS2$ID & a==legMS2$adduct & psn==legMS2$prec_scan,]},legPcs[,"ID"],legPcs[,"prec_scan"],legPcs[,"adduct"]) + ## x <- data.table::rbindlist(x)[,.(ID,adduct,aN)] + x <- legMS2[legPcs[,.(ID,adduct,prec_scan)],on=c("ID","adduct","prec_scan")] + ## x<-do.call(rbind,c(x,list(make.row.names=F,stringsAsFactors=F)))[c("ID","aN")] + ## rownames(x)<-NULL + x<-x[order(x$aN),] + x +} +extr_ms2<-function(ms1,ms2,ids,mz,adduct,err_coarse_fun, err_fine_fun) { + ## Extraction of MS2 EICs and spectra. + x <- filt_ms2_fine(ms1=ms1, + ms2=ms2, + mz=mz, + ids=ids, + adduct=adduct, + err_coarse_fun=err_coarse_fun, + err_fine_fun=err_fine_fun) + + ## This was here before and obviously wrong when multiple adducts + ## correspond to the same ID: + ## + ## uids <- unique(x$ID) + ## uadds <- unique(x$adduct) + idadd <- x[,unique(.SD),.SDcols=c("ID","adduct")] + acN<-MSnbase::acquisitionNum(ms2) + chunks <- Map(function(id,ad) { + + + ans <- x[id==x$ID & ad==x$adduct,]$aN + sp<-ms2[which(acN %in% ans)] + + message("id:",id,"ad:",ad) + res <- gen_ms2_spec_blk(sp) + res$ID <- id + res$adduct <- ad + res + }, + idadd$ID,idadd$adduct) + + data.table::rbindlist(chunks,fill = T) + +} + add_ms2_prcs_scans<-function(ms2,idx) { @@ -167,11 +252,6 @@ add_ms2_prcs_scans<-function(ms2,idx) { df } - - - - - refn_ms2_by_prec<-function(idxMS2,preFine) { pf<-preFine[preFine$OK,] pf$ID<-as.character(pf$ID) @@ -214,7 +294,7 @@ grab_ms2_spec<-function(idx,raw) { names(res)<-IDs res } - + gen_ms2_chrom<-function(ms2Spec) { lapply(ms2Spec, function(sp) @@ -241,19 +321,18 @@ gen_ms2_chrom<-function(ms2Spec) { } -gen_ms1_chrom<-function(raw,mz,errEIC,rt=NULL,errRT=NULL) { - mzRng<-gen_mz_range(mz,err=errEIC) - rtRng<-gen_rt_range(rt,err=errRT) - ids<-dimnames(mzRng)[[1]] +gen_ms1_chrom <- function(raw,mz,errEIC,id,rt=NULL,errRT=NULL) { + mzRng<-gen_mz_range(mz,err = errEIC) + rtRng<-gen_rt_range(rt,err = errRT) x<-MSnbase::chromatogram(raw,mz=mzRng,msLevel=1,missing=0.0,rt=rtRng) res<-lapply(x,function (xx) { rt<-MSnbase::rtime(xx)/60. ints<-MSnbase::intensity(xx) - df<-data.frame(rt=rt,intensity=ints,stringsAsFactors=F) + df<-dtable(rt=rt,intensity=ints) df }) - names(res)<-ids + names(res)<-id res } @@ -267,20 +346,6 @@ gen_ms1_chrom_ht<-function(raw,mz,errEIC,rt=NULL,errRT=NULL) { res } - - -tab2file<-function(tab,file,...) { - write.csv(x=tab,file=file,row.names=F,...) -} - -file2tab<-function(file,stringsAsFactors=F,comment.char='',...) { - read.csv(file=file, - header=T, - stringsAsFactors=stringsAsFactors, - comment.char=comment.char, - na.strings=c("","NA"),...) -} - get_ext_width <- function(maxid) {as.integer(log10(maxid)+1)} id_fn_ext<-function(width,id) { formatC(as.numeric(id),width=width,flag=0) @@ -351,7 +416,7 @@ extr_msnb <-function(file,wd,mz,errEIC, errFinePPM,errCoarse=0.5,rt=NULL,errRT=N message("Extracting MS2 spectra.") idxMS2<-filt_ms2_by_prcs(ms2=ms2,mz=mz,errCoarse=errCoarse) message("Resampling MS2 spectra.") - # idxMS2<-add_ms2_prcs_scans(ms2,idxMS2) + # idxMS2<-add_ms2_prcs_scans(ms2,idxMS2) prsc<-pick_unique_precScans(idxMS2) vprsc<-verif_prec_fine(preSc=prsc,ms1=ms1,mz=mz,errFinePPM = errFinePPM) idxMS2<-refn_ms2_by_prec(idxMS2=idxMS2,preFine=vprsc) @@ -411,39 +476,114 @@ extr_msnb_ht <-function(file,wd,mz,errEIC, errFinePPM,errCoarse,fnSpec,rt=NULL,e x } -##' Extracts data from mzML files. -##' -##' @title Data Extraction from mzML Files -##' @param fTab File table with Files,ID,wd,Name,mz and RT -##' columns. Column Files, as well as wd must have all rows -##' identical. -##' @param extr_fun Extraction function from the backend. -##' @param errEIC Absolute mz tolerance used to extract precursor EICs. -##' @param errFinePPM Tolerance given in PPM used to associate input -##' masses with what the instrument assigned as precursors to MS2. -##' @param errCoarse Absolute tolerance for preliminary association of -##' precursors (from precursorMZ), to MS2 spectra. -##' @param errRT The half-width of the retention time window. -##' @param fnSpec Output file specification. -##' @return Nothing useful. -##' @author Todor Kondić -extract<-function(fTab,extr_fun,errEIC,errFinePPM,errCoarse,fnSpec,errRT) { - fnData<-fTab$Files[[1]] - wd<-fTab$wd[[1]] - ID<-fTab$ID - mz<-fTab$mz - rt<-fTab$rt - names(mz)<-id2name(ID) - if (!is.null(rt)) names(rt)<-ID - dir.create(wd,showWarnings=F) - extr_fun(file=fnData, - wd=wd, - mz=mz, - rt=rt, - errRT=errRT, - errEIC=errEIC, - errFinePPM=errFinePPM, - errCoarse=errCoarse, - fnSpec=fnSpec) + + + +extr_eic_ms1 <- function(tab,err) { + ## Asynchronous extraction of ms1 spectra. The result is a list of + ## running futures. + file <- unique(tab$file) + + res <-lapply(file,function (fn) future::futur(extr_fn(fn), lazy=T)) + names(res) <- file + res +} + +##' @export +extract <- function(fn,tag,tab,err_ms1_eic.,err_coarse,err_fine,err_rt.) { + ## Extracts MS1 and MS2 EICs, as well as MS2 spectra, subject to + ## tolerance specifications. + + ## TODO: Still detecting external references ... but which? + ## However, the results check out, compared to sequential access. + err_coarse_fun <- gen_mz_err_f(err_coarse, + "ms1 coarse error: Only ppm, or Da units allowed.") + + err_fine_fun <- gen_mz_err_f(err_fine, + "ms1 fine error: Only ppm, or Da units allowed.") + + err_ms1_eic <- gen_mz_err_f(err_ms1_eic., + "eic error: Only ppm, or Da units allowed.") + + err_rt <- gen_rt_err(err_rt., + "rt error: Only s(econds), or min(utes) allowed.") + + tab <- data.table::as.data.table(tab) + ## chunk <- tab[file==fn] + mz <- tab$mz + rt <- tab$rt + id <- tab$ID + adduct <- tab$adduct + names(mz) <- id + names(rt) <- id + mzerr <- err_coarse_fun(mz) + mzrng <- gen_mz_range(mz=mz,err=mzerr) + rtrng <- gen_rt_range(rt=rt,err=err_rt) + mzmin <- min(mzrng) + mzmax <- max(mzrng) + read_ms1 <- function() { + ms1 <- MSnbase::readMSData(file=fn,msLevel=1,mode="onDisk") + ms1 <- MSnbase::filterMz(ms1,c(mzmin,mzmax)) + ms1 + } + read_ms2 <- function() { + ms2 <- MSnbase::readMSData(file=fn,msLevel=2,mode="onDisk") + ms2 + } + extr_ms1_eic <- function(ms1) { + eic <- MSnbase::chromatogram(ms1,mz=mzrng,msLevel=1,missing=0.0,rt=rtrng) + bits <- dtable(N=sapply(eic,NROW)) + bigN <- bits[,sum(N)] + bits[,idx:=paste0('I',.I)] + bits$ID <- id + bits$adduct <- adduct + bits$tag <- tag + + res<-dtable(rt=numeric(bigN), + intensity=numeric(bigN), + tag=tag, + adduct=bits[,rep(adduct,N)], + ID=bits[,rep(ID,N)], + idx=bits[,rep(idx,N)]) + data.table::setkey(res,idx) + names(eic)<-bits$idx + res[,c("rt","intensity") := + .(MSnbase::rtime(eic[[idx]])/60., + MSnbase::intensity(eic[[idx]])), + by=idx] + + data.table::setkeyv(res,BASE_KEY) + res + } + ms1 <- read_ms1() + ms2 <- read_ms2() + res_ms1 <- extr_ms1_eic(ms1) + res_ms2 <- extr_ms2(ms1=ms1, + ms2=ms2, + ids=id, + mz=mz, + adduct=adduct, + err_coarse_fun=err_coarse_fun, + err_fine_fun=err_fine_fun) + res_ms2[,"tag":=tag] + + res <- list(ms1=res_ms1, + ms2=res_ms2) + res +} + +gen_ms2_spec_blk <- function(spectra) { + + dt <- dtable(mz=MSnbase::mz(spectra), + intensity=MSnbase::intensity(spectra), + rt = lapply(MSnbase::rtime(spectra),function (z) z/60.), + CE = MSnbase::collisionEnergy(spectra), + an = MSnbase::acquisitionNum(spectra)) + dt[,maspI:=sapply(intensity,function (zz) max(zz))] + data.table::rbindlist(apply(dt,1,function(row) dtable(intensity=row[["intensity"]], + rt = row[["rt"]], + mz = row[["mz"]], + CE = row[["CE"]], + an = row[["an"]]))) } diff --git a/R/mix.R b/R/mix.R index 4834d690f838645c838842027afe95781d8fb818..952e19ddd14966508d59ddba16f4c483f8c0e519 100644 --- a/R/mix.R +++ b/R/mix.R @@ -16,7 +16,7 @@ stripext<-function(fn) { bits<-strsplit(fn,split="\\.")[[1]] if (length(bits)> 1) paste(head(bits,-1),collapse=".") else fn} -get_mz_cmp_l<-function(id,mode,cmpL) { +get_mz_cmp_l<-function(id,adduct,cmpL) { ind<-match(id,cmpL$ID) mz<-cmpL$mz[[ind]] smiles<-cmpL$SMILES[[ind]] @@ -24,13 +24,136 @@ get_mz_cmp_l<-function(id,mode,cmpL) { mz } else if (nchar(smiles)>0) { - mde<-as.character(mode) - wh<-MODEMAP[[mde]] + mde<-as.character(adduct) + wh<-ADDUCTMAP[[mde]] RChemMass::getSuspectFormulaMass(smiles)[[wh]] } else stop("Both SMILES and mz fields, for ID ",id,", found empty in the compound list. Aborting.") res } +calc_mz_from_formula_outer <- function(chform,adduct,id) { + check_chform <- enviPat::check_chemform(ISOTOPES,chform) + wind <- which(check_chform$warning) + if (length(wind) > 0) stop("Cannot understand the following formulas: ", + paste(check_chform$new_formula[wind],collapse = ",")) + mol_form <- check_chform$new_formula + l_mol <- length(mol_form) + l_add <- length(adduct) + + adds <- ADDUCTS[Name %in% adduct,.(Name, + add=as.character(Formula_add), + ded=as.character(Formula_ded), + charge=Charge)] + + dt <- dtable(ID = rep(id,each = l_add), + mol_form = rep(mol_form,each = l_add), + adduct = rep(adds$Name,l_mol), + add = rep(adds$add,l_mol), + ded = rep(adds$ded,l_mol), + charge= rep(adds$charge,l_mol)) + + merger <- function (mol_form,add,ded) { + full_form <- rep(NA_character_,length(mol_form)) + both_ind <- which(add != 'FALSE' & ded != 'FALSE') + add_only_ind <- which(add != 'FALSE' & ded == 'FALSE') + ded_only_ind <- which(ded != 'FALSE' & add == 'FALSE') + ainds <- c(both_ind,add_only_ind) + full_form[ainds] <- vapply(ainds,function (i) enviPat::mergeform(mol_form[[i]],add[[i]]),FUN.VALUE = character(1), USE.NAMES = F) + dinds <- c(both_ind,ded_only_ind) + full_form[dinds] <- vapply(dinds,function (i) { + z <- check_ded2(mol_form[[i]],ded[[i]]) + if (z) enviPat::subform(mol_form[[i]],ded[[i]]) else NA_character_ + }, + FUN.VALUE = character(1)) + full_form + } + dt[,("full_form"):=.(merger(mol_form,add,ded))] + dt[!is.na(full_form),("mz"):=.(mapply(function(ff,ch) enviPat::isopattern(ISOTOPES,chemforms = ff, + charge = ch, verbose = F)[[1]][1], + full_form, + charge, USE.NAMES = F))] + dt[is.na(full_form),("mz"):=NA_real_] + dt +} + +calc_mz_from_formula <- function(chform,adduct,id) { + check_chform <- enviPat::check_chemform(ISOTOPES,chform) + wind <- which(check_chform$warning) + if (length(wind) > 0) stop("Cannot understand the following formulas: ", + paste(check_chform$new_formula[wind],collapse = ",")) + mol_form <- check_chform$new_formula + uad <- unique(adduct) + uadds <- lapply(uad,function(a) ADDUCTS[Name==a,.(Name, + add=as.character(Formula_add), + ded=as.character(Formula_ded), + charge=Charge),on=""]) + names(uadds) <- uad + adds <- rbindlist(l=lapply(adduct,function(a) uadds[[a]])) + + merger <- function (mol_form,add,ded) { + res <- numeric(length(mol_form)) + both_ind <- which(add != 'FALSE' & ded != 'FALSE') + add_only_ind <- which(add != 'FALSE' & ded == 'FALSE') + ded_only_ind <- which(ded != 'FALSE' & add == 'FALSE') + ainds <- c(both_ind,add_only_ind) + res[ainds] <- vapply(ainds,function (i) enviPat::mergeform(mol_form[[i]],add[[i]]),FUN.VALUE = character(1), USE.NAMES = F) + dinds <- c(both_ind,ded_only_ind) + res[dinds] <- vapply(dinds,function (i) { + z <- check_ded2(mol_form[[i]],ded[[i]]) + if (z) enviPat::subform(mol_form[[i]],ded[[i]]) else NA_character_ + }, + FUN.VALUE = character(1)) + res + } + forms <- merger(mol_form,adds$add,adds$ded) + + ## Check if formulas actually calculated. + bad_idx <- which(forms=="0") + bad_adducts <- adduct[bad_idx] + bad_ids <- id[bad_idx] + non_dups <- !duplicated(bad_idx) + bad_ids <- bad_ids[non_dups] + bad_adducts <- bad_adducts[non_dups] + if (length(bad_idx)>0) stop(paste0("Unable to process the adducts:\n", + paste(bad_adducts,collapse = ","), + "\nfor id-s:", + paste(bad_ids,collapse = ","))) + + mz <- the_ifelse(!is.na(forms), + mapply(function(ff,ch) enviPat::isopattern(ISOTOPES,chemforms = ff, + charge = ch, verbose = F)[[1]][1], + forms, + adds$charge, USE.NAMES = F), + NA_real_) + mz +} + +calc_mz_from_smiles <- function(smiles,adduct,id) { + mol <- lapply(smiles,function(s) try(RMassBank::getMolecule(s), silent = T)) + check <- which(is.atomic(mol)) + if (length(check) > 0) + stop("Errors in SMILES with IDs:",paste(id[which],collapse = ',')) + + mol_form <- sapply(mol,function(x) (rcdk::get.mol2formula(x))@string,USE.NAMES = F) + names(mol_form) <- id + calc_mz_from_formula(mol_form,adduct,id) + + +} + +calc_mz_from_smiles_outer <- function(smiles,adduct,id) { + mol <- lapply(smiles,function(s) try(RMassBank::getMolecule(s), silent = T)) + check <- which(is.atomic(mol)) + if (length(check) > 0) + stop("Errors in SMILES with IDs:",paste(id[which],collapse = ',')) + + mol_form <- sapply(mol,function(x) (rcdk::get.mol2formula(x))@string,USE.NAMES = F) + names(mol_form) <- id + calc_mz_from_formula_outer(mol_form,adduct,id) + + +} + get_col_from_cmp_l<-function(id,cname,cmpL) { ind<-match(id,cmpL$ID) x<-cmpL[[cname]][[ind]] @@ -38,23 +161,45 @@ get_col_from_cmp_l<-function(id,cname,cmpL) { } -gen_clean_state_ftab<-function(ftable) { - ftable$Comments <- "" - ftable[c("MS1","MS2","Alignment","AboveNoise")] <- T - ftable["MS2rt"] <- NA_real_ - ftable["iMS2rt"] <- NA_integer_ - ftable["rt"]<-NA_real_ - ftable["checked"]<-'NONE' - ftable +gen_clean_state_summ<-function(summ) { + summ$Comments <- "" + summ[c("MS1","MS2","Alignment","AboveNoise")] <- T + summ["MS2rt"] <- NA_real_ + summ["iMS2rt"] <- NA_integer_ + summ["rt"]<-NA_real_ + summ["checked"]<-'NONE' + summ +} +gen_empty_summ <- function() { + EMPTY_SUMM } -pp_touch_q<-function(ftab) { +gen_summ <- function(comp,qa_ms1,qa_ms2) { + comp_cols <- intersect(SUMM_COLS,colnames(comp)) + summ <- comp[,..comp_cols] + data.table::setkeyv(summ,BASE_KEY) + ms1_cols <- intersect(SUMM_COLS,colnames(qa_ms1)) + ms1_cols <- setdiff(ms1_cols,colnames(summ)) + summ <- qa_ms1[summ,c(..comp_cols,..ms1_cols),on=BASE_KEY] + ms2_cols <- intersect(colnames(qa_ms2),SUMM_COLS) + ms2_cols <- setdiff(ms2_cols,colnames(summ)) + summ <- qa_ms2[summ,c(..comp_cols,..ms1_cols,..ms2_cols),on=BASE_KEY] + data.table::setkeyv(summ,c(BASE_KEY_MS2,"an")) + summ[,qa_ms1_exists:=the_ifelse(!is.na(qa_ms1_good_int),T,F)] + summ[,qa_ms2_exists:=the_ifelse(!is.na(CE),T,F)] + summ[,qa_pass:=apply(.SD,1,all),.SDcols=QA_FLAGS[!(QA_FLAGS %in% "qa_pass")]] + summ$Comments<-"" + data.table::setkeyv(summ,DEF_KEY_SUMM) + data.table::setcolorder(summ,SUMM_COLS) + summ +} +pp_touch_q<-function(summ) { ## Returns indices that are ok to be auto processed. - which(ftab$checked==FTAB_CHK_NONE | ftab$checked==FTAB_CHK_AUTO) + which(summ$checked==SUMM_CHK_NONE | summ$checked==SUMM_CHK_AUTO) } -preProc <- function (ftable,noiseFac=3,errRT=0.5,intThreshMS1=1e5,intThreshMS2=5000.) { - wds<-unique(ftable$wd) +preProc <- function (summ,noiseFac=3,errRT=0.5,intThreshMS1=1e5,intThreshMS2=5000.) { + wds<-unique(summ$wd) fn_spec<-function(wd) readRDS(file.path(wd,FN_SPEC)) message("Loading RDS-es ...") allData<-lapply(wds,fn_spec) @@ -80,10 +225,10 @@ preProc <- function (ftable,noiseFac=3,errRT=0.5,intThreshMS1=1e5,intThreshMS2=5 ## during the dataframe generation stage. In this case, the file ## with the corresponding ID will not be there. - okinds<- pp_touch_q(ftable) + okinds<- pp_touch_q(summ) for (ind in okinds) { - wd <- ftable$wd[ind] - id <- ftable$ID[ind] + wd <- summ$wd[ind] + id <- summ$ID[ind] eics<-allData[[wd]]$eic nid<-id2name(id) ii<-match(nid,MSnbase::fData(eics)[["ID"]]) #id, because id-s, not nid-s are in fData for ms1 eics; @@ -99,39 +244,39 @@ preProc <- function (ftable,noiseFac=3,errRT=0.5,intThreshMS1=1e5,intThreshMS2=5 ms1MaxInd<-which.max(eic$intensity) maxInt<-eic$intensity[[ms1MaxInd]] - ftable[ind,"rt"]<-eic$rt[[ms1MaxInd]] + summ[ind,"rt"]<-eic$rt[[ms1MaxInd]] ##If MS1 does not exist, set entry to F. if (maxInt < intThreshMS1) { - ftable[ind,"MS1"] <- F + summ[ind,"MS1"] <- F ## Other checks automatically fail, too. - ftable[ind,"Alignment"] <- F - ftable[ind,"AboveNoise"] <- F + summ[ind,"Alignment"] <- F + summ[ind,"AboveNoise"] <- F } else { ## Noisy? - if (ftable[ind,"AboveNoise"]) { + if (summ[ind,"AboveNoise"]) { mInt <- mean(eic$intensity) if (maxInt < noiseFac*mInt) { - ftable[ind,"AboveNoise"] <- F - ftable[ind,"Alignment"] <- F ## If noisy, this is - ## probably meaningles, so - ## F. + summ[ind,"AboveNoise"] <- F + summ[ind,"Alignment"] <- F ## If noisy, this is + ## probably meaningles, so + ## F. } } } - + ## MS2 checks. ms2<-allData[[wd]]$ms2 ms2nids<-names(ms2) if (! (nid %in% ms2nids)) { - ftable[ind,"MS2"] <- F - ftable[ind,"Alignment"] <- F + summ[ind,"MS2"] <- F + summ[ind,"Alignment"] <- F } else { sp<-ms2[[nid]] ## Alignment still makes sense to be checked? - if (ftable[ind,"Alignment"]) { + if (summ[ind,"Alignment"]) { ## rtInd <- ms1MaxInd #match(maxInt,eic$intensity) rtMS1Peak <- eic$rt[[ms1MaxInd]] msms<-MSnbase::fData(sp)[,c("rtm","maxI")] @@ -150,29 +295,29 @@ preProc <- function (ftable,noiseFac=3,errRT=0.5,intThreshMS1=1e5,intThreshMS2=5 msmsInt<- msms$intensity[rtInd] if (length(msmsRT) > 0) { msmsRTind <- which.min(abs(msmsRT - rtMS1Peak)) - ftable[ind,"iMS2rt"] <- rtInd[msmsRTind] - ftable[ind,"MS2rt"] <- msmsRT[msmsRTind] + summ[ind,"iMS2rt"] <- rtInd[msmsRTind] + summ[ind,"MS2rt"] <- msmsRT[msmsRTind] } else { - ftable[ind,"Alignment"] <- F + summ[ind,"Alignment"] <- F } } } - ftable[ind,"checked"]<-FTAB_CHK_AUTO + summ[ind,"checked"]<-SUMM_CHK_AUTO } - - ftable + + summ } smiles2img <- function(smiles, kekulise=TRUE, width=300, height=300, - zoom=1.3,style="cow", annotate="off", abbr="on",suppressh=TRUE, - showTitle=FALSE, smaLimit=100, sma=NULL) { - dep <- rcdk::get.depictor(width = width, height = height, zoom = zoom, style = style, annotate = annotate, - abbr = abbr, suppressh = suppressh, showTitle = showTitle, smaLimit = smaLimit, - sma = NULL) - - mol <- RMassBank::getMolecule(smiles) - z<-rcdk::view.image.2d(mol, depictor=dep) - grid::rasterGrob(z) + zoom=1.3,style="cow", annotate="off", abbr="on",suppressh=TRUE, + showTitle=FALSE, smaLimit=100, sma=NULL) { + dep <- rcdk::get.depictor(width = width, height = height, zoom = zoom, style = style, annotate = annotate, + abbr = abbr, suppressh = suppressh, showTitle = showTitle, smaLimit = smaLimit, + sma = NULL) + + mol <- RMassBank::getMolecule(smiles) + z<-rcdk::view.image.2d(mol, depictor=dep) + grid::rasterGrob(z) } gen_ms2_spec_data <- function(id,tag,iMS2rt,data,luckyN=NA) { @@ -203,12 +348,12 @@ gen_ms2_spec_data <- function(id,tag,iMS2rt,data,luckyN=NA) { } else return(NULL) } -gen_ms2_spec_fn <- function(id,tag,mode,set,width=6) { +gen_ms2_spec_fn <- function(id,tag,adduct,set,width=6) { suppressWarnings({ iid<-as.numeric(id) iid<- if (!is.na(iid)) iid else id num <- formatC(iid,width = width,format='d',flag='0') - ss<-trimws(paste(num,mode,tag,set,sep="_"),which='both') + ss<-trimws(paste(num,adduct,tag,set,sep="_"),which='both') paste(ss,".csv",sep='') }) } @@ -225,7 +370,7 @@ plot_id_msn <- function(ni, mass, smile, tags, - fTab, + summ, prop, theme, pal="Dark2", @@ -250,8 +395,6 @@ plot_id_msn <- function(ni, ")",sep='') mk_leg_lab<-function(tag,rt) {paste(tag,"; rt= ",formatC(rt[[tag]],format='f',digits=rt_digits),"min")} - sci10<-function(x) {ifelse(x==0, "0", parse(text=gsub("[+]", "", gsub("e", " %*% 10^", scales::scientific_format()(x)))))} - i<-name2id(ni) @@ -355,11 +498,11 @@ plot_id_msn <- function(ni, ggobj+ ggplot2::geom_linerange(ggplot2::aes(colour=legend),key_glyph=KEY_GLYPH)+ ggplot2::coord_cartesian(xlim = rrtMS2, - ylim = rintMS2)+ + ylim = rintMS2)+ ggplot2::labs(x=CHR_GRAM_X,y=CHR_GRAM_Y,title=NULL,subtitle = "MS2",tag = " ")+ scale_y(labels=sci10)+ - ggplot2::labs(colour=PLOT_MS2_LEG_TIT)+theme() + ggplot2::labs(colour=PLOT_MS2_LEG_TIT)+theme() } @@ -373,7 +516,7 @@ plot_id_msn <- function(ni, ggobj+ ggplot2::geom_linerange(ggplot2::aes(colour=tag),key_glyph=KEY_GLYPH)+ ggplot2::coord_cartesian(xlim = rmzSpMS2, - ylim = rintSpMS2)+ + ylim = rintSpMS2)+ ggplot2::labs(subtitle="MS2",y="intensity")+ scale_y(labels=sci10)+theme() } @@ -382,7 +525,7 @@ plot_id_msn <- function(ni, ## MS1 time series. plMS1<- if(is.data.frame(dfChrMS1) && nrow(dfChrMS1)>0) { ch_ms1_deco(ggplot2::ggplot(data=dfChrMS1,ggplot2::aes(x=rt,y=intensity,group=legend))) - } else NULL + } else NULL ## Empty plEmpty<-ggplot2::ggplot(data=dfChrMS1,ggplot2::aes(x=rt,y=intensity))+ggplot2::theme_void() @@ -394,9 +537,9 @@ plot_id_msn <- function(ni, } else plEmpty - + - + ## Structure if (!is.null(smile) && !is.na(smile) && !nchar(smile)<1) { @@ -409,14 +552,14 @@ plot_id_msn <- function(ni, ## MS2 Spectrum if (!all(sapply(dfsChrMS2,is.null))) { plSpecMS2<-if (is.data.frame(dfSpecMS2)) { #sometimes - #dfSpecMS2 ends up - #as a list of - #logicals; this - #probably happens - #when either MS2 is - #bad in some way, - #or the RT - #intervals are + #dfSpecMS2 ends up + #as a list of + #logicals; this + #probably happens + #when either MS2 is + #bad in some way, + #or the RT + #intervals are #mismatched. ch_spec_deco(ggplot2::ggplot(data=dfSpecMS2, ggplot2::aes(x=mz, @@ -439,7 +582,7 @@ plot_id_msn <- function(ni, ## str(df) ## message("---DF") ## gridExtra::tableGrob(df) #+ggplot2::labs(subtitle="Top m/z") - + ## } else NULL res<- if (!is.null(plMS1)) cowplot::plot_grid(plMS1,plStruc,plMS2,plEmpty,plSpecMS2,align = "hv",axis='l',ncol = 2,nrow=3,rel_widths=c(3,1)) else NULL @@ -447,92 +590,59 @@ plot_id_msn <- function(ni, res } - - -adornmzMLTab<-function(df,projDir=getwd()) { - pref<-df$set - mask<-is.na(pref) - drop<-df$files[mask] - for (d in drop) warning("Dropping",d,"because no set specified for it.") - df<-df[!mask,] - pref<-df$set - wd<-basename(tools::file_path_sans_ext(df$Files)) - wd<-file.path(projDir,pref,wd) - df$wd<-wd - df +add_wd_to_mzml <- function(fn,proj) { + wd<-basename(tools::file_path_sans_ext(fn)) + file.path(proj,wd) } -genSuprFileTab <- function(fileTab,compTab) { - genOne<-function(ids,fn) { - K<-length(ids) - fTabRow<-fileTab[fileTab$Files == fn,] - cols<-lapply(names(fileTab),function(n) rep(fTabRow[[n]],K)) - names(cols)<-NULL - cols<-c(cols,list(ids)) - names(cols)<-c(names(fileTab),"ID") - df<-as.data.frame(cols,stringsAsFactors = F) - df - } - - tabs<-lapply(fileTab$Files,function(fn) - { - wh<-which(fileTab$Files==fn) - set<-fileTab$set[[wh]] - md<-fileTab$mode[[wh]] - sel<-(compTab$set %in% set) & (compTab$mode %in% md) - ids<-compTab$ID[sel] - genOne(ids,fn) - - }) - res<-do.call(rbind,tabs) - res -} - -getEntryFromComp<-function(entry,id,set,mode,compTab) { +getEntryFromComp<-function(entry,id,set,adduct,compTab) { ind <- which(compTab$ID %in% id & compTab$set %in% set & - compTab$mode %in% mode) + compTab$adduct %in% adduct) res<- if (length(ind)==1) compTab[ind,entry] else { if (length(ind)>1) { warning("Nonunique selection in comprehensive table:") for (i in ind) { - message('ID: ',compTab$ID[[i]],' set: ',compTab$set[[i]],' mode: ',compTab$mode[[i]]) + message('ID: ',compTab$ID[[i]],' set: ',compTab$set[[i]],' adduct: ',compTab$adduct[[i]]) } - warning("The compound set table likely containes duplicate IDs per set/mode combination. Please correct this.") + warning("The compound set table likely containes duplicate IDs per set/adduct combination. Please correct this.") } else { - warning("Entries not found for id ", id,"set ",set, "and mode ", mode, " .") + warning("Entries not found for id ", id,"set ",set, "and adduct ", adduct, " .") } } res names(res)<-entry res - + } -addCompColsToFileTbl<-function(ft,compTab) { - nR<-nrow(ft) - mzCol<-rep(NA,nR) - nmCol<-rep("",nR) - rtCol<-rep(NA,nR) - - for (ir in 1:nR) { - id<-ft[ir,"ID"] - set<-ft[ir,"set"] - m<-ft[ir,"mode"] - entries<-getEntryFromComp(c("mz","Name","rt"),id,set,m,compTab) - mzCol[[ir]]<- entries[["mz"]] - nm<-entries[["Name"]] - nmCol[[ir]]<- if (!is.na(nm)) nm else "" - rtCol[[ir]]<- entries[["rt"]] - } - ft$mz<-mzCol - ft$Name<-nmCol - ft$rt<-rtCol - ft +## add_comp_summ <- function(ft,ctab) { +## nR<-nrow(ft) +## mzCol<-rep(NA,nR) +## nmCol<-rep("",nR) +## rtCol<-rep(NA,nR) + +## for (ir in 1:nR) { +## id<-ft[ir,"ID"] +## set<-ft[ir,"set"] +## m<-ft[ir,"adduct"] +## entries<-getEntryFromComp(c("mz","Name","rt"),id,set,m,ctab) +## mzCol[[ir]]<- entries[["mz"]] +## nm<-entries[["Name"]] +## nmCol[[ir]]<- if (!is.na(nm)) nm else "" +## rtCol[[ir]]<- entries[["rt"]] +## } +## ft$mz<-mzCol +## ft$Name<-nmCol +## ft$rt<-rtCol +## ft +## } + +get_set_adduct <- function(s,mzml) { + unique(mzml[set == s,adduct]) } - vald_comp_tab<-function(df,ndf,checkSMILES=F,checkMz=F,checkNames=F) { ## Fields. if (is.null(df$ID)) stop("Column ID missing in ",ndf," .") @@ -594,3 +704,909 @@ vald_comp_tab<-function(df,ndf,checkSMILES=F,checkMz=F,checkNames=F) { df } + +read_setid <- function(fn,cmpds) { + assert(file.exists(fn),msg=paste("Please provide valid compounds set table:", fn)) + assert(nrow(cmpds) > 0,msg="Please provide at least one compounds list.") + setid <- file2tab(fn,colClasses=c(ID="character")) + x<-cmpds[setid,on='ID'][,.SD,.SDcols=c(colnames(setid),'known')] + + sids <- unique(setid$ID) + cids <- unique(cmpds$ID) + diff <- setdiff(sids,cids) + assert(length(diff)==0,msg=paste("The following IDs from set table have not been found in the compound table:","------",print_table(dtable(diff)),"------",sep = "\n")) + x +} + + +write_conf <- function(m,fn) { + m$conf$data <- get_fn_ftab(m) + if (NROW(m$input$tab$mzml)>0) tab2file(tab=m$input$tab$mzml,file=file.path(m$conf$project,FN_DATA_TAB)) + yaml::write_yaml(x=m$conf,file=fn) + + + +} +write_state <- function(m,fn_conf) { + write_conf(m,fn_conf) + tab2file(tab=m$input$tab$mzml,file=file.path(m$conf$project,FN_DATA_TAB)) +} + +read_conf <- function(fn) { + cf <- yaml::yaml.load_file(fn) + fnl <- cf$compound$lists + if (length(fnl)>0) { + nms <- character(0) + for (i in 1:length(fnl)) { + nms <- gen_uniq_lab(nms,pref = 'L') + } + names(fnl) <- nms + + } + cf$compound$lists <- fnl + ## conf_trans(cf) + cf +} + + + +##' @export +get_fn_comp <- function(m) { + file.path(m$conf$project,FN_COMP_TAB) +} + +##' @export +get_fn_summ <- function(m) { + file.path(m$conf$project, FN_SUMM) +} + +##' @export +get_fn_extr <- function(m) { + file.path(m$conf$project, "extracted.rds") +} + +##' @export +get_fn_conf <- function(m) { + file.path(m$conf$project, FN_CONF) +} + + +##' @export +get_fn_ftab <- function(m) { + file.path(m$conf$project, FN_DATA_TAB) +} + +init_state <- function(m) { + m$out$tab <- list() + m$input$datafiles <- NULL + m$input$tab$mzml <- EMPTY_MZML + lab <- gen_uniq_lab(list(),pref="L") + m$input$tab$lists <- list() + m$input$tab[[lab[[1]]]] <- EMPTY_CMPD_LIST + + m$out$tab$ms1_plot <- EMPTY_MS1_PLOT_TAB + m$out$tab$ms2_plot <- EMPTY_MS2_PLOT_TAB + m$out$tab$comp <- EMPTY_COMP_TAB + m +} + +base_conf <- function () { + m <- list() + m$conf <- list(project=getwd(), + compounds=list(lists=list(), + sets="", + data=""), + extr=list(fn=""), + debug = F) + m +} + +extr_conf <- function(m) { + m$conf$tolerance <- list("ms1 coarse"=MS1_ERR_COARSE, + "ms1 fine"=MS1_ERR_FINE, + "eic"=EIC_ERR, + "rt"=RT_EXTR_ERR) + m +} + +presc_conf <- function(m) { + m$conf$prescreen <- list("ms1_int_thresh"=1e5, + "ms2_int_thresh"=2.5e3, + "s2n"=3, + "ret_time_shift_tol"=0.5) + m +} + + +new_conf <- function() presc_conf( + extr_conf( + base_conf())) + + + +verify_cmpd_l <- function(dt,fn) { + fields <- colnames(EMPTY_CMPD_LIST) + dtflds <- colnames(dt) + + assert('ID' %in% dtflds, msg = paste('ID column must be present and filled in', fn)) + ess <- c('SMILES','Formula','mz') + pres <- ess %in% dtflds + assert(length(pres) > 0, + msg = paste('Compound list from ',fn, + 'does not contain any of "SMILES", "Formula", or "mz". \nThe compound list needs at least one of those to be valid.')) + exst <- ess[pres] + x <- lapply(exst,function (nm) do.call(all,as.list(is.na(dt[[nm]])))) + assert(!do.call(all,x), msg = paste('At least one of', paste(exst,collapse = ','), + '\nmust contain some values in compound list from',fn)) + + invisible(T) +} + + +## INPUT TRANSLATORS + +#' @export +grab_unit <- function(entry,unit) { + what <- paste0("\\<",unit,"\\>$") + entry <- trimws(entry,which="both") + if (grepl(what,entry)) + suppressWarnings(as.numeric(sub(paste0("^(.*)",unit),"\\1",entry))) else NA_real_ +} + + +rt_in_min <- function(entry) { + xs <- grab_unit(entry,"s") + xm <- grab_unit(entry,"min") + x <- if (is.na(xm)) xs/60. else xm + x +} + +conf_trans_pres <- function(pres_list) { + ## Translate and validate prescreening input. + pres_list[CONF_PRES_NUM] <- sapply(pres_list[CONF_PRES_NUM],as.numeric) + for (par in CONF_PRES_NUM) { + assert(!suppressWarnings(is.na(pres_list[[par]])),msg=paste("Prescreen parameter",par,"is not a number.")) + } + for (par in CONF_PRES_TU) { + xs <- grab_unit(pres_list[[par]],"s") + xm <- grab_unit(pres_list[[par]],"min") + x <- if (is.na(xm)) xs else xm + assert(!is.na(x),msg = paste("Time unit parameter error for",par,"Only s(econds) or min(utes) allowed.")) + pres_list[[par]] <- x + } + pres_list +} + +## PRESCREENING + +create_qa_table <- function(extr,conf_presc) { + ## The first input argument is the extracted `ms`, table + ## containing MS1 and MS2 spectra. The argument `conf_presc` is + ## m$conf$prescreen, the prescreening parameters given in the conf + ## file. + + ## The qa table is just a copy of ms with added quality control + ## columns QA_COLS. + + ## The QA_FLAGS columns are flags specifying which properties of + ## compounds are known well, or not. + + ## For each compound (mass) we ask the following questions: + ## qa_ms1_exists -- does the MS1 spectrum exist at all? + ## qa_ms2_exists -- do we have any MS2 spectra at all? + ## qa_ms1_above_noise -- is MS1 above the noise treshold? + ## qa_ms2_near -- is there any MS2 spectrum inside the tolerated + ## retention time window around the MS1 peak? That is, are we + ## non-RT-shifted? + ## qa_ms2_good_int -- Is there any MS2 spectral intensity greater + ## than the MS2 threshold and less than the MS1 peak? + ## qa_pass -- did the spectrum pass all the checks? + + + ## The columns in QA_NUM_REAL are: + ## + ## ms1_int -- the maximum intensity of MS1 spectrum over the + ## entire run; + ## + ## ms1_rt -- the retention time of the peak MS1. + + ## The columns in QA_NUM_INT are: + ## + ## ms2_sel -- index of the selected MS2 spectrum; if not NA, the + ## associated spectrum passed all the checks (qa_pass == T); the + ## spectrum itself is in one of the member sublists of the `spec' + ## column. The integer `ms2_sel' is then the index of the spectrum + ## in that sublist. + ## + ## ms1_rt_ind -- TODO (but not important to end users). + + + qa <- list(prescreen=conf_presc) + + checks <- extr$ms2[,{ + z <-..QA_FLAGS + z[1:length(z)]<-F + names(z)<-..QA_FLAGS + z + },keyby=BASE_KEY_MS2] + checks[,(QA_NUM_INT):=NA_integer_] + checks[,(QA_NUM_REAL):=NA_real_] + setkeyv(checks,BASE_KEY_MS2) + qa$checks <- checks + qa +} + +assess_ms1 <- function(m) { + qa <- m$qa + ms1 <- m$extr$ms1 + ## Calculate auxiliary variables and indices. + qa_ms1 <- ms1[,.(ms1_rt_ind=which.max(intensity)),keyby=BASE_KEY] + qa_ms1 <- ms1[qa_ms1,.(ms1_rt_ind=ms1_rt_ind, + ms1_int=intensity[[ms1_rt_ind]], + ms1_rt=rt[[ms1_rt_ind]], + ms1_mean=mean(intensity)),on=BASE_KEY,by=.EACHI] + + + qa_ms1[,qa_ms1_good_int := ms1_int > qa$prescreen$ms1_int_thresh] + qa_ms1[,qa_ms1_above_noise := F] + qa_ms1[qa_ms1_good_int==T,qa_ms1_above_noise := .(ms1_int > qa$prescreen$s2n*ms1_mean)] + + + + + + ## checks[(!qa_ms1_above_noise),c("qa_ms2_good_int","qa_ms2_near","qa_ms2_exists","qa_pass"):=F] + ## qa_ms1 <- check_ms1_noise(check_ms1(qa_ms1)) + m$qa$ms1 <- qa_ms1 + m +} + +assess_ms2 <- function(m) { + + presconf <- conf_trans_pres(m$conf$prescreen) + + ms1 <- m$extr$ms1 + ms2 <- m$extr$ms2 + qa_ms1 <- m$qa$ms1 + qa_ms2 <- ms2[qa_ms1[qa_ms1_above_noise==T],.(CE=unique(CE), + pc_rt=i.ms1_rt, + pc_int=i.ms1_int, + an=unique(an)),on=BASE_KEY,by=.EACHI,nomatch=NULL] + + rt_win2 <- presconf$ret_time_shift_tol + qa_ms2 <- ms2[qa_ms2,.(pc_rt=pc_rt, + pc_int=pc_int, + ms2_int=max(intensity), + ms2_rt=unique(rt), + qa_ms2_near=head(rt,1) < pc_rt + rt_win2 & head(rt,1) > pc_rt - rt_win2), + by=.EACHI,on=c(BASE_KEY_MS2,"an")] + + qa_ms2$qa_ms2_good_int <-F + qa_ms2[qa_ms2_near==T, + qa_ms2_good_int := ms2_int > presconf$ms2_int_thresh & ms2_int < pc_int, + by=c(BASE_KEY_MS2,"an")] + + + ## qa_ms2$qa_pass <- F + ## qa_ms2[qa_ms2_good_int==T,qa_pass:=T] + qa_ms2$ms2_sel <- F + qa_ms2[qa_ms2_good_int==T,ms2_sel:={ + ind<-which.min(abs(ms2_rt-pc_rt)) + z<-ms2_sel + z[[ind]]<-T + z + },by=BASE_KEY_MS2] + setkeyv(qa_ms2,BASE_KEY_MS2) + m$qa$ms2 <- qa_ms2 + m +} + +gen_mz_err_f <- function(entry,msg) { + eppm <- grab_unit(entry,"ppm") + eda <- grab_unit(entry,"Da") + shinyscreen:::assert(xor(is.na(eda), is.na(eppm)), msg = msg) + if (is.na(eda)) function(mz) eppm*1e-6*mz else function (mz) eda +} + + +gen_rt_err <- function(entry,msg) { + em <- grab_unit(entry,"min") + es <- grab_unit(entry,"s") + shinyscreen:::assert(xor(is.na(em), is.na(es)), msg = msg) + if (is.na(em)) es/60. else em +} + +fig_path <- function(top,set,group,id,suff,ext="pdf") { + base <- paste("plot",set,group,id,suff,sep="_") + fn <- paste0(base,".",ext) + fn <- gsub("\\[","",fn) + fn <- gsub("\\]","",fn) + fn <- gsub("\\+","p",fn) + fn <- gsub("-","m",fn) + if (!is.null(top)) file.path(top,fn) else fn +} + +get_coord_lim <- function(new,def) { + if (is.null(new)) return(def) + res <- new + if (length(new[[1]])==0) res[[1]]<-def[[1]] + if (length(new[[2]])==0) res[[2]]<-def[[2]] + res +} + + + +gen_base_ms1_plot_tab <- function(summ,ms1_spec) { + + ident <- c("set", + "adduct", + "tag", + "ID", + "mz", + "file") + + res <- summ[ms1_spec,c(.SD, + list(rt_peak=i.ms1_rt, + eicMS1=lapply(i.eicMS1,list))), + .SDcols=ident, + on=BASE_KEY, + nomatch=NULL] + setkeyv(res,cols=BASE_KEY) + res +} + +gen_base_ms2_plot_tab <- function(summ,ms2_spec) { + ident <- c("set", + "adduct", + "tag", + "ID", + "mz", + "file") + res <- summ[ms2_spec,c(.SD, + list(CE=i.CE, + rt_peak = i.rt, + int_peak = ms2_max_int, + spec = i.spec, + ms2_sel = i.ms2_sel)), + .SDcols=ident, + on=BASE_KEY, + nomatch=NULL] + setkeyv(res,cols=BASE_KEY) + res +} + +## Format the y labels. +sci10 <- function(x) { + prefmt <- formatC(x,format="e",digits=2) + bits <- strsplit(prefmt,split="e") + bits1 <-sapply(bits,function(x) { + if (length(x) > 1) { + res <- x[[1]] + sub(" ","~",res) + } else { + x + } + }) + bits2 <-sapply(bits,function(x) if (length(x)>1) paste0(" %*% 10^","'",sub("[+]"," ",x[[2]]),"'") else "") + txt <- mapply(function(b1,b2) if (nchar(b2)!=0) {paste0("'",b1,"'",b2)} else NA, + bits1, + bits2, + SIMPLIFY = F) + names(txt) <- NULL + txt <- gsub(pattern = "^'0\\.00'.*$"," 0",x=txt) + parse(text=txt) + + + +} + +sci10_old <- function(x) { + if (length(x)!=0) { + x<-sapply(x,function(x) if(x!=0) x else "0") + ifelse(x==0,"0",{ + prefmt <- formatC(x,format="e",digits=2) + bits <- strsplit(prefmt,split="e") + bits1 <-sapply(bits,function(x) { + if (length(x) > 1) { + res <- x[[1]] + sub(" ","~",res) + } else { + x + } + }) + print(bits1) + bits2 <-sapply(bits,function(x) if (length(x)>1) paste0(" %*% 10^",sub("[+]","~",x[[2]])) else "") + txt <- mapply(function(b1,b2) if (nchar(b2)!=0) {paste0(b1,b2)} else NA, + bits1, + bits2, + SIMPLIFY = F) + names(txt) <- NULL + message("---------") + print(txt) + message("________") + ## parse(text=txt)}) + txt}) + + } else "" +} + +plot_theme <- function (legend.position="none",...) + ggplot2::theme( + plot.margin = unit(c(0,0,0,0),"cm"), + legend.position = legend.position, + axis.text = ggplot2::element_text(size=ggplot2::rel(1.2)), + axis.title = ggplot2::element_text(size=ggplot2::rel(1.2)), + ...) + +plot_decor <- function(m,islog,all_ms1_labels,legend_name_ms1,legend_name_ms2="CE",all_ms2_labels=NULL, + ms1_legend_info=T) { + textf <- ggplot2::element_text + + my_theme <- plot_theme(legend.position="bottom",legend.box="horizontal") + ## Logarithmic, or linear y axis? + scale_y <- if (shiny::isTruthy(islog)) + ggplot2::scale_y_log10 else ggplot2::scale_y_continuous + + + getpal <- colorRampPalette(RColorBrewer::brewer.pal(8,"Dark2")) + + col_all_vals <- getpal(length(all_ms1_labels)) + names(col_all_vals) <- all_ms1_labels + + scale_colour <- if (ms1_legend_info) { + function(breaks, labels, ...) ggplot2::scale_colour_manual(values = col_all_vals, + breaks = breaks, + labels = labels, + name = legend_name_ms1,...) + } else { + function(breaks=NULL, labels=NULL, ...) NULL + } + + + shape_all_vals <- 1:length(all_ms2_labels) + scale_ms2 <- if (length(shape_all_vals)>0) { + names(shape_all_vals) <- all_ms2_labels + function(breaks, labels, ...) ggplot2::scale_shape_manual(values = shape_all_vals, + breaks = breaks, + labels = labels, + name = legend_name_ms2, ...) + } else { + function(breaks=NULL, labels=NULL, ...) NULL + + + } + + + + function(plot, breaks, labels, ms2_breaks=NULL, ms2_labels=NULL) { + plot + + scale_colour(breaks=breaks, + labels=labels) + + scale_ms2(breaks=ms2_breaks, + labels=ms2_labels) + + scale_y(labels=sci10) + my_theme + } +} + + +gen_get_ms2_legend <- function(m,legend_name_ms2="CE",all_ms2_labels) { + shape_all_vals <- 1:length(all_ms2_labels) + scale_ms2 <- if (length(shape_all_vals)>0) { + names(shape_all_vals) <- all_ms2_labels + function(breaks, labels, ...) ggplot2::scale_shape_manual(values = shape_all_vals, + breaks = breaks, + labels = labels, + name = legend_name_ms2, ...) + } else { + function(breaks=NULL, labels=NULL, ...) NULL + + + } + function(plot,breaks,labels) { + thing <- plot + scale_ms2(breaks=breaks, + labels=labels) + + cowplot::get_legend(thing) + + } +} + + +plot_eic_ms1_df <- function(df,style_fun,plot_label) { + mk_leg_lab<-function(tag,rt) {if (length(tag) > 0) paste(tag,"; rt= ",formatC(rt,format='f',digits=RT_DIGITS)," min",sep='') else character(0)} + + ## mz <- df[,unique(prec_mz)] + tbl <- df[,.(verb_labs=mk_leg_lab(get(..plot_label),rt_peak),plot_label=get(..plot_label)), + by=c(plot_label,"rt_peak")] + + verb_labs <- tbl[,verb_labs] + labs <- tbl[,plot_label] + df[,plot_label:=factor(get(..plot_label))] + style_fun(ggplot2::ggplot(df,ggplot2::aes(x=rt,y=intensity,colour=plot_label)), + breaks=labs, + labels=verb_labs) + + ggplot2::geom_line(key_glyph=KEY_GLYPH) + + ggplot2::labs(x=CHR_GRAM_X, + y=CHR_GRAM_Y) +} + +plot_eic_ms2_df <- function(df,style_fun) { + mz <- df[,unique(prec_mz)] + ddf <- df[!is.na(rt_peak)==T] + + + ms2_labs <- ddf[,plot_label] + ms1_labs <- ddf[,levels(parent_label)] + + res <- if (NROW(ddf)>0) { + plot <- style_fun(ggplot2::ggplot(ddf,ggplot2::aes(x = rt_peak,ymin = 0,ymax = int_peak, + y = int_peak, + color = parent_label, shape = plot_label)), + breaks=ms1_labs, + labels=ms1_labs, + ms2_breaks=ms2_labs, + ms2_labels=ms2_labs) + plot + ggplot2::geom_linerange(key_glyph=KEY_GLYPH) + + ggplot2::geom_point() + + ggplot2::labs(x=CHR_GRAM_X, + y=CHR_GRAM_Y) + } else { + p <- ggplot2::ggplot(ddf,ggplot2::aes(x=1:10,y=1:10))+ggplot2::geom_blank()+ggplot2::labs(x="",y="") + p + ggplot2::annotate(geom="text", x=5, y=5, size=6, label="NO MS2 SPECTRA", color="black")+ggplot2::theme(axis.text.x=ggplot2::element_blank(), + axis.ticks.x=ggplot2::element_blank(), + axis.text.y=ggplot2::element_blank(), + axis.ticks.y=ggplot2::element_blank()) + } + + res +} + +plot_spec_ms2_df <- function(df,style_fun) { + + ms2_labs <- df[,levels(plot_label)] + ms1_labs <- df[,levels(parent_label)] + + plot <- if (NROW(df)>0) { + ddf <- df[,.(mz,intensity,parent_label,plot_label)] + plot <-style_fun(ggplot2::ggplot(ddf, + ggplot2::aes(x=mz,ymin=0,ymax=intensity, + y = intensity, + color=parent_label, + shape=plot_label)), + labels=ms1_labs, + breaks=ms1_labs, + ms2_breaks=ms2_labs, + ms2_labels=ms2_labs) + + plot + ggplot2::geom_linerange(key_glyph=KEY_GLYPH) + + ggplot2::geom_point() + + ggplot2::labs(x="mz", y="intensity") + + } else { + ddf <- data.table(x=1:10,y=1:10) + p <- ggplot2::ggplot(ddf,ggplot2::aes(x=x,y=y))+ggplot2::geom_blank()+ggplot2::labs(x="",y="") + p + ggplot2::annotate(geom="text", x=5, y=5, size=6, label="NO MS2 SPECTRA", color="black")+ggplot2::theme(axis.text.x=ggplot2::element_blank(), + axis.ticks.x=ggplot2::element_blank(), + axis.text.y=ggplot2::element_blank(), + axis.ticks.y=ggplot2::element_blank()) + + + } + + plot + + +} + +plot_leg_ms2 <- function(df,style_fun) { + mz <- df[,unique(mz)] + ddf <- df[!is.na(rt_peak)==T] + + mk_leg_lab<-function(tag,rt,have_sel) {if (length(tag) > 0 && have_sel) paste(tag,"; rt= ",formatC(rt,format='f',digits=RT_DIGITS)," min",sep='') else if (!have_sel) tag else character(0)} + tbl <- ddf[,.(verb_labs=mk_leg_lab(plot_label,.SD[ms2_sel==T,rt_peak],any(ms2_sel)),plot_label), + by="plot_label"] + ms2_verb_labs <- tbl[,verb_labs] + ms2_labs <- tbl[,plot_label] + ms1_labs <- ddf[,levels(parent_label)] + blah <- ggplot2::ggplot(ddf,ggplot2::aes(shape = plot_label,y=int_peak,x=rt_peak)) + ggplot2::geom_point() + + + plot <- style_fun(blah, + breaks=ms1_labs, + labels=ms1_labs, + ms2_breaks=ms2_labs, + ms2_labels=ms2_verb_labs) + cowplot::get_legend(plot) + +} + +#' @export +tk_save_file <- function (default = "", caption = "Select files", filters = NULL, index = 1) { + args <- list("tk_getSaveFile", title = caption) + if (nzchar(default)) + args <- c(args, initialdir = dirname(default), initialfile = basename(default)) + if (!is.null(filters)) { + if (!is.character(filters) || length(dim(filters)) != + 2 || ncol(filters) != 2) + stop("'filters' must be a 2-column character matrix") + f <- filters + f[] <- paste0("{", filters, "}") + ff <- apply(f, 1, paste, collapse = " ") + fff <- paste0("{", ff, "}") + args <- c(args, filetypes = paste(fff, collapse = " ")) + } + as.character(do.call(tcltk::tcl, args)) +} + +get_rt_interval <- function(data_ms1,data_ms2,conf_figures) { + rt_new_lim <- c(rt_in_min(conf_figures$rt_min), + rt_in_min(conf_figures$rt_max)) + rt_lim <- get_coord_lim(rt_new_lim,DEFAULT_RT_RANGE) + + ms1_lim <- range(data_ms1$rt) + ms2_lim <- if (NROW(data_ms2)>0) range(data_ms2$rt_peak) else c(NA,NA) + + rlim <- min(rt_lim[[2]],ms1_lim[[2]],ms2_lim[[2]],na.rm = T) + llim <- max(rt_lim[[1]],ms1_lim[[1]],ms2_lim[[1]],na.rm = T) + c(llim-0.5,rlim+0.5) +} + + + +get_plot_data <- function(plot_index,plot_label, + summ_tab,summ_cols,extr_tab=NULL, + extr_cols=NULL) { + thenames<-ifelse(nchar(names(summ_cols))!=0,names(summ_cols),summ_cols) + names(summ_cols)<-thenames + + ind_nms <- names(plot_index) + plot_group <- ind_nms[[1]] + plot_plot <- ind_nms[[2]] + + + meta <- summ_tab[get(ind_nms[[1]]) == plot_index[[1]] & + get(ind_nms[[2]]) == plot_index[[2]], + unique(.SD),.SDcols=c(plot_label,summ_cols)] + + data.table::setkeyv(meta,plot_label) + data.table::setnames(meta,summ_cols,names(summ_cols)) + + if (!is.null(extr_tab)) { + data_cols <- c(plot_label,extr_cols) + data <- extr_tab[get(ind_nms[[1]]) == plot_index[[1]] & + get(ind_nms[[2]]) == plot_index[[2]],..data_cols] + return(meta[data,on=plot_label]) + } else meta + +} + + +get_ms1_chr_pdata <- function(m,plot_index) get_plot_data(m$out$tab$summ, + c("mz", + rt_peak="ms1_rt"), + m$extr$ms1, + extr_cols = c("rt","intensity"), + plot_index = plot_index, + plot_label = m$conf$figures$grouping$label) + +get_ms2_chr_pdata <- function(m,plot_index) { + z<- get_plot_data(plot_index = plot_index, + plot_label = c(m$conf$figures$grouping$label,"an"), + summ_tab = m$out$tab$summ, + summ_cols = c(prec_mz="mz", + rt_peak="ms2_rt", + int_peak="ms2_int", + "CE", + "ms2_sel")) + z$plot_label = factor(z$CE) + z$parent_label = factor(z[[m$conf$figures$grouping$label]]) + z +} + +get_ms2_spec_pdata <- function(m,plot_index) { + z <- get_plot_data(plot_index = plot_index, + plot_label = c(m$conf$figures$grouping$label,"an"), + summ_tab = m$out$tab$summ[ms2_sel == T,], + summ_cols = c(prec_mz="mz", + rt_peak="ms2_rt", + int_peak="ms2_int", + "CE", + "ms2_sel"), + extr_tab = m$extr$ms2, + extr_cols = c("mz","intensity")) + z$plot_label = factor(z$CE) + z$parent_label = factor(z[[m$conf$figures$grouping$label]]) + z +} + +##' @export +plot_ms1_chr <- function(m,plot_index) { + pdata <- get_ms1_chr_pdata(m,plot_index) + + if (NROW(data) < 0 ) { + p <- ggplot2::ggplot(data.frame(x=1:10,y=1:10), + ggplot2::aes(x=x,y=y))+ + ggplot2::geom_blank()+ggplot2::labs(x="",y="") + + p <- p + ggplot2::annotate(geom="text", x=5, y=5, size=6, label="NO MS1 SPECTRA", color="black")+ + ggplot2::theme(axis.text.x=ggplot2::element_blank(), + axis.ticks.x=ggplot2::element_blank(), + axis.text.y=ggplot2::element_blank(), + axis.ticks.y=ggplot2::element_blank()) + return(p) + } else + + group_data <- m$conf$figures$grouping + plot_group <- names(plot_index)[[1]] + plot_plot <- names(plot_index)[[2]] + plot_label <- group_data$label + + all_labels <- m$out$tab$flt_summ[,sort(unique(get(..plot_label)))] + + style <- plot_decor(m,m$conf$logaxes$ms1_eic_int, + all_ms1_labels=all_labels, + legend_name_ms1=plot_label) + + plot_eic_ms1_df(pdata, + style_fun = style, + plot_label = plot_label) + + + +} + + +##' @export +plot_ms2_chr <- function(m,plot_index) { + pdata <- get_ms2_chr_pdata(m,plot_index) + + if (NROW(data) < 0 ) { + p <- ggplot2::ggplot(data.frame(x=1:10,y=1:10), + ggplot2::aes(x=x,y=y))+ + ggplot2::geom_blank()+ggplot2::labs(x="",y="") + + p <- p + ggplot2::annotate(geom="text", x=5, y=5, size=6, label="NO MS2 SPECTRA", color="black")+ + ggplot2::theme(axis.text.x=ggplot2::element_blank(), + axis.ticks.x=ggplot2::element_blank(), + axis.text.y=ggplot2::element_blank(), + axis.ticks.y=ggplot2::element_blank()) + return(p) + } else + + group_data <- m$conf$figures$grouping + plot_group <- names(plot_index)[[1]] + plot_plot <- names(plot_index)[[2]] + plot_ms1_label <- group_data$label + + all_ms1_labels <- m$out$tab$summ[,sort(unique(get(plot_ms1_label)))] + all_ms2_ce_labels <- m$out$tab$summ[,sort(na.omit(unique(CE)))] + + style <- plot_decor(m,m$conf$logaxes$ms2_eic_int, + all_ms1_labels = all_ms1_labels, + all_ms2_labels = all_ms2_ce_labels, + legend_name_ms1 = plot_ms1_label, + legend_name_ms2 = "CE") + + plot_eic_ms2_df(pdata, style_fun = style) + + + +} + + +##' @export +plot_ms2_spec <- function(m,plot_index) { + pdata <- get_ms2_spec_pdata(m,plot_index) + if (NROW(data) < 0 ) { + p <- ggplot2::ggplot(data.frame(x=1:10,y=1:10), + ggplot2::aes(x=x,y=y))+ + ggplot2::geom_blank()+ggplot2::labs(x="",y="") + + p <- p + ggplot2::annotate(geom="text", x=5, y=5, size=6, label="NO MS2 SPECTRA", color="black")+ + ggplot2::theme(axis.text.x=ggplot2::element_blank(), + axis.ticks.x=ggplot2::element_blank(), + axis.text.y=ggplot2::element_blank(), + axis.ticks.y=ggplot2::element_blank()) + return(p) + } else + + group_data <- m$conf$figures$grouping + plot_group <- names(plot_index)[[1]] + plot_plot <- names(plot_index)[[2]] + plot_ms1_label <- group_data$label + + all_ms1_labels <- m$out$tab$summ[,sort(unique(get(plot_ms1_label)))] + all_ms2_ce_labels <- m$out$tab$summ[,sort(na.omit(unique(CE)))] + + style <- plot_decor(m,m$conf$logaxes$ms2_spec_int, + all_ms1_labels = all_ms1_labels, + all_ms2_labels = all_ms2_ce_labels, + legend_name_ms1 = plot_ms1_label, + legend_name_ms2 = "CE") + + + plot_spec_ms2_df(pdata, style_fun = style) + + + +} + +##' @export +plot_struct <- function(m,plot_index) { + + id <- plot_index[["ID"]] + if (is.null(id)) { + p <- ggplot2::ggplot(data.frame(x=1:10,y=1:10), + ggplot2::aes(x=x,y=y))+ + ggplot2::geom_blank()+ggplot2::labs(x="",y="") + + p <- p + ggplot2::annotate(geom="text", x=5, y=5, size=6, label="STRUCTURE PLOT UNAVAILABLE", color="black")+ + ggplot2::theme(axis.text.x=ggplot2::element_blank(), + axis.ticks.x=ggplot2::element_blank(), + axis.text.y=ggplot2::element_blank(), + axis.ticks.y=ggplot2::element_blank()) + return(p) + } else { + ## grid::grid.draw(gridExtra::arrangeGrob(m$out$tab$structfig[ID==id,img][[1]])) + grid::grid.draw(m$out$tab$structfig[ID==id,img][[1]], + recording = F) + + } + +} + + +##' @export +plot_struct_nowrap <- function(m,plot_index) { + + id <- plot_index[["ID"]] + if (is.null(id)) { + p <- ggplot2::ggplot(data.frame(x=1:10,y=1:10), + ggplot2::aes(x=x,y=y))+ + ggplot2::geom_blank()+ggplot2::labs(x="",y="") + + p <- p + ggplot2::annotate(geom="text", x=5, y=5, size=6, label="STRUCTURE PLOT UNAVAILABLE", color="black")+ + ggplot2::theme(axis.text.x=ggplot2::element_blank(), + axis.ticks.x=ggplot2::element_blank(), + axis.text.y=ggplot2::element_blank(), + axis.ticks.y=ggplot2::element_blank()) + return(p) + } else { + m$out$tab$structfig[ID==id,img][[1]] + } + +} + + +##' @export +gen_key_plot_tab <- function(m) { + + + fltsumm <- m$out$tab$flt_summ + validate(need(NROW(fltsumm) > 0, + message = "Generate summary table first.")) + + + plot_group <- m$conf$figures$grouping$group + plot_plot <- m$conf$figures$grouping$plot + plot_label <- m$conf$figures$grouping$label + plot_key <- c(plot_group,plot_plot) + + + idx <- fltsumm[,{ + lapply(.SD, + function (col) { + val <- unique(col) + if (length(val)<=1) T else F + }) + + }, + by=c(plot_group,plot_plot)] + idxlst <- as.logical(idx[,lapply(.SD,function (col) all(col))]) + nmidx <- colnames(idx) + cols <- nmidx[idxlst] + cols <- na.omit(setdiff(cols,plot_key)) + fltsumm[,unique(.SD[,..cols]), + by=plot_key] + +} diff --git a/R/resources.R b/R/resources.R index aec28722c9d23b4ce8c21f4a0b62468b131cb281..9fe17a4765fd3ad954b7d6f55fb1579dff1d3b55 100644 --- a/R/resources.R +++ b/R/resources.R @@ -14,28 +14,52 @@ + +## Config defaults +CONF <- list(data=NA_character_, + project=getwd(), + compounds=list(lists=list(), + sets=NA_character_)) + ## Constants -FN_FTAB_BASE<-"ftable.base.csv" -FN_FTAB_PP<-"ftable.pp.csv" +FN_SUMM_BASE<-"summ.base.csv" +FN_SUMM_PP<-"summ.pp.csv" FN_PP_OUT_PREF<-"PP.filetable" -FN_FTAB_STATE<-"ftable_state.csv" -FN_FTAB_DEF_OUT<-"ftable.csv" +FN_SUMM_STATE<-"summ_state.csv" +FN_SUMM <- "summ.csv" +FN_SUMM_DEF_OUT <- FN_SUMM FN_CMP_L<-"compounds.csv" FN_LOC_SETID <-"setid.csv" FN_COMP_TAB<-"comprehensive.csv" FN_SPEC<-"specdata.rds" -MODEMAP<-list(pH="MpHp_mass", - mH="MmHm_mass", - pNH4="MpNH4_mass", - pNa="MpNa_mass") - -TAG_DEF <- "unspecified" +FN_CONF <- "conf-state.yaml" +FN_EXTR_STATE <- "state_after_extraction.rds" + +.envp <- new.env(parent = emptyenv()) +data(adducts,package = "enviPat", envir = .envp) +data(isotopes,package = "enviPat", envir = .envp) +ADDUCTS <- dtable(.envp$adducts) +ISOTOPES <- dtable(.envp$isotopes) +.envp <- NULL +ADDUCTMAP <- ADDUCTS$Name +ADDUCTS$Name <- the_ifelse(ADDUCTS$Charge>0,paste0("[",ADDUCTS$Name,"]+"),paste0("[",ADDUCTS$Name,"]-")) +## names(ADDUCTMAP) <- apply(ADDUCTS,1,function(row) { +## nm <- row[["Name"]] +## sgn <- row[["Charge"]] +## suff <- if (sgn > 0) "+" else if (sgn < 0) "-" else "" +## paste0("[",nm,"]",suff) +## }) +## ADDUCTS$Name <- names(ADDUCTMAP) +DISP_ADDUCTS <- ADDUCTS$Name +TAG_NA <- "::UNSET::" +SET_NA <- "::UNSET::" +TAG_DEF <- TAG_NA TAG_DEF_DESC<-"Case" DEFAULT_RT_RANGE=c(NA,NA) DEFAULT_INT_RANGE=c(NA,NA) DEFAULT_MZ_RANGE=c(NA,NA) -QANAMES <- c("MS1","MS2","Alignment","AboveNoise") +## QANAMES <- c("MS1","MS2","Alignment","AboveNoise") PLOT_DEF_TAGS<-NA PLOT_DEF_SET<-NA @@ -44,11 +68,6 @@ RT_DIGITS=2 M_DIGITS=4 PAL="Dark2" -REST_TXT_INP<-c("fnKnownL", - "fnUnkL", - "fnSetId", - "tagsInp") - REST_TAB<-c("mzml") @@ -76,11 +95,11 @@ EXTR_MS2_DIR<-"MS2" EXTR_MS2_FLAG<-file.path(EXTR_MS2_DIR,'.ms2.DONE') -FTAB_CHK_NONE<-'NONE' +SUMM_CHK_NONE<-'NONE' -FTAB_CHK_AUTO<-'AUTO' +SUMM_CHK_AUTO<-'AUTO' -FTAB_CHK_MANL<-'MANUAL' +SUMM_CHK_MANL<-'MANUAL' MS1_ERR_COARSE<-0.5 # Da @@ -97,4 +116,161 @@ MS1_SN_FAC <- 3.0 ## Shiny objects -NUM_INP_WIDTH="15%" +NUM_INP_WIDTH=40 +NUM_INP_HEIGHT="5%" + + + +## Possible compound list fields +EMPTY_CMPD_LIST <- dtable(ID=character(), + SMILES=character(), + Name=character(), + Formula=character(), + RT=numeric(), + mz=numeric(), + known=character(), + ORIG=character()) +COMP_LIST_COLS <- c("ID","Name","SMILES","Formula","RT","mz") +## Comprehensive table properties +COMP_NAME_MAP <- list(RT="rt") +COMP_NAME_FIRST <- c("ID","mz","rt","adduct","tag","set","Name","known","SMILES","Formula","file") + + + +## Trivial data table +EMPTY_MZML <- dtable(file=character(0), + tag=character(0), + adduct=character(0), + set=character(0)) + +FN_DATA_TAB <- "data-files.csv" + + +## Default number of concurrent workers +NO_WORKERS <- 2 + +## Input parameters for prescreening. +CONF_PRES_NUM <- c("ms1_int_thresh","ms2_int_thresh","s2n") +CONF_PRES_TU <- c("ret_time_shift_tol") + + +## Prescreening columns +QA_FLAGS <- c("qa_pass", + "qa_ms1_exists", + "qa_ms2_exists", + "qa_ms1_good_int", + "qa_ms1_above_noise", + "qa_ms2_near", + "qa_ms2_good_int") + +QA_NUM_REAL <- c("ms1_int","ms1_rt","ms1_mean") + +QA_NUM_INT <- c("ms2_sel","ms1_rt_ind") + +QA_COLS <- c(QA_FLAGS,QA_NUM_REAL,QA_NUM_INT) + +## MS2 spectral table columns +MS2_SPEC_COLS <- c("adduct","tag","ID","CE","rt","file","spec","ms2_max_int") + +## MS1 spectral table columns +MS1_SPEC_COLS <- c("adduct","tag","ID","eicMS1","ms1_int","ms1_rt","ms1_mean") + + + +## Default secondary indexing in the summary table +DEF_INDEX_SUMM <- c("set", "-qa_pass", "-ms1_int", "adduct","-mz") + +## Top-level directory to store the figures +FIG_TOPDIR <- "figures" + +## Figure filter +FIG_DEF_FILTER <- "" + +FIG_DEF_SUBSET <- c("set","adduct","ID") + + +REPORT_AUTHOR <- "Anonymous" +REPORT_TITLE <- "Plots of EICs and MS2 Spectra" + +## Select the most fundamental group of entries. Within this group, +## each ID is unique. +BASE_KEY <- c("adduct","tag","ID") +BASE_KEY_MS2 <- c(BASE_KEY,"CE") + +FIG_DEF_CONF <-list(grouping=list(group="adduct", + plot="ID", + label="tag")) + + +## File table properties +SUMM_COLS=c("set",BASE_KEY_MS2,"an","mz","ms1_rt", "ms1_int", "ms2_rt", "ms2_int", + "ms1_mean","ms2_sel",QA_FLAGS,"Name", "SMILES", "Formula", "known","Comments","file") + +PLOT_FEATURES <- c("adduct", + "tag", + "ID") + +## Empty summary table. +EMPTY_SUMM <- data.table::data.table(set=character(0), + adduct=character(0), + tag=character(0), + ID=character(0), + CE=character(0), + an=integer(0), + mz=numeric(0), + ms1_rt=numeric(0), + ms1_int=numeric(0), + ms2_rt=numeric(0), + ms2_int=numeric(0), + ms1_mean=numeric(0), + ms2_sel=logical(0), + qa_pass=logical(0), + qa_ms1_exists=logical(0), + qa_ms2_exists=logical(0), + qa_ms1_good_int=logical(0), + qa_ms1_above_noise=logical(0), + qa_ms2_near=logical(0), + qa_ms2_good_int=logical(0), + Name=character(0), + SMILES=character(0), + Formula=character(0), + known=character(0), + Comments=character(0), + file=character(0)) + +## Default sorting keys of spectra in the summary table +DEF_KEY_SUMM <- c(BASE_KEY_MS2,"an") + + +SUBSET_VALS <- c(IGNORE="ignore", + GOOD="select good", + BAD="select bad") + + +## Empty plotting tables. +EMPTY_MS1_PLOT_TAB <- dtable(ID=character(), + SMILES=character(), + tag=character(), + fig_eic=list(), + fig_struct=list()) + +EMPTY_MS2_PLOT_TAB <- dtable(tag=character(), + ID=character(), + fig_eic=list(), + fig_spec=list(), + fig_leg=list()) + + +## Empty comprehensive table. +EMPTY_COMP_TAB <- dtable(ID=character(), + mz=numeric(), + rt=numeric(), + adduct=character(), + tag=character(), + set=character(), + Name=character(), + known=character(), + SMILES=character(), + Formula=character(), + file=character()) + diff --git a/R/run.R b/R/run.R deleted file mode 100644 index 33a92c746233c9eb7e369969525fd7bdad4473cb..0000000000000000000000000000000000000000 --- a/R/run.R +++ /dev/null @@ -1,100 +0,0 @@ -## Copyright (C) 2020 by University of Luxembourg - -## Licensed under the Apache License, Version 2.0 (the "License"); -## you may not use this file except in compliance with the License. -## You may obtain a copy of the License at - -## http://www.apache.org/licenses/LICENSE-2.0 - -## Unless required by applicable law or agreed to in writing, software -## distributed under the License is distributed on an "AS IS" BASIS, -## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -## See the License for the specific language governing permissions and -## limitations under the License. - -is_gen_done<-function(dest) { - fnFlag<-file.path(dest,".gen.DONE") - file.exists(fnFlag) -} - -is_ms2_done<-function(set,dest) { - fnFlag<-file.path(dest,paste('.',set,'.DONE',sep='')) - file.exists(fnFlag) -} - -set_ms2_done<-function(set,dest) { - fnFlag<-file.path(dest,paste('.',set,'.DONE',sep='')) - file.create(fnFlag) -} - -set_gen_done<-function(dest) { - fnFlag<-file.path(dest,".gen.DONE") - file.create(fnFlag) -} - -unset_gen_done<-function(dest) { - fnFlag<-file.path(dest,".gen.DONE") - if (is_gen_done(dest)) unlink(fnFlag,force=T) -} - - -##' Paste with no separator. -##' -##' -##' @title Paste With No Separator -##' @param ... Strings to paste together. -##' @return Pasted string. -##' @author Todor Kondić -attch<-function(...) paste(...,sep='') - -##' Do the prescreening. -##' -##' @title Prescreening on bunch of files. -##' @param fTab File table with Files,ID,wd,Name and mz -##' columns. Column Files, as well as wd must have all rows -##' identical. -##' @param extr_fun Extraction function from the backend. -##' @param errEIC Absolute mz tolerance used to extract precursor EICs. -##' @param errFinePPM Tolerance given in PPM used to associate input -##' masses with what the instrument assigned as precutsors to MS2. -##' @param proc Amount of processors, or FALSE. -##' @param fnLog For parallel execution, dump messages there. -##' @return Nothing useful. -##' @author Todor Kondić -##' @export -gen<-function(fTab, - errEIC, - errFinePPM, - errCoarse, - errRT, - proc=F,fnLog='prescreen.log',extr_fun=extr_msnb_ht) { - message("*** Started to generate prescreen data ...") - unlink(fnLog) - fread<-function(fTab) { - extract(fTab=fTab, - extr_fun=extr_fun, - errEIC=errEIC, - errFinePPM=errFinePPM, - errRT=errRT, - errCoarse=errCoarse, - fnSpec=FN_SPEC) - - return(T) - } - - - fns<-unique(fTab$Files) - fTabs<-lapply(fns,function(fn) fTab[fTab$Files==fn,]) - if (proc>1) { - cl<-parallel::makeCluster(spec=proc,type='PSOCK',outfile=fnLog) - parallel::clusterEvalQ(cl,library(shinyscreen)) - ## parallel::clusterExport(cl,c("extract"),envir=environment()) - res<-parallel::parLapply(cl,fTabs,fread) - parallel::stopCluster(cl) - res - } else { - lapply(fTabs,fread) - } - message("*** ... done generating prescreen data.") -} - diff --git a/R/shiny-ui-base.R b/R/shiny-ui-base.R new file mode 100644 index 0000000000000000000000000000000000000000..bc83bf3e49f47b3ff8a25ec3acbf63f2cd0a49eb --- /dev/null +++ b/R/shiny-ui-base.R @@ -0,0 +1,206 @@ +## Copyright (C) 2020 by University of Luxembourg + +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at + +## http://www.apache.org/licenses/LICENSE-2.0 + +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. + +##' @importFrom shiny validate + +react_v <- shiny::reactiveValues +react_f <- shiny::reactive +react_e <- shiny::eventReactive +obsrv <- shiny::observe +obsrv_e <- shiny::observeEvent +isol <- shiny::isolate + +# volumes <- function() c(wd=getwd(), shinyFiles::getVolumes()()) +validate1 <- function(expr,msg) shiny::validate(shiny::need(expr,msg)) + + +path2vol <- function(path) { + ## This function returns shinyFiles compatible volumes. + splits <- split_path(path) + file.path(tail(splits,1),'') +} + + +prim_box<-function(...) {shinydashboard::box(..., + status="primary", + solidHeader=T)} +good_box<-function(...) {shinydashboard::box(..., + status="success", + solidHeader=T)} +err_box<-function(...) {shinydashboard::box(..., + status="danger", + solidHeader=T)} + +inact_box<-function(...) {shinydashboard::box(..., + status="danger", + solidHeader=T)} + + +html<-function(...) {shiny::tags$div(shiny::HTML(...))} + +## num_input<-function(...,width=NUM_INP_WIDTH) {shiny::tags$div(id="inline",shiny::textInput(...,width=width))} + +num_input <- function(inputId,label,...,width=NUM_INP_WIDTH) { + shiny::tags$div(style="display:inline-block", + shiny::tags$label(label, `for` = inputId), + shiny::tags$input(id = inputId, type = "text",style=paste("width:",width,sep = ""),...)) +} +num_input_unit <- function(inputId,l1,l2,width=NUM_INP_WIDTH,...) { + shiny::tags$div(style="display:inline-block", + shiny::tags$label(l1, `for` = inputId), + shiny::tags$input(id = inputId, type = "text",style=paste("width:",width,sep = ""),...), + shiny::tags$label(paste(" ",l2,sep=""), `for` = inputId)) +} + +txt_file_input <- function(inputId,input,fileB,label,volumes,default = "") { + + fnobj<-shinyFiles::parseFilePaths(roots = volumes, + selection = input[[fileB]]) + fn <- fnobj[['datapath']] + + if (isThingFile(fn)) { + shiny::textInput(inputId = inputId, + label = label, + value = fn) + } else { + shiny::isolate(currFn <- input[[inputId]]) + if (!isThingFile(currFn)) { + shiny::textInput(inputId = inputId, + label = label, + value = default) + } else { + shiny::textInput(inputId = inputId, + label = label, + value = currFn) + } + } + +} + +##' @export +mz_input <- function(input_mz,input_unit,width=NUM_INP_WIDTH,height=NUM_INP_HEIGHT,def_mz=0,def_unit="Da",pref="+/-") { + style <- "display: inline-block; vertical-align:top; width: " + stylel <- "display: inline-block; vertical-align:top;" + style=paste0(style,width,"; ") + shiny::div(shiny::div(style=stylel, + shiny::tags$label(pref,`for`=input_mz)), + shiny::div(style=style, + shiny::numericInput(input_mz, + label=NULL, + value = def_mz)), + shiny::div(style=style, + shiny::selectInput(input_unit, + label=NULL, + c("ppm","Da"), + selected=def_unit))) +} + +##' @export +rt_input <- function(input_rt,input_unit,width=NUM_INP_WIDTH,width_u=1-NUM_INP_WIDTH,height=NUM_INP_HEIGHT,def_rt=0,def_unit="min",pref="+/-") { + width=paste0(as.character(width), "%") + width_u=paste0(as.character(width_u), "%") + + style="display: inline-block; vertical-align:top; width: " + style=paste0(style,width,"; ") + stylel <- "display: inline-block; vertical-align:top;" + styleu <- paste0("display: inline-block; vertical-align:top; color: black; width: ",width_u,";") + shiny::div(shiny::div(style=stylel, + shiny::tags$label(pref,`for`=input_rt)), + shiny::div(style=style, + shiny::numericInput(input_rt, + label=NULL, + value = def_rt)), + shiny::div(style=styleu, + shiny::selectInput(input_unit, + label=NULL, + c("min","s"), + selected=def_unit))) + +} + +##'@export +rev2list <- function(rv) { + ## Take reactive values structure and convert them to nested + ## lists. + if (class(rv)[[1]] != "reactivevalues") + rv else lapply(shiny::reactiveValuesToList(rv),rev2list) +} + +##' @export +list2rev <- function(lst) { + ## Take nested named list and create reactive values from it. + if (class(lst)[[1]] != "list") + lst else do.call(react_v,lapply(lst,list2rev)) +} + +mk_roots <- function(wd) local({ + addons <- c("project"=normalizePath(wd,winslash = '/')) + def_vol <- function() { + path <- addons[['project']] + svols <- shinyFiles::getVolumes()() + vol <- path2vol(path) + sel <- match(vol,svols) + res <- names(svols)[[sel]] + res + } + list(set=function (rts) {addons <<- rts}, + get=function () c(addons,shinyFiles::getVolumes()()), + def_vol=def_vol, + def_path=function() { + vol <- def_vol() + svols <- shinyFiles::getVolumes()() + pref <- svols[[vol]] + res <- sub(paste0(pref,'(.*)'),'\\1',addons[["project"]]) + message('Relative path: ',res) + res + }) +}) + +#' @export +merge2rev <- function(rev,lst) { + crawllist <- function(lst,currname=""){ + cls <- class(lst) + + if (cls[[1]]=="list" && length(names(lst)) > 0) + invisible(lapply(names(lst), + function (nm) + crawllist(lst[[nm]], + currname=paste0(currname,'[["',nm,'"]]')))) + + else { + currname + } + } + + vars <- unlist(crawllist(lst),recursive = T) + vars + pref_r <- deparse(substitute(rev)) + pref_l <- deparse(substitute(lst)) + lhs <- paste0(pref_r,vars) + rhs <- paste0(pref_l,vars) + exprs <- Map(function (a,b) call("<-", + parse(text=a)[[1]], + parse(text=b)[[1]]), + lhs, + rhs) + code <- quote({}) + for (n in 1:length(exprs)) { + code[[n+1]] <- exprs[[n]] + + } + code + +} + + diff --git a/README.org b/README.org index 891eb435f3b6698731178bf1da4345d3e8546d11..82cc96d52e30cda94bb194a0996f099c414d9b65 100644 --- a/README.org +++ b/README.org @@ -237,20 +237,20 @@ | 999 | mixC | | -*** Data Files +*** Data files These should be in mzML format. -** Sets, Tags, Modes, Files and IDs - Each file is labelled by a tag, mode and set. Sets are defined in +** Sets, Tags, Adducts, Files and IDs + Each file is labelled by a tag, adduct and set. Sets are defined in the compound set CSV file and group compounds according to their - IDs. Modes correspond to the adducts. Tags label files in the + IDs. Adducts correspond to the adducts. Tags label files in the plots. - For known compounds, each set can contain multiple modes. Sets of - unknowns can only contain a single mode. Any files belonging to the - same set that have been acquired in a single mode, must carry + For known compounds, each set can contain multiple adducts. Sets of + unknowns can only contain a single adduct. Any files belonging to the + same set that have been acquired in a single adduct, must carry unique tags. - In addition, the IDs of compounds belonging to the same set/mode + In addition, the IDs of compounds belonging to the same set/adduct combination must be unique. Different ID sets may overlap. Essentially, sets serve the purpouse of visually grouping files in @@ -260,7 +260,7 @@ ** Config Screen This is the start tab. Import the compound and set lists first, then proceed to import the mzML files. Provide tags in the tag text - box and then assign the sets, modes and tags to the imported mzML + box and then assign the sets, adducts and tags to the imported mzML files using table widget. Once this is done, move on to the ~Spectra Extraction~ tab. diff --git a/inst/rmd/app.Rmd b/inst/rmd/app.Rmd new file mode 100644 index 0000000000000000000000000000000000000000..47f0556fb375338533914380bfc294ae8da213bf --- /dev/null +++ b/inst/rmd/app.Rmd @@ -0,0 +1,1392 @@ +--- +output: html_document +runtime: shiny_prerendered +author: Environmental Cheminformatics Group, LCSB, University of Luxembourg +title: "`r paste('Shinyscreen', packageVersion('shinyscreen'))`" +--- + +```{r, context='setup', include='false'} +library(data.table) +library(shinyscreen) +library(ggplot2) +## library(shinydashboard) +def_state <- new_state() +def_datafiles <- shinyscreen:::dtable(file=character(0), + tag=character(0)) +def_datatab <- shinyscreen:::dtable("tag"=factor(), + "adduct"=factor(levels=shinyscreen:::DISP_ADDUCTS), + "set"=factor()) + +def_summ_subset <- shinyscreen:::dtable("QA Column"=shinyscreen:::QA_FLAGS, + "Select"=factor("ignore",levels=shinyscreen:::SUBSET_VALS)) +## RMassBank masks shiny::validate. Unmask it. +validate <- shiny::validate +## def_state$input$tab$tags <- def_datatab +rv_state <- list2rev(def_state) +compl_sets <- eventReactive(rv_state$input$tab$setid, + rv_state$input$tab$setid[,unique(set)]) +## Reactive values to support some of the UI elements. +## rv_ui <- reactiveValues(datatab=def_tags) + +## Update with data-files. +rv_dfile <- reactiveVal(def_datafiles) + +## Data-file table when loading. +rv_datatab <- reactiveVal(def_datatab) + +## Re-definitions. +PLOT_FEATURES <- shinyscreen:::PLOT_FEATURES + +## Plotting parameters. + +## Transient rt range. +rv_rtrange <- reactiveValues(min=def_state$conf$rt_min, + max=def_state$conf$rt_max) + +## Transient mz range. +rv_mzrange <- reactiveValues(min=NA, + max=NA) + +``` +<style type="text/css"> +.main-container { + max-width: 100%; + margin-left: auto; + margin-right: auto; +} +</style> + + + +# Configuration {.tabset} + +## Inputs + + +<details> +<summary>Specify the project directory</summary> +This is where the output files and the state of the analysis will be +saved. +</details> +```{r, echo=FALSE} +actionButton(inputId = "project_b", + label= "Project") + +``` + +Current project directory is `r textOutput("project", inline=T)` + +<details><summary>Load the compound list(s)</summary> +A compound list is composed of entries describing compounds. This +description is used to search for its spectrum in the data file. The +list is a table in the ***CSV*** format and contains these columns, + +* ***ID*** : required column, must be filled; this is a user-defined + ID, uniquely associated with a compound + +* ***Name*** : this column can be left blank; if not, it should contain the + names of the compounds + +* ***SMILES*** : a _SMILES_ string, describing the structure of the + compound; this entry can be left empty only if one of either + ***Formula***, or ***mz*** entries are not + +* ***Formula*** : a chemical formula of a compound; this field can be + empty only if one of either ***SMILES***, or ***mz*** entries are + not + +* ***mz*** : mass of the ionised compound; this field can be left + empty only if one of either ***SMILES***, or ***Formula*** is not + +* ***CAS*** : the CAS number of the compound; it can be left empty + +* ***RT*** : retention time of the MS1 peak in minutes, if known; can + be left empty. + +Only ***ID*** and one of ***SMILES***, ***Formula*** or ***mz*** must +be filled. When structure, or a formula of a compound is known, it is +also possible to look for various adducts in the sample. Of course, +scanning for completely unknown compounds is also supported by the +***mz*** column. In this case, ***mz*** is the mass of the ion. + +It is strongly recommended to quote SMILES, names and formulas in the +CSV file used with Shinyscreen. +</details> +```{r, echo=FALSE} +actionButton(inputId = "comp_list_b", + label= "Compound list(s)") + +``` + +`r htmlOutput("comp_lists")` + +<details><summary>Load compound set list (_setid_ table)</summary> +The compound lists can contain more entries than is necessary. Using +the _setid_ lists, it is possible to create _compound sets_ which +contain only those compounds that will actually be searched for in the +data files. A _setid table_ is a _CSV_ containing at least two +columns, + +* ***ID*** : the ID entry from the compound list + +* ***set*** : an user-defined set name. +</details> +```{r, echo=FALSE} +actionButton(inputId = "setid_b", + label= "Load the setid table") + +``` + +`r htmlOutput("setids", inline=T)` + +## Data files +<details><summary>Load data files</summary> +Shinyscreen currently supports only the **mzML** file format. After +loading the files, set file tags in the file table (column +**tag**). Additionally, specify a set of compounds that is supposed +to be extracted from the file using the **set** column. Finally, +specify the **adduct** in the adduct column. In case of compounds +with unknown structure and formula, the adduct is ignored for obvious +reasons. +</details> +```{r, echo=FALSE} +actionButton(inputId = "datafiles_b", + label= "Load data files.") + +``` + + + + +<details><summary>Assign tags to data files.</summary> +Each tag designates an unique file. Use the table below to assign +tags. +</details> + +```{r, echo=FALSE} +rhandsontable::rHandsontableOutput("datafiles") + +``` + + + + +<details><summary>Assign sets to tags.</summary> +For each tag, assign a set and an adduct (if the structure information +exists, otherwise _adduct_ column is ignored). +</details> + +```{r, echo=F} +rhandsontable::rHandsontableOutput("datatab") +``` + +## Extraction + +### Spectra extraction based settings + +<details><summary>MS1 coarse error</summary> + +Extract all entries matching the target mass within this error in the +precursor table. +</details> +```{r, echo=F} +shinyscreen::mz_input(input_mz = "ms1_coarse", + input_unit = "ms1_coarse_unit", + def_mz = def_state$conf$tolerance[["ms1 coarse"]], + def_unit = "Da") +``` + +<details><summary>MS1 fine error</summary> + +The precursor table masses can be of lower accuracy. Once there is a +match within the coarse error, it can be further checked versus the +fine error bounds directly in the mass spectrum. + +</details> +```{r, echo=F} +shinyscreen::mz_input(input_mz = "ms1_fine", + input_unit = "ms1_fine_unit", + def_mz = def_state$conf$tolerance[["ms1 fine"]], + def_unit = "ppm") +``` + +<details><summary>MS1 EIC window</summary> + +The mz interval over which the intensities are aggregated to generate +a chromatogram. + +</details> +```{r, echo=F} +shinyscreen::mz_input(input_mz = "ms1_eic", + input_unit = "ms1_eic_unit", + def_mz = def_state$conf$tolerance[["eic"]], + def_unit = "Da") +``` + +<details><summary>Retention time window</summary> + +If the expected retention time has been specified for the compound, +then search for the MS1 signature inside the window defined by this +range. + +</details> +```{r, echo=F} +shinyscreen::rt_input(input_rt = "ms1_rt_win", + input_unit = "ms1_rt_win_unit", + def_rt = def_state$conf$tolerance[["rt"]], + def_unit = "min") +``` + +## Prescreening + +<details><summary>MS1 intensity threshold</summary> + +Ignore MS1 signal below the threshold. + +</details> +```{r, echo=F} + +numericInput(inputId = "ms1_int_thresh", + label = NULL, + value = def_state$conf$prescreen$ms1_int_thresh) +``` + +<details><summary>MS2 intensity threshold</summary> + +Ignore MS2 signal below the threshold. + +</details> +```{r, echo=F} + +numericInput(inputId = "ms2_int_thresh", + label = NULL, + value = def_state$conf$prescreen$ms2_int_thresh) +``` + + +MS1 signal-to-noise ratio. + +```{r, echo=F} + +numericInput(inputId = "s2n", + label = NULL, + value = def_state$conf$prescreen$s2n) +``` + + +<details><summary>MS1/MS2 retention delay.</summary> + +Look for associated MS2 spectrum within this window around the MS1 +peak. + +</details> +```{r, echo=F} +shinyscreen::rt_input(input_rt = "ret_time_shift_tol", + input_unit = "ret_time_shift_tol_unit", + def_rt = def_state$conf$prescreen[["ret_time_shift_tol"]], + def_unit = "min") +``` + +## Filter and order the summary table +<div style= "display: flex; vertical-align:top; "> + +<div style="padding-right: 0.5em"> +<details><summary>Filter summary table</summary> + +Filter entries in the summary table according to the QA criteria. + +* **qa_pass** : entries that passed all checks + +* **qa_ms1_exists** : MS1 intensity is above the MS1 threshold + +* **qa_ms2_exists** : those entries for which some MS2 spectra have been found + +* **qa_ms1_above_noise** : MS1 is intense enough and above the noise level + +* **qa_ms2_good_int** : MS2 intensity is above the MS2 threshold + +* **qa_ms2_near** : MS2 spectrum is close enough to the MS1 peak + +Values: + +* **ignore** : ignore QA criterion +* **take the good ones** : entry passed QA +* **take the bad ones** : entry failed QA + +</details> +```{r, echo=F} + +rhandsontable::rHandsontableOutput("summ_subset") + +``` +</div> + +<div style="padding-left: 0.5em"> + +<details><summary>Ordering by columns</summary> +It is possible to order the summary table using columns (keys): +*`r paste(gsub("^-(.+)","\\1",shinyscreen:::DEF_INDEX_SUMM), collapse = ',')`*. +The sequence of columns in the table below describes the +sequence of ordering steps -- the key in the first row sorts the +entire summary table and subsequent keys break the ties. + +</details> + +```{r, echo=F} +rhandsontable::rHandsontableOutput("order_summ") +``` + +</div> + +</div> + +## Report + +```{r, echo=F} +shiny::textInput(inputId = "rep_aut", label = "Report author", value = def_state$conf$report$author) +shiny::textInput(inputId = "rep_tit", label = "Report title", value = def_state$conf$report$title) +``` + +# View compound Lists and Sets {.tabset} + +## Compound List + +```{r, echo=F} +DT::dataTableOutput("comp_table") +``` + +## Setid Table +```{r, echo=F} +DT::dataTableOutput("setid_table") +``` + +# Save and Restore + +Shinyscreen can start from either a previously saved _state_ file (in +RDS format), or from a _YAML_ config file. States saved using GUI can +also be used from the script. + +```{r, echo=FALSE} +actionButton(inputId = "state_file_load_b", + label= "Restore project") + +``` + +```{r, echo=FALSE} +actionButton(inputId = "state_file_save_b", + label= "Save project") + +``` + +# Extract Data and Prescreen + +<details><summary>Extract spectra from data files.</summary> + +After Shinyscreen is configured, the compound and setid lists loaded, it +is possible to proceed with extracting the data. This is potentially a +time-intensive step, so some patience might be needed. + +Once the data is extracted, it will be possible to quality check the +spectra associated with the compounds specified in the _setid_ list, +to subset that data, look at the plots and publish a report. + +</details> +```{r, echo=FALSE} +actionButton(inputId = "extract_b", + label = "Extract") +``` + +<details><summary>Prescreen extracted spectra.</summary> + +After the data extraction is finished, the quality of the retrieved +spectra can be checked using the criteria defined in the +_Prescreening_ tab of the _Configuration_ Section. The resulting +_summary table_ is given below and is based on the prescreening +criteria and filter and ordering setup specified in _Filter and order +the summary table_ configuration subsection. + +</details> + +</details> + +<div style="display: flex; vertical-align: top;"> + +<div> +```{r, echo=FALSE} +actionButton(inputId = "presc_b", + label = "Prescreen") +``` +</div> + +<div> +```{r, echo=FALSE} +actionButton(inputId = "sortsubset_b", + label = "Sort and subset") +``` +</div> + +</div> + + +# Browse Results {.tabset} + +## Summary table +```{r, echo=FALSE} +DT::dataTableOutput("summ_table") +``` + +## Data Explorer + +### Grouping plots + +<details><summary>How to group plots?</summary> + +Plots are organised according to three categories: _plot group_, +_label group_ and _label_. Category _label_ represents a line in a +plot (for example, data from a specific file designated by _tag_ key +in the file table). Category _label group_ defines how different +labels are grouped together onto a graph (for continuing from the +previous example, if _label group_ is _ID_ then all _tag_s connected +to a single _ID_ are going to be plotted together on a graph). Lastly, +Different _label group_s are going to be organised into _plot group_s. + +Keys in different categores must be different. + +</details> + +<div style= "display: flex; vertical-align:top; "> + +<div style="padding-right:0.5em;"> +```{r, echo=F} +p_feats <- 1:length(PLOT_FEATURES) +names(p_feats) <- PLOT_FEATURES +selectInput(inputId = "plot_grp", + label = "Group plots", + choices = p_feats, + selected = p_feats[[shinyscreen:::FIG_DEF_CONF$grouping[["group"]]]], + width = "100%") +``` +</div> + +<div style="padding-right:0.5em;padding-left:0.5em;"> +```{r, echo=F} +p_feats <- 1:length(PLOT_FEATURES) +names(p_feats) <- PLOT_FEATURES +selectInput(inputId = "plot_grp_plot", + label = "Group labels", + choices = p_feats, + selected = p_feats[[shinyscreen:::FIG_DEF_CONF$grouping[["plot"]]]], + width = "100%") +``` +</div> + +<div style="padding-left:0.5em;"> +```{r, echo=F} +p_feats <- 1:length(PLOT_FEATURES) +names(p_feats) <- PLOT_FEATURES +selectInput(inputId = "plot_label", + label = "Label", + choices = p_feats, + selected = p_feats[[shinyscreen:::FIG_DEF_CONF$grouping[["label"]]]], + width = "100%") +``` +</div> + +</div> + + +### Plot selection + +```{r, echo=F} +DT::dataTableOutput("plot_sel") +``` + +Each plot supports zooming by selecting an area within the plot. The +zoom can be reset by double clicking on the plot surface. + +<!-- <div style="overflow-y:scroll;max-height:80vh; width: 100%; background: white;"> --> +```{r echo=F} +sidebarLayout(sidebarPanel(h5("Global RT range"), + shinyscreen::rt_input(input_rt = "plot_rt_min", + input_unit = "plot_rt_min_unit", + width=55, + width_u=40, + def_rt = "min", + def_unit = "min", + pref = ""), + + shinyscreen::rt_input(input_rt = "plot_rt_max", + input_unit = "plot_rt_max_unit", + width=55, + width_u=40, + def_rt = "max", + def_unit = "min", + pref = ""), + hr(), + h5("Log scale"), + checkboxGroupInput("plot_log", + label=NULL, + choices = c("MS1 EIC","MS2 EIC","MS2 Spectrum"), + selected = character(0)), + hr(), + uiOutput("plot_b_ctrl"), + h5("Pointer position"), + textOutput("plot_hover_out"), + width=4), + wellPanel(id = "tPanel",style = "overflow-y:scroll; max-height:80vh; background: white", + column(6, + plotOutput("plot_ms1_eic", + hover = hoverOpts(id="plot_hover", + delayType = "throttle", + delay=100), + dblclick = "plot_rt_click", + brush = "plot_brush"), + plotOutput("plot_ms2_eic", + hover = hoverOpts(id="plot_hover", + delayType = "throttle", + delay=100), + dblclick = "plot_rt_click", + brush = "plot_brush")), + column(6, + plotOutput("plot_struct"), + plotOutput("plot_ms2_spec", + hover = hoverOpts(id="plot_hover", + delayType = "throttle", + delay=100), + dblclick = "plot_mz_click", + brush = brushOpts(id="plot_mz_brush"))))) + + + +``` +<!-- </div> --> + +```{r, echo=F} + +``` + +<!-- ENGINE --> + +<!-- setup is here --> + +```{r, include="false", context='setup'} + +ord_nms <- gsub("^-(.+)","\\1",shinyscreen:::DEF_INDEX_SUMM) +ord_asc <- grepl("^-.+",shinyscreen:::DEF_INDEX_SUMM) +ord_asc <- factor(ifelse(ord_asc, "descending", "ascending"),levels = c("ascending","descending")) +def_ord_summ <- shinyscreen:::dtable("Column Name"=ord_nms,"Direction"=ord_asc) + +adapt_range <- function(fig,x_range=NULL) { + if (is.null(x_range)) fig else fig+coord_cartesian(xlim=x_range) +} + + +plot_boiler <- function(m,tab,row,plot_fun,rv_x_range,adapt_x_range=T) { + req(row) + plot_group <- m$conf$figures$grouping$group + plot_plot <- m$conf$figures$grouping$plot + ms1_plot <- m$out$tab$ms1_plot + ms1_plot <- m$out$tab$ms1_plot + req(row) + idx <- get_plot_idx(tab = tab, + plot_group = plot_group, + plot_plot = plot_plot, + row =row) + fig <- plot_fun(m=m,plot_index = idx) + x_range <- if (adapt_x_range) c(rv_x_range$min,rv_x_range$max) else NULL + adapt_range(fig,x_range=x_range) +} + + +get_plot_idx <- function(tab,plot_group,plot_plot,row) { + pg <- tab[row,..plot_group] + pp <- tab[row,..plot_plot] + res <- c(pg,pp) + names(res) <- c(plot_group,plot_plot) + res + +} + +update_gui <- function(in_conf, session) { + upd_unit <- function(entry,inp_val,inp_unit,choices) { + if (isTruthy(entry)) { + cntnt <- strsplit(entry,split = "[[:space:]]+")[[1]] + cntnt <- cntnt[nchar(cntnt) > 0] + if (length(cntnt)!=2) stop("(upd_unit) ","Unable to interpret ", entry) + val <- cntnt[[1]] + unit <- cntnt[[2]] + updateNumericInput(session = session, + inputId = inp_val, + value = as.numeric(val)) + updateSelectInput(session = session, + inputId = inp_unit, + selected = unit, + choices = choices) + } + } + + upd_num <- function(entry,inp_val) { + if (isTruthy(entry)) { + updateNumericInput(session = session, + inputId = inp_val, + value = as.numeric(entry)) + } + } + + upd_sel <- function(inputId,selected,choices) { + if (isTruthy(selected)) { + updateSelectInput(session = session, + inputId = inputId, + selected = selected, + choices = choices) + } + } + + isolate({ + rv_state$conf$project <- in_conf$project + rv_state$conf$data <- in_conf$data + ## Lists + rv_state$conf$compounds$lists <- in_conf$compounds$lists + rv_state$conf$compounds$sets <- in_conf$compounds$sets + + ## Tolerance + + upd_unit(in_conf$tolerance[["ms1 fine"]], + "ms1_fine", + "ms1_fine_unit", + choices=c("ppm","Da")) + upd_unit(in_conf$tolerance[["ms1 coarse"]], + "ms1_coarse", + "ms1_coarse_unit", + choices=c("ppm","Da")) + + upd_unit(in_conf$tolerance[["eic"]], + "ms1_eic", + "ms1_eic_unit", + choices=c("ppm","Da")) + upd_unit(in_conf$tolerance[["rt"]], + "ms1_rt_win", + "ms1_rt_win_unit", + choices=c("min","s")) + + ## Prescreen + upd_num(in_conf$prescreen[["ms1_int_thresh"]], + "ms1_int_thresh") + upd_num(in_conf$prescreen[["ms2_int_thresh"]], + "ms2_int_thresh") + upd_num(in_conf$prescreen[["s2n"]], + "s2n") + upd_unit(in_conf$prescreen[["ret_time_shift_tol"]], + "ret_time_shift_tol", + "ret_time_shift_tol_unit", + choices=c("min","s")) + + ## Files + df <- shinyscreen:::file2tab(in_conf$data) + df[,tag:=as.character(tag),with=T] + rv_dfile(df[,.(file,tag),by=c("file","tag"),mult="first"][,file:=NULL]) + nms <- colnames(df) + nms <- nms[nms!="file"] + fdt <- df[,..nms] + rv_datatab(fdt) + + ## figures + upd_unit(in_conf$figures$rt_min, + "plot_rt_min", + "plot_rt_min_unit", + choices=c("min","s")) + + upd_unit(in_conf$figures$rt_max, + "plot_rt_max", + "plot_rt_max_unit", + choices=c("min","s")) + + logentry <- in_conf$figures$logaxes + logchoice <- logical(0) + logchoice <- mapply(function(cn,uin) if (cn %in% logentry) uin else NA, + c("ms1_eic_int","ms2_eic_int","ms2_spec_int"), + c("MS1 EIC","MS2 EIC","MS2 Spectrum"),USE.NAMES = F) + logchoice <- logchoice[!is.na(logchoice)] + + updateCheckboxGroupInput(session = session, + inputId = "plot_log", + choices = c("MS1 EIC", + "MS2 EIC", + "MS2 Spectrum"), + selected = logchoice) + ## Report + if (isTruthy(in_conf$report$author)) updateTextInput(session,"rep_aut",value = in_conf$report$author) + if (isTruthy(in_conf$report$title)) updateTextInput(session,"rep_tit",value = in_conf$report$title) + + + }) +} + +``` + +```{r, include="false", context='server'} +## REACTIVE FUNCTIONS + +rf_compound_input_state <- reactive({ + sets <- rv_state$conf$compounds$sets + lst <- as.list(rv_state$conf$compounds$lists) + validate(need(length(lst)>0, + message = "Load the compound lists(s) first.")) + validate(need(length(sets)>0 && nchar(sets)>0, + message = "Load the setid table first.")) + isolate({ + state <- rev2list(rv_state) + m <- load_compound_input(state) + ## Side effect! This is because my pipeline logic does not + ## work nicely with reactive stuff. + rv_state$input$tab$cmpds <- list2rev(m$input$tab$cmpds) + rv_state$input$tab$setid <- m$input$tab$setid + m + }) +}) + +rf_conf_proj <- reactive({ + + state <- rev2list(rv_state) + dir.create(state$conf$project,showWarnings = F) + state + +}) + +rf_conf_state <- reactive({ + state <- rf_conf_proj() + ## mzml1 <- rf_get_inp_datatab() + ## mzml1[,`:=`(tag=as.character(tag), + ## set=as.character(set), + ## adduct=as.character(adduct))] + ## mzml2 <- rf_get_inp_datafiles() + + ## mzml <- mzml1[mzml2,on="tag"] + + ftab <- get_fn_ftab(state) + state$conf$data <- ftab + state$conf[["summary table"]]$filter <- rf_get_subset() + state$conf[["summary table"]]$order <- rf_get_order() + state +}) + +rf_get_subset <- reactive({ + input$summ_subset + dt <- tryCatch(rhandsontable::hot_to_r(input$summ_subset), + error = function(e) def_summ_subset) + dt[Select == shinyscreen:::SUBSET_VALS[["GOOD"]], extra := T] + dt[Select == shinyscreen:::SUBSET_VALS[["BAD"]], extra := F] + sdt <- dt[!is.na(extra)] + if (NROW(sdt) > 0) { + sdt[,paste0(`QA Column`," == ",extra)] + } else NULL +}) + +rf_get_order <- reactive({ + dt <- tryCatch(rhandsontable::hot_to_r(input$order_summ),error = function(e) def_ord_summ) + tmp <- dt[Direction == "descending",.(`Column Name`=paste0("-",`Column Name`))] + tmp[,`Column Name`] +}) + +rf_get_inp_datatab <- eventReactive(input$datatab,{ + z <- data.table::as.data.table(tryCatch(rhandsontable::hot_to_r(input$datatab)), + error = function(e) def_datatab) + + + z[,.(tag=as.character(tag), + adduct=as.character(adduct), + set=as.character(set)), with = T] +}) + +rf_get_inp_datafiles <- eventReactive(input$datafiles,{ + z <- data.table::as.data.table(tryCatch(rhandsontable::hot_to_r(input$datafiles)), + error = function(e) def_datafiles) + + + z[,.(file, + tag=as.character(tag)), with = T] +}) + +rf_summ_table_rows <- eventReactive(input$summ_table_rows_all,{ + input$summ_table_rows_all + +}) + +rf_gen_sel_plot_tab <- reactive({ + + + rv_state$out$tab$flt_summ + m <- rev2list(rv_state) + fltsumm <- m$out$tab$flt_summ + validate(need(NROW(fltsumm) > 0, + message = "Generate summary table first.")) + + rows <- rf_summ_table_rows() + + ## Reduce to currently selected rows. + m$out$tab$flt_summ <- m$out$tab$flt_summ[rows,] + gen_key_plot_tab(m) + +}) + +rf_rtrange_from_data <- eventReactive(input$plot_sel_cell_clicked,{ + ## Determine the plotting RT range. + + ptab <- rf_gen_sel_plot_tab() + plot_group <- rv_state$conf$figures$grouping$group + plot_plot <- rv_state$conf$figures$grouping$plot + ms1_plot <- rv_state$out$tab$ms1_plot + ms2_plot <- rv_state$out$tab$ms2_plot + row <- input$plot_sel_cell_clicked[["row"]] + req(row) + sel_grp <- ptab[row,.SD,.SDcols=c(plot_group,plot_plot)] + mpl1 <- ms1_plot[sel_grp,on=c(plot_group,plot_plot),nomatch=NULL] + mpl2 <- ms2_plot[sel_grp,on=c(plot_group,plot_plot),nomatch=NULL] + + res <- shinyscreen:::get_rt_interval(mpl1$fig_eic[[1]]$data,mpl2$fig_eic[[1]]$data,rv_state$conf$figures) + res + + +}) + + +rf_gen_struct_figs <- eventReactive(rv_state$out$tab$comp,gen_struct_plots(rv_state)) +``` + +<!-- OBSERVERS --> + +```{r, include="false", context='server'} + +observeEvent(input$setid_b, { + filters <- matrix(c("CSV files", ".csv", + "All files", "*"), + 2, 2, byrow = TRUE) + setids <- tcltk::tk_choose.files(filters=filters) + message("(config) Selected compound sets (setid): ", paste(setids,collapse = ",")) + rv_state$conf$compounds$sets <- if (length(setids)>0 && nchar(setids[[1]])>0) setids else "Nothing selected." +}) + +observeEvent(input$project_b,{ + wd <- tcltk::tk_choose.dir(default = getwd(), + caption = "Choose project directory") + message("Set project dir to ", wd) + dir.create(wd,recursive = T,showWarnings = F) + rv_state$conf$project <- wd +}) + +observeEvent(input$comp_list_b, { + filters <- matrix(c("CSV files", ".csv", + "All files", "*"), + 2, 2, byrow = TRUE) + compfiles <- tcltk::tk_choose.files(filters=filters) + message("(config) Selected compound lists: ", paste(compfiles,collapse = ",")) + rv_state$conf$compounds$lists <- if (length(compfiles)>0 && nchar(compfiles[[1]])>0) compfiles else "Nothing selected." +}) + +observeEvent(input$datafiles_b,{ + filters <- matrix(c("mzML files", ".mzML", + "All files", "*"), + 2, 2, byrow = TRUE) + fns <- tcltk::tk_choose.files(filters=filters) + message("(config) Selected data files: ", paste(fns,collapse = ",")) + ## Did the user choose any files? + if (length(fns) > 0) { + oldtab <- rf_get_inp_datafiles() + + newf <- setdiff(fns,oldtab$file) + nr <- NROW(oldtab) + tmp <- if (length(newf)>0) shinyscreen:::dtable(file=newf,tag=paste0('F',(nr+1):(nr + length(newf)))) else shinyscreen:::dtable(file=character(),tag=character()) + + z <- rbind(oldtab, tmp) + z[,tag:=as.character(tag)] + rv_dfile(z) + } +}) + +observe({ + df_tab <- rf_get_inp_datafiles() + state <- rf_compound_input_state() + isolate(oldtab <- rf_get_inp_datatab()) + + oldt <- oldtab$tag + tagl <- df_tab$tag + diff <- setdiff(tagl, + oldt) + + res <- if (length(diff)!=0) { + ## Only change the tag names in the old ones. + pos_tag <- 1:length(tagl) + pos_old <- 1:NROW(oldtab) + pos_mod <- intersect(pos_tag,pos_old) + new_tag <- tagl[pos_mod] + if (NROW(oldtab)>0) oldtab[pos_mod,tag := ..new_tag] + + ## Now add tags for completely new files, if any. + rest_new <- if (NROW(oldtab) > 0) setdiff(diff,new_tag) else diff + tmp <- shinyscreen:::dtable(tag=rest_new, + adduct=character(0), + set=character(0)) + + dt <-data.table::as.data.table(rbind(as.data.frame(oldtab), + as.data.frame(tmp))) + dt[tag %in% df_tab$tag,] + } else oldtab + + rv_datatab(res) +}) + +observe({ + mc <- rf_conf_state() + rv_state$conf <- mc$conf +}, label = "conf_state") + +observe({ + dtab <- rv_datatab() + dfiles <- rv_dfile() + message("(config) Generating mzml from rv.") + isolate(rv_state$input$tab$mzml <- dtab[dfiles,on="tag"]) + message("(config) Done generating mzml from rv.") + + +}, label = "mzml_from_rv") + +observe({ + dtab <- rf_get_inp_datatab() + dfiles <- rf_get_inp_datafiles() + + message("(config) Generating mzml from inputs.") + res <- dtab[dfiles,on="tag"] + isolate(rv_state$input$tab$mzml <- res) + message("(config) Generating mzml from inputs.") + + +}, label = "mzml_from_inp") + +observeEvent(input$extract_b,{ + m <- rf_conf_state() + fn_c_state <- file.path(m$conf$project, + paste0("extract.",shinyscreen:::FN_CONF)) + yaml::write_yaml(x=m$conf,file=fn_c_state) + message("(extract) Config written to ", fn_c_state) + state <- shinyscreen::run(m=m, + phases=c("setup", + "comptab", + "extract")) + message("(extract) Done extracting.") + z <- shinyscreen::merge2rev(rv_state,lst = state) + eval(z) +}) + +observeEvent(input$presc_b,{ + validate(need(NROW(rv_state$extr$ms1) > 0, + message = "Perform extraction first.")) + m <- rev2list(rv_state) + + fn_c_state <- file.path(m$conf$project, + paste0("presc.",shinyscreen:::FN_CONF)) + yaml::write_yaml(x=m$conf,file=fn_c_state) + message("(prescreen) Config written to ", fn_c_state) + state <- shinyscreen::run(m=m, + phases=c("prescreen")) + message("(prescreen) Done prescreening.") + + z <- shinyscreen::merge2rev(rv_state,lst = state) + eval(z) + + +}) + +observeEvent(input$sortsubset_b,{ + m <- rev2list(rv_state) + + fn_c_state <- file.path(m$conf$project, + paste0("sortsubset.",shinyscreen:::FN_CONF)) + yaml::write_yaml(x=m$conf,file=fn_c_state) + message("(sortsubset) Config written to ", fn_c_state) + state <- shinyscreen::run(m=m, + phases=c("sort", + "subset")) + message("(sortsubset) Done with sorting and subsetting.") + + z <- shinyscreen::merge2rev(rv_state,lst = state) + eval(z) + + +}) + +observeEvent(input$plot_b,{ + validate(need(NROW(rv_state$out$tab$flt_summ) > 0, + message = "Perform prescreening first.")) + m <- rev2list(rv_state) + + fn_c_state <- file.path(m$conf$project, + paste0("genplot.",shinyscreen:::FN_CONF)) + yaml::write_yaml(x=m$conf,file=fn_c_state) + message("(generate plots) Config written to ", fn_c_state) + state <- shinyscreen::run(m=m, + phases=c("plot")) + message("(generate plots) Done generating plots.") + + z <- shinyscreen::merge2rev(rv_state,lst = state) + eval(z) +}) + +observeEvent(input$state_file_load_b,{ + filters <- matrix(c("RDS files", ".rds", + "YAML config files", ".yaml", + "All files", "*"), + 3, 2, byrow = TRUE) + fn <- tcltk::tk_choose.files(filters=filters, + multi = F) + + message("(config) Loading state from: ", paste(fn,collapse = ",")) + fn <- if (length(fn)>0 && nchar(fn[[1]])>0) fn else "" + + if (nchar(fn) > 0) { + if (grepl("yaml",fn)) { + state <- new_state_fn_conf(fn) + conf <- state$conf + update_gui(conf,session=session) + } else { + state <- readRDS(file=fn) + z <- shinyscreen::merge2rev(rv_state,lst = state) + eval(z) + update_gui(rv_state$conf, session=session) + } + + } +}) + +observeEvent(input$state_file_save_b,{ + filters <- matrix(c("RDS files", ".rds", + "All files", "*"), + 2, 2, byrow = TRUE) + fn <- tk_save_file(filters=filters, + default = "state.rds") + message("(config) Saving state to: ", paste(fn,collapse = ",")) + fn <- if (length(fn)>0 && nchar(fn[[1]])>0) fn else "" + + if (nchar(fn) > 0) { + m <- rev2list(rv_state) + ftab <- get_fn_ftab(m) + fconf <- get_fn_conf(m) + yaml::write_yaml(m$conf, + file = fconf) + shinyscreen:::tab2file(tab=m$input$tab$mzml,file=ftab) + m$conf$data <- ftab + + + saveRDS(object=m,file=fn) + } +}) + +observe({ + res <- rf_rtrange_from_data() + rv_rtrange$min <- res[[1]] + rv_rtrange$max <- res[[2]] +},label = "rt_from_data") + +observeEvent(input$plot_brush,{ + xmin <- input$plot_brush[["xmin"]] + xmax <- input$plot_brush[["xmax"]] + if (!is.null(xmin)) rv_rtrange$min <- xmin + if (!is.null(xmin)) rv_rtrange$max <- xmax + session$resetBrush("plot_brush") + +}) + +observeEvent(input$plot_rt_click, +{ + res <- rf_rtrange_from_data() + rv_rtrange$min <- res[[1]] + rv_rtrange$max <- res[[2]] +}) + +observeEvent(input$plot_mz_click, +{ + rv_mzrange$min <- NA + rv_mzrange$max <- NA +}) + + +observeEvent(rv_state$out$tab$comp,{ + m <- gen_struct_plots(rev2list(rv_state)) + rv_state$out$tab$structfig <- m$out$tab$structfig +}, label = "gen_struct_plots") + +observeEvent(input$plot_mz_brush,{ + xmin <- input$plot_mz_brush[["xmin"]] + xmax <- input$plot_mz_brush[["xmax"]] + if (!is.null(xmin)) rv_mzrange$min <- xmin + if (!is.null(xmin)) rv_mzrange$max <- xmax + session$resetBrush("plot_mz_brush") + +}) + +``` + +<!-- Tolerance --> +```{r, include='false', context = 'server'} +uni_ass <- function(val,unit) { + paste(input[[val]], + input[[unit]]) +} +observe({ + + rv_state$conf$tolerance[["ms1 fine"]] <- uni_ass("ms1_fine", + "ms1_fine_unit") + + rv_state$conf$tolerance[["ms1 coarse"]] <- uni_ass("ms1_coarse", + "ms1_coarse_unit") + + rv_state$conf$tolerance[["eic"]] <- uni_ass("ms1_eic", + "ms1_eic_unit") + + rv_state$conf$tolerance[["rt"]] <- uni_ass("ms1_rt_win", + "ms1_rt_win_unit") + + +}) +``` + +<!-- Prescreen --> +```{r, include='false', context = 'server'} +## uni_ass <- function(val,unit) { +## paste(input[[val]], +## input[[unit]]) +## } +observe({ + + rv_state$conf$prescreen[["ms1_int_thresh"]] <- input[["ms1_int_thresh"]] + rv_state$conf$prescreen[["ms2_int_thresh"]] <- input[["ms2_int_thresh"]] + rv_state$conf$prescreen[["s2n"]] <- input$s2n + rv_state$conf$prescreen[["ret_time_shift_tol"]] <- uni_ass("ret_time_shift_tol", + "ret_time_shift_tol_unit") + +}) +``` +<!-- Plotting --> +```{r, include='false', context = 'server'} +observe({ + plot_group <- PLOT_FEATURES[[as.integer(input$plot_grp)]] + plot_plot <- PLOT_FEATURES[[as.integer(input$plot_grp_plot)]] + plot_label <-PLOT_FEATURES[[as.integer(input$plot_label)]] + isolate({ + rv_state$conf$figures$grouping$group <- plot_group + rv_state$conf$figures$grouping$plot <- plot_plot + rv_state$conf$figures$grouping$label <- plot_label + }) + +}, label = "plot-grouping") +observe({ + vals <- input$plot_log + checked <- c("MS1 EIC"=F, + "MS2 EIC"=F, + "MS2 Spectrum"=F) + if (length(vals)!=0) checked[vals] <- T + l <- list() + l <- c(if (checked[["MS1 EIC"]]) "ms1_eic_int" else NULL,l) + l <- c(if (checked[["MS2 EIC"]]) "ms2_eic_int" else NULL,l) + l <- c(if (checked[["MS2 Spectrum"]]) "ms2_spec_int" else NULL,l) + rv_state$conf$figures[["logaxes"]] <- l[!sapply(l,is.null)] + + rv_state$conf$figures$rt_min <- uni_ass("plot_rt_min","plot_rt_min_unit") + rv_state$conf$figures$rt_max <- uni_ass("plot_rt_max","plot_rt_max_unit") +}) +``` +<!-- Report --> +```{r, include='false', context = 'server'} +observe({ + rv_state$conf$report$author <- input$rep_aut + rv_state$conf$report$title <- input$rep_tit +}) +``` + +<!-- RENDER --> +```{r, include="false", context="server"} +output$project <- renderText(rv_state$conf$project) + +output$comp_lists <- renderText({ + lsts <- rev2list(rv_state$conf$compounds$lists) + if (length(lsts) > 0 && + isTruthy(lsts) && + lsts != "Nothing selected.") { + paste(c("<ul>", + sapply(lsts, + function (x) paste("<li>",x,"</li>")), + "</ul>")) + } else "No compound list selected yet." +}) + +output$setids <- renderText({ + sets <- rv_state$conf$compounds$sets + if (isTruthy(sets) && sets != "Nothing selected.") + paste("selected <em>setid</em> table:", + sets) else "No <em>setid</em> table selected." +}) + +output$order_summ <- rhandsontable::renderRHandsontable(rhandsontable::rhandsontable(def_ord_summ, + manualRowMove = T)) + +output$datafiles <- rhandsontable::renderRHandsontable( +{ + res <- rv_dfile() + rhandsontable::rhandsontable(as.data.frame(res), + width = "50%", + height = "25%", + allowInvalid=F) +}) + +output$datatab <- rhandsontable::renderRHandsontable( +{ + setid <- rv_state$input$tab$setid + res <- rv_datatab() + + if (NROW(res)>0) { + res$tag <- factor(res$tag, + levels = c(unique(res$tag), + "invalid")) + res$set <- factor(res$set, + levels = c(unique(setid$set), + "invalid")) + res$adduct <- factor(res$adduct, + levels = shinyscreen:::DISP_ADDUCTS) + } + + + rhandsontable::rhandsontable(res,stretchH="all", + allowInvalid=F) +}) + +output$comp_table <- DT::renderDataTable({ + state <- rf_compound_input_state() + + + DT::datatable(state$input$tab$cmpds, + style = 'bootstrap', + class = 'table-condensed', + extensions = 'Scroller', + options = list(scrollX = T, + scrollY = 200, + deferRender = T, + scroller = T)) +}) + +output$setid_table <- DT::renderDataTable({ + state <- rf_compound_input_state() + + DT::datatable(state$input$tab$setid, + style = 'bootstrap', + class = 'table-condensed', + extensions = 'Scroller', + options = list(scrollX = T, + scrollY = 200, + deferRender = T, + scroller = T)) +}) + +output$summ_subset <- rhandsontable::renderRHandsontable({ + + + rhandsontable::rhandsontable(def_summ_subset) +}) + +output$summ_table <- DT::renderDataTable({ + + + tab <- rv_state$out$tab$flt_summ + nms <- colnames(tab) + dpl_nms <- nms[nms!="file"] + validate(need(NROW(tab)>0, message = "Please prescreen the data first.")) + DT::datatable(tab[,..dpl_nms], + style = 'bootstrap', + class = 'table-condensed', + extensions = 'Scroller', + options = list(scrollX = T, + scrollY = 200, + deferRender = T, + scroller = T)) +}) + + +output$plot_sel <- DT::renderDataTable({ + tab <- rf_gen_sel_plot_tab() + DT::datatable(tab, + style = 'bootstrap', + class = 'table-condensed', + extensions = 'Scroller', + selection = 'single', + options = list(scrollX = T, + scrollY = 200, + deferRender = T, + scroller = T)) +}) + + +output$plot_b_ctrl <- renderUI({ + tab <- rv_state$out$tab$flt_summ + req(NROW(tab)>0) + actionButton(inputId = "plot_b", + label= "Save all plots") + +}) + +output$plot_ms1_eic <- renderPlot({ + plot_boiler(m=rv_state, + tab=rf_gen_sel_plot_tab(), + row=input$plot_sel_cell_clicked[["row"]], + plot_fun=plot_ms1_chr, + rv_x_range=rv_rtrange) +}) + +output$plot_ms2_eic <- renderPlot({ + plot_boiler(m=rv_state, + tab=rf_gen_sel_plot_tab(), + row=input$plot_sel_cell_clicked[["row"]], + plot_fun=plot_ms2_chr, + rv_x_range=rv_rtrange) + +}) + + + +output$plot_ms2_spec <- renderPlot({ + plot_boiler(m=rv_state, + tab=rf_gen_sel_plot_tab(), + row=input$plot_sel_cell_clicked[["row"]], + plot_fun=plot_ms2_spec, + rv_x_range=rv_mzrange) + +}) + +output$plot_struct <- renderPlot({ + req(NROW(rv_state$out$tab$structfig)>0) + message("plot struuuct") + plot_boiler(m=rv_state, + tab=rf_gen_sel_plot_tab(), + row=input$plot_sel_cell_clicked[["row"]], + plot_fun=plot_struct, + adapt_x_range=F) + +}) + +output$plot_hover_out <- renderText({ + inp1 <- input$plot_hover[[1]] + inp2 <- input$plot_hover[[2]] + res <- if (all(!(c(is.null(inp1),is.null(inp2))))) { + paste0('(', + format(inp1,digits=5), + ',', + format(inp2,digits=2,scientific=T), + ')') + } else "Currently not in the plot." + +}) + + +``` + + +```{r, echo=F, context = 'server'} +session$onSessionEnded(function () stopApp()) +``` diff --git a/man/RMB_EIC_prescreen_df.Rd b/man/RMB_EIC_prescreen_df.Rd deleted file mode 100644 index 13016b13aeb3f850bf6dad2fabda3cd1eedee151..0000000000000000000000000000000000000000 --- a/man/RMB_EIC_prescreen_df.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mix.R -\name{RMB_EIC_prescreen_df} -\alias{RMB_EIC_prescreen_df} -\title{Prescreen} -\usage{ -RMB_EIC_prescreen_df(wd, RMB_mode, FileList, cmpd_list, - ppm_limit_fine = 10, EIC_limit = 0.001) -} -\arguments{ -\item{wd}{Absolute path to the directory that will contain the -resulting data frame.} - -\item{RMB_mode}{...} - -\item{FileList}{...} - -\item{cmpd_list}{...} - -\item{ppm_limit_fine}{...} - -\item{EIC_limit}{...} -} -\description{ -Prescreens. Writes data out. Adapted from ReSOLUTION -} -\author{ -Emma Schymanski, Todor Kondić -} diff --git a/man/attch.Rd b/man/attch.Rd deleted file mode 100644 index d752dbd55280620a463cccf385688d9ff0fe2255..0000000000000000000000000000000000000000 --- a/man/attch.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/run.R -\name{attch} -\alias{attch} -\title{Paste With No Separator} -\usage{ -attch(...) -} -\arguments{ -\item{...}{Strings to paste together.} -} -\value{ -Pasted string. -} -\description{ -Paste with no separator. -} -\author{ -Todor Kondić -} diff --git a/man/gen_cmpd_l.Rd b/man/gen_cmpd_l.Rd deleted file mode 100644 index e2a69af6910b934079dd85aa43e3763f50a6ff5e..0000000000000000000000000000000000000000 --- a/man/gen_cmpd_l.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mix.R -\name{gen_cmpd_l} -\alias{gen_cmpd_l} -\title{Generate Compound List File} -\usage{ -gen_cmpd_l(src_fn, dest_fn) -} -\arguments{ -\item{src_fn}{The input compound list CSV filename.} - -\item{dest_fn}{The resulting compound list CSV filename.} -} -\value{ -Number of compounds. -} -\description{ -Generate the RMassBank compound list from the input compound list -in CSV file src_fn. The input compound list format is either a -Chemical Dashboard csv file with, at least, PREFERRED_ SMILES -columns \emph{filled} out, or just an ordinary CSV file with columns -SMILES and Names filled. Argument dest_fn is the destination -filename. Returns the number of compounds. -} -\author{ -Todor Kondić -} diff --git a/man/gen_cmpdl_and_load.Rd b/man/gen_cmpdl_and_load.Rd deleted file mode 100644 index a782c7bc8c0b4c9138ac02f3733d4e45af5a204a..0000000000000000000000000000000000000000 --- a/man/gen_cmpdl_and_load.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mix.R -\name{gen_cmpdl_and_load} -\alias{gen_cmpdl_and_load} -\title{Generate and Load the RMassBank Compound List} -\usage{ -gen_cmpdl_and_load(wd, fn_cmpdl) -} -\arguments{ -\item{wd}{Directory under which results are archived.} - -\item{fn_cmpdl}{The input compound list filename.} -} -\value{ -Named list. The key \code{fn_cmpdl} is the path of the -generated compound list and the key \code{n} the number of -compounds. -} -\description{ -Generates the RMassBank compound list and loads it. -} -\author{ -Todor Kondić -} diff --git a/man/gen_ftable.Rd b/man/gen_ftable.Rd deleted file mode 100644 index de5f27d08217170cf6b89b75d3dcb34741dd56ec..0000000000000000000000000000000000000000 --- a/man/gen_ftable.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mix.R -\name{gen_ftable} -\alias{gen_ftable} -\title{Generate and Load the RMassBank Settings File} -\usage{ -gen_ftable(fn_data, wd, n_cmpd) -} -\arguments{ -\item{fn_data}{The mzML filename.} - -\item{wd}{Directory under which results are archived.} - -\item{n_cmpd}{Number of compounds.} -} -\value{ -File path of the file table. -} -\description{ -Generates file table. -} -\author{ -Todor Kondić -} diff --git a/man/gen_stgs_and_load.Rd b/man/gen_stgs_and_load.Rd deleted file mode 100644 index 1fb26fcadaa0cd354710dcbf389120cdfd1913d9..0000000000000000000000000000000000000000 --- a/man/gen_stgs_and_load.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mix.R -\name{gen_stgs_and_load} -\alias{gen_stgs_and_load} -\title{Generate and Load the RMassBank Settings File} -\usage{ -gen_stgs_and_load(stgs, wd) -} -\arguments{ -\item{stgs}{Settings named list, or a settings filename.} - -\item{wd}{Directory under which results are archived.} -} -\value{ -result of RMassBank::loadRmbSettings -} -\description{ -Generates settings file and loads it. -} -\author{ -Todor Kondić -} diff --git a/man/mk_combine_file.Rd b/man/mk_combine_file.Rd deleted file mode 100644 index 9c02d2cfc77c04d850667107850af02b2c7e4e67..0000000000000000000000000000000000000000 --- a/man/mk_combine_file.Rd +++ /dev/null @@ -1,26 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mix.R -\name{mk_combine_file} -\alias{mk_combine_file} -\title{Combine RMB Settings With Different Collisional Energies} -\usage{ -mk_combine_file(sett_fns, fname) -} -\arguments{ -\item{sett_fns}{A list of settings files.} - -\item{fname}{The name of the combined file.} -} -\value{ -fname -} -\description{ -Combine the RMB settings files -} -\details{ -Combine RMB settings with different collisional energies into one -settings file with multiple collisional energy entries. -} -\author{ -Todor Kondić -} diff --git a/man/mk_sett_file.Rd b/man/mk_sett_file.Rd deleted file mode 100644 index cbf885acf6429be534d34326454305720c5054e4..0000000000000000000000000000000000000000 --- a/man/mk_sett_file.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mix.R -\name{mk_sett_file} -\alias{mk_sett_file} -\title{Generate RMassBank settings file.} -\usage{ -mk_sett_file(sett_alist, file) -} -\arguments{ -\item{sett_alist}{The named list of settings that are different -from the RMassBank defaults.} - -\item{file}{The name of the YAML specification that will be merged -with the template Rmb settings file.} -} -\description{ -Produce the Rmb Settings file -} -\details{ -Produce the Rmb Settings file based on the customisation file in -YAML format. -} diff --git a/man/no_drama_mkdir.Rd b/man/no_drama_mkdir.Rd deleted file mode 100644 index c33d17dca9fe4c8aad8577c798f3294c3915f057..0000000000000000000000000000000000000000 --- a/man/no_drama_mkdir.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mix.R -\name{no_drama_mkdir} -\alias{no_drama_mkdir} -\title{Create directories without drama} -\usage{ -no_drama_mkdir(path) -} -\arguments{ -\item{path}{Names of the directories.} -} -\value{ -The character string containing the input argument \code{path}. -} -\description{ -Create directories without drama. -} -\details{ -Create directories without drama. -} -\author{ -Todor Kondić -} diff --git a/man/presc.do.Rd b/man/presc.do.Rd deleted file mode 100644 index 162677c29787fcad7eeb80ecc9a1eb03550f3636..0000000000000000000000000000000000000000 --- a/man/presc.do.Rd +++ /dev/null @@ -1,31 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/run.R -\name{presc.do} -\alias{presc.do} -\title{Prescreening on bunch of files.} -\usage{ -presc.do(fn_data, fn_cmpd_l, mode, dest = ".", proc = F, ...) -} -\arguments{ -\item{fn_data}{The mzML files. Basis for the out directory name -generation.} - -\item{fn_cmpd_l}{The compound list.} - -\item{mode}{RMB mode.} - -\item{dest}{Destination directory.} - -\item{proc}{Amount of processors, or FALSE.} - -\item{fn_cmpd_list}{The compound list CSV.} -} -\value{ -Nothing useful. -} -\description{ -Do the prescreening. -} -\author{ -Todor Kondić -} diff --git a/man/presc.plot.Rd b/man/presc.plot.Rd deleted file mode 100644 index fb9939d6d477215c8bd9efb4d6436dbfe3c7aa8c..0000000000000000000000000000000000000000 --- a/man/presc.plot.Rd +++ /dev/null @@ -1,43 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mix.R -\name{presc.plot} -\alias{presc.plot} -\title{Plot the Output of Prescreen} -\usage{ -presc.plot(prescdf, mode, out = "prescreen.pdf", fn_cmpd_l, - pal = "Dark2", cex = 0.75, rt_digits = 2, m_digits = 4) -} -\arguments{ -\item{prescdf}{File table data-frame. See presc.shiny for details.} - -\item{mode}{RMB mode.} - -\item{out}{The name of the output file.} - -\item{fn_cmpd_l}{The compound list name.} - -\item{pal}{ColorBrewer palette name.} - -\item{cex}{As in legend.} - -\item{rt_digits}{Number of digits after the point for the retention time.} - -\item{m_digits}{Number of digits after the point for the mass.} - -\item{wd}{Sequence of data dirs containing the prescreen subdir.} - -\item{digits}{Number of significant digits for peak ret times.} -} -\value{ -Nothing useful. -} -\description{ -Plot the output of prescreen. -} -\author{ -Todor Kondić - -Mira Narayanan - -Anjana Elapavalore -} diff --git a/man/presc.shiny.Rd b/man/presc.shiny.Rd deleted file mode 100644 index d2f0e5d163fa35d128aa7205e1de7951f30f0244..0000000000000000000000000000000000000000 --- a/man/presc.shiny.Rd +++ /dev/null @@ -1,41 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mix.R -\name{presc.shiny} -\alias{presc.shiny} -\title{Prescreening with Shiny} -\usage{ -presc.shiny(prescdf, mode, fn_cmpd_l, pal = "Dark2", cex = 0.75, - rt_digits = 2, m_digits = 4) -} -\arguments{ -\item{prescdf}{File table data-frame. Columns: Files,ID,wd,tag,set_name ...} - -\item{mode}{RMassBank mode.} - -\item{fn_cmpd_l}{Compound list file name.} - -\item{pal}{ColorBrewer palette.} - -\item{cex}{Size of fonts.} - -\item{rt_digits}{Number of decimal places for the retention time.} - -\item{m_digits}{Number of decimal places for the mass.} -} -\value{ -Nothing useful. -} -\description{ -Prescreening using shiny interface. -} -\author{ -Jessy Krier - -Mira Narayanan - -Hiba Mohammed Taha - -Anjana Elapavalore - -Todor Kondić -} diff --git a/man/rendersmiles2.Rd b/man/rendersmiles2.Rd deleted file mode 100644 index f9bf7abeb3d1f8c8e56eeac1e2b503fccf8f6976..0000000000000000000000000000000000000000 --- a/man/rendersmiles2.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mix.R -\name{rendersmiles2} -\alias{rendersmiles2} -\title{Turn SMILES to an Image Using Online Resource} -\usage{ -rendersmiles2(smiles, style = "cow", ...) -} -\arguments{ -\item{smiles}{The SMILES string.} - -\item{style}{Structure style.} - -\item{...}{Hand over to renderurl.} -} -\value{ -Nothing useful. -} -\description{ -Render smiles from an online resource. -} -\author{ -Todor Kondić -} diff --git a/man/renderurl.Rd b/man/renderurl.Rd deleted file mode 100644 index b1a143f46a0f5ba9655608ae19b03b45155be6d3..0000000000000000000000000000000000000000 --- a/man/renderurl.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mix.R -\name{renderurl} -\alias{renderurl} -\title{Render Compound from an Online Resource} -\usage{ -renderurl(depictURL, coords = c(0, 0, 100, 100), - filename = tempfile(fileext = ".svg")) -} -\arguments{ -\item{depictURL}{The URL of the object to plot.} - -\item{coords}{The positioning of the image (in data coords).} - -\item{filename}{Temp filename.} -} -\value{ -Nothing useful. -} -\description{ -Helper function for rendersmiles2 -} -\author{ -Todor Kondić -} diff --git a/man/sort_spectra.Rd b/man/sort_spectra.Rd new file mode 100644 index 0000000000000000000000000000000000000000..49954c4ac3954785862b6844d18aece0bd8095cf --- /dev/null +++ b/man/sort_spectra.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/api.R +\name{sort_spectra} +\alias{sort_spectra} +\title{Sort the Summary Table} +\usage{ +sort_spectra(m) +} +\arguments{ +\item{m}{} +} +\value{ +m +} +\description{ +Sets the key specified by DEF_KEY_SUMM and adds second indices, +either from DEF_INDEX_SUMM, or user-specified in +conf[\link{"summary table"}]$order. The order entry is a list of +strings with names of columns in summ, optionally prefixed with a +minus(-) sign. Columns prefixed with the minus are going to be in +ascending order. +} +\author{ +Todor Kondić +} diff --git a/man/subset_summary.Rd b/man/subset_summary.Rd new file mode 100644 index 0000000000000000000000000000000000000000..45deb6e014a31c3a9dd969d36e3c56cfec1f9f41 --- /dev/null +++ b/man/subset_summary.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/api.R +\name{subset_summary} +\alias{subset_summary} +\title{Subset the Summary Table} +\usage{ +subset_summary(m) +} +\arguments{ +\item{m}{} +} +\value{ +m +} +\description{ +Subsets the summary table by applying conditions set out in the +filter subkey of summary table key of the config. Each member of +filter is an expression that and all of them are chained together +using AND logical operation and applied to the summary table. +} +\author{ +Todor Kondić +} diff --git a/shiny-lessons.org b/shiny-lessons.org new file mode 100644 index 0000000000000000000000000000000000000000..51bcf2793a54a62fb8dd89010b6ce2e6ac64fb6b --- /dev/null +++ b/shiny-lessons.org @@ -0,0 +1,40 @@ +#+TITLE: Empirical Notes on Shiny + + +- It used to be better for Shinyscreen to retain the bare minimum of + observers and keep as much of functionality as possible inside + reactive functions. The fluidity of UI experience was reported to be + better. Probably the logic of the application got better, too. But, + going full reactive can sometimes be exhausting. These days, we + build some stuff inside observers. All in moderation, though. + +- Assignment to a reactive value does not add a dependency on that + value. + +- A reactive values list is not adding a dependency on all its + members. Not even if it was nested inside some other reactive values + list. + +- An ordinary list inside reactive values list, if used, will add + dependency on all its members. Even if only some of its members are + used, the dependency is on all the members. + +- Recursive dependencies can get ugly. If really needed, do not + depend, but use isolate. + +- Do *NOT* change the value of things you depend on. This creates + infinite loops. It may even work, but will be sloooow. + +- Single output produced by any input from a group of inputs: separate + observers are needed do differentiate between which of the inputs + have been triggered. + +- The only way to detect if the button press was acted upon in a + reactive expression that depends on multiple inputs is to raise a + flag, or write the state of the button when done. + +- As development progresses, keep an observer that transforms + shinyscreen mess into the logics of the workflow. Then move out as + much of this as possible into reactive functions. Example, + Shinyscreen underlying script flow was first emulated by an + observer.