diff --git a/R/api.R b/R/api.R index deac8818f130421ec946b79afb65649460a68c0e..c70cfc6f77c41387728ff88fd1fb24349afe87c5 100644 --- a/R/api.R +++ b/R/api.R @@ -13,7 +13,7 @@ ## limitations under the License. - +##' @export run <- function(fn_conf) { conf <- read_conf(fn_conf) dir.create(conf$project, @@ -21,7 +21,7 @@ run <- function(fn_conf) { recursive = T) withr::with_dir(new=conf$project,code = run_in_dir(conf)) - conf + return() } @@ -29,122 +29,66 @@ run <- function(fn_conf) { run_in_dir <- function(conf) { m <- load_inputs(conf) m <- mk_comp_tab(m) + m <- gen_base_tab(m) + stop() m } +##' @export +gen_base_ftab <- function(m) { + files <- add_wd_to_mzml(df=m$input$tab$mzml,wdir=m$conf$project) + df <- gen_sup_ftab(files,m$out$tab$comp) + tab2file(df,file.path(m$conf$project,FN_FTAB_BASE)) + m$out$tab$ftab <- df +} + ##' @export load_inputs <- function(conf) { m<-list() m$conf <- conf m$input$tab$mzml <- file2tab(m$conf$data) m$input$tab$known <- file2tab(m$conf$compounds$known) - if (shiny::isTruthy(m$input$tab$unknown)) m$input$tab$unknown <- file2tab(m$conf$compounds$unknown) - m$input$tab$setid <- read_setid(m$conf$compounds$sets,m$input$tab$known,m$input$tab$unknown)#file2tab(m$conf$compounds$sets) + m$input$tab$unknown <- if (shiny::isTruthy(m$input$tab$unknown)) { + file2tab(m$conf$compounds$unknown) + } else EMPTY_UNK + m$input$tab$setid <- read_setid(m$conf$compounds$sets, + m$input$tab$known, + m$input$tab$unknown) m } - - mk_comp_tab <- function(m) { message("Started assembling the lists of knowns and unknowns into the `comprehensive' table.") setid <- m$input$tab$setid - mzML<- m$input$tab$mzml + setkey(setid,set,ID) + mzml<- m$input$tab$mzml + setkey(mzml,set) unk<-m$input$tab$unknown + setkey(unk,ID) known<-m$input$tab$known - assertthat::assert_that(xor(is.null(unk),is.null(known)),msg="No compound lists have been provided. At least one of the known, or unknown compound lists is required.") + setkey(known,ID) + assertthat::assert_that(xor(nrow(unk)==0,nrow(known)==0),msg="No compound lists have been provided. At least one of the known, or unknown compound lists is required.") message("Begin generation of comp table.") - idKnown<-known$ID - idUnk<-unk$ID ## knowns - setidKnown<- setid[origin=="known",] - sets<-setid[origin=="known",unique(set)] - nRow<-0 - for (s in sets) { - sMode<-get_set_mode(s,mzML) - n<-length(sMode) - nRow<-nRow+n*length(which(setidKnown$set %in% s)) - - } - compKnown<-dtable( - ID=rep(0,nRow), - mz=rep(0.0,nRow), - rt=rep(NA,nRow), - mode=rep("",nRow), - set=rep("",nRow), - origin=rep("known",nRow), - Name=rep("",nRow), - SMILES=rep("",nRow)) - - i<-1 - for (s in sets) { - sMode<-get_set_mode(s,mzML) - - - for (md in sMode) { - for (id in setidKnown[set == s,ID]) { - compKnown[i,"ID"]<-id - compKnown[i,"mode"]<-md - compKnown[i,"set"]<-s - compKnown[i,"mz"]<-get_mz_cmp_l(id,md,known) - sm<-get_col_from_cmp_l(id,"SMILES",known) - nm<-get_col_from_cmp_l(id,"Name",known) - rt<-get_col_from_cmp_l(id,"rt",known) - compKnown[i,"SMILES"]<-sm - compKnown[i,"Name"]<-nm - compKnown[i,"rt"]<-rt - i<-i+1 - } - - } - } + setidKnown<- merge(mzml[,.(mode,set)],setid[origin=="known",],allow.cartesian = T) + compKnown <- setidKnown[known,on="ID"] + compKnown[,`:=`(mz=mapply(get_mz_from_smiles,SMILES,mode,USE.NAMES = F))] message("Generation of comp table: knowns done.") - ## unknows - setidUnk<-setid[origin=="unknown",] - sets<-setid[origin=="unknown",unique(set)] - nRow<-0 - for (s in sets) { - sMode<-get_set_mode(s,mzML) - n<-length(sMode) - if (n>1) stop("Set of unknowns ",s,"has more than one mode. Sets of unknowns cannot have more than one mode.") - - nRow<-nRow+length(which(setidUnk$set %in% s)) - - } - compUnk<-dtable( - ID=rep(0,nRow), - mz=rep(0.0,nRow), - rt=rep(NA,nRow), - mode=rep("",nRow), - set=rep("",nRow), - origin=rep("unknown",nRow), - Name=rep("",nRow), - SMILES=rep("",nRow), - stringsAsFactors=F) - - - i<-1 - for (s in sets) { - md<-get_set_mode(s,mzML) - for (id in setidUnk[ set == s, ID]) { - compUnk[i,"ID"]<-id - compUnk[i,"mode"]<-md - compUnk[i,"set"]<-s - compUnk[i,"mz"]<-get_col_from_cmp_l(id,"mz",unk) - nm<-get_col_from_cmp_l(id,"Name",unk) - rt<-get_col_from_cmp_l(id,"rt",unk) - compUnk[i,"Name"]<-nm - compUnk[i,"rt"]<-rt - i<-i+1 - } - } + ## unknows + setidUnk<-merge(mzml[,.(mode,set)],setid[origin=="unknown",],allow.cartesian = T) + compUnk <- setidUnk[unk,on="ID"] message("Generation of comp table: unknowns done.") - df<-rbindlist(l=list(compKnown, compUnk)) + df<-rbindlist(l=list(compKnown, compUnk),fill = T) + setnames(df,names(COMP_NAME_MAP), + function(o) COMP_NAME_MAP[[o]]) fn_out <- file.path(m$conf$project,FN_COMP_TAB) + tab2file(tab=df,file=fn_out) message("Generation of comp table finished.") + setkeyv(df,c("set","tag","mz")) m$out$tab$comp <- df m } @@ -157,11 +101,12 @@ read_conf <- function(fn_conf) { conf } +##' @export vrfy_conf <- function(conf) { ## * Existence of input files - ## ** Data files - for (fn in unlist(conf$data,recursive=T)) assertthat::assert_that(file.exists(fn),msg=paste("Unable to read data file:",fn)) + + fn_cmpd_known <- conf$compounds$known fn_cmpd_unk <- conf$compounds$unknown fn_cmpd_sets <- conf$compounds$sets @@ -207,28 +152,4 @@ vrfy_conf <- function(conf) { return(conf) } -mk_mzml_tab <- function(data) { - files <- unlist(data,recursive = T) - sets <- unique(names(data)) - tags <- c() - for (s in sets) { - tags<-c(tags,names(data[[s]])) - } - tags<-unique(tags) - nr<-length(files) - z<-suppressWarnings(data.table::data.table(Files=character(nr), - mode=factor(levels = names(MODEMAP)), - set=factor(levels = sets), - tag=factor(levels= c(TAG_DEF,tags)), - stringsAsFactors = F)) - z$Files <- files - i <- 1 - for (s in names(data)) { - for (t in names(data[[s]])) { - z[i,"set"] <- s - z[i,"tag"]<-t - i<-i+1 - } - } - z -} + diff --git a/R/base.R b/R/base.R index 085abf3ff0ec132e49ea9201a3a4aeb3b2377990..e45bfa7e8b1cd81cd04dd654df2dbcbbd9e4414c 100644 --- a/R/base.R +++ b/R/base.R @@ -16,6 +16,7 @@ ##' @import data.table ## Redirections the_ifelse <- data.table::fifelse +dtable <- data.table::data.table tab2file<-function(tab,file,...) { data.table::fwrite(x=tab,file=file,...) diff --git a/R/extraction.R b/R/extraction.R index e3ba5207691988d8aec309fb41593525bc6a7ee0..3f49abbc4d27a706efa6a888f28be01def975bb1 100644 --- a/R/extraction.R +++ b/R/extraction.R @@ -267,20 +267,6 @@ gen_ms1_chrom_ht<-function(raw,mz,errEIC,rt=NULL,errRT=NULL) { res } - - -tab2file<-function(tab,file,...) { - write.csv(x=tab,file=file,row.names=F,...) -} - -file2tab<-function(file,stringsAsFactors=F,comment.char='',...) { - read.csv(file=file, - header=T, - stringsAsFactors=stringsAsFactors, - comment.char=comment.char, - na.strings=c("","NA"),...) -} - get_ext_width <- function(maxid) {as.integer(log10(maxid)+1)} id_fn_ext<-function(width,id) { formatC(as.numeric(id),width=width,flag=0) diff --git a/R/mix.R b/R/mix.R index 3a3094fbd2b76744c583ec5dacb3a6e668b86c43..74b70be980d44176d1dae7fee962426cad86b479 100644 --- a/R/mix.R +++ b/R/mix.R @@ -31,6 +31,10 @@ get_mz_cmp_l<-function(id,mode,cmpL) { res } +get_mz_from_smiles <- function(smiles,mode) { + RChemMass::getSuspectFormulaMass(smiles)[[MODEMAP[[mode]]]] +} + get_col_from_cmp_l<-function(id,cname,cmpL) { ind<-match(id,cmpL$ID) x<-cmpL[[cname]][[ind]] @@ -449,7 +453,7 @@ plot_id_msn <- function(ni, -adornmzMLTab<-function(df,projDir=getwd()) { +add_wd_to_mzml <- function(df,wdir) { pref<-df$set mask<-is.na(pref) drop<-df$files[mask] @@ -457,36 +461,16 @@ adornmzMLTab<-function(df,projDir=getwd()) { df<-df[!mask,] pref<-df$set wd<-basename(tools::file_path_sans_ext(df$Files)) - wd<-file.path(projDir,pref,wd) + wd<-file.path(wdir,pref,wd) df$wd<-wd df } -genSuprFileTab <- function(fileTab,compTab) { - genOne<-function(ids,fn) { - - K<-length(ids) - fTabRow<-fileTab[fileTab$Files == fn,] - cols<-lapply(names(fileTab),function(n) rep(fTabRow[[n]],K)) - names(cols)<-NULL - cols<-c(cols,list(ids)) - names(cols)<-c(names(fileTab),"ID") - df<-as.data.frame(cols,stringsAsFactors = F) - df - } - - tabs<-lapply(fileTab$Files,function(fn) - { - wh<-which(fileTab$Files==fn) - set<-fileTab$set[[wh]] - md<-fileTab$mode[[wh]] - sel<-(compTab$set %in% set) & (compTab$mode %in% md) - ids<-compTab$ID[sel] - genOne(ids,fn) - - }) - res<-do.call(rbind,tabs) - res +gen_sup_ftab <- function(ftab,ctab) { + df<-ctab[ftab,on=c("set","mode"),allow.cartesian=T] + setkeyv(df,cols=FTAB_KEY) + setcolorder(df,neworder = FTAB_NAMES) + df } getEntryFromComp<-function(entry,id,set,mode,compTab) { @@ -510,30 +494,30 @@ getEntryFromComp<-function(entry,id,set,mode,compTab) { res } -addCompColsToFileTbl<-function(ft,compTab) { - nR<-nrow(ft) - mzCol<-rep(NA,nR) - nmCol<-rep("",nR) - rtCol<-rep(NA,nR) +## add_comp_ftab <- function(ft,ctab) { +## nR<-nrow(ft) +## mzCol<-rep(NA,nR) +## nmCol<-rep("",nR) +## rtCol<-rep(NA,nR) - for (ir in 1:nR) { - id<-ft[ir,"ID"] - set<-ft[ir,"set"] - m<-ft[ir,"mode"] - entries<-getEntryFromComp(c("mz","Name","rt"),id,set,m,compTab) - mzCol[[ir]]<- entries[["mz"]] - nm<-entries[["Name"]] - nmCol[[ir]]<- if (!is.na(nm)) nm else "" - rtCol[[ir]]<- entries[["rt"]] - } - ft$mz<-mzCol - ft$Name<-nmCol - ft$rt<-rtCol - ft -} - -get_set_mode <- function(set,mzml) { - unique(mzml[set==..set,mode]) +## for (ir in 1:nR) { +## id<-ft[ir,"ID"] +## set<-ft[ir,"set"] +## m<-ft[ir,"mode"] +## entries<-getEntryFromComp(c("mz","Name","rt"),id,set,m,ctab) +## mzCol[[ir]]<- entries[["mz"]] +## nm<-entries[["Name"]] +## nmCol[[ir]]<- if (!is.na(nm)) nm else "" +## rtCol[[ir]]<- entries[["rt"]] +## } +## ft$mz<-mzCol +## ft$Name<-nmCol +## ft$rt<-rtCol +## ft +## } + +get_set_mode <- function(s,mzml) { + unique(mzml[set == s,mode]) } vald_comp_tab<-function(df,ndf,checkSMILES=F,checkMz=F,checkNames=F) { @@ -597,3 +581,14 @@ vald_comp_tab<-function(df,ndf,checkSMILES=F,checkMz=F,checkNames=F) { df } + +read_setid <- function(fn,known,unk) { + setid <- file2tab(fn) + id_k <- known$ID + id_u <- unk$ID + tmp <- setid[,.(ID,set,origin=the_ifelse(ID %in% id_k,"known",NA_character_))] + tmp <- tmp[,.(ID,set,origin=the_ifelse(is.na(origin) & ID %in% id_u,"unknown",origin))] + natmp <- tmp[is.na(origin),.(ID,set)] + assertthat::assert_that(nrow(natmp)==0,msg=paste("The following IDs from set table have not been found in the compound table:","------",print_table(natmp),"------",sep = "\n")) + tmp +} diff --git a/R/resources.R b/R/resources.R index 7a8d8933f9412c32f32a6a28d767b5cfd0060904..dea508d5525c73a91370fd57856457c7aae679d8 100644 --- a/R/resources.R +++ b/R/resources.R @@ -98,3 +98,14 @@ MS1_SN_FAC <- 3.0 ## Shiny objects NUM_INP_WIDTH="15%" + +## Comprehensive table properties +COMP_NAME_MAP <- list(RT="rt") +# COMP_NAMES <-c("ID","mz","rt","mode","set","origin","Name","SMILES") + +## File table properties +FTAB_KEY=c("set","tag","mz") +FTAB_NAMES=c("ID", "mz", "rt", "tag", "mode", "set", "Name", "SMILES", "Files" , "wd","origin") + + +EMPTY_UNK <- data.table(ID=character(0),mz=numeric(0),RT=numeric(0),Name=character(0),CAS=character(0))