diff --git a/R/api.R b/R/api.R index 9b54c1eec8fbcddab9d4f9ae2c071fa747b74bfd..385e7b19e22b6585f599a969bb7b0f30d13a7529 100644 --- a/R/api.R +++ b/R/api.R @@ -49,14 +49,14 @@ load_compound_input <- function(m) { for (l in 1:length(fns)) { fn <- fns[[l]] fnfields <- colnames(fn) - dt <- file2tab(fn) + dt <- file2tab(fn, colClasses=c(ID="character")) verify_cmpd_l(dt=dt,fn=fn) - nonexist <- setdiff(colnames,fields) - coll[[l]] <- dt[,(nonexist) := NULL] + nonexist <- setdiff(fnfields,fields) + coll[[l]] <- if (length(nonexist)==0) dt else dt[,(nonexist) := NULL] coll[[l]]$ORIG <- fn } - cmpds <- if (length(fns)>0) rbindlist(l=coll,use.names = T, fill = T) else EMPTY_CMPD_LIST + cmpds <- if (length(fns)>0) rbindlist(l=c(list(EMPTY_CMPD_LIST), coll), use.names = T, fill = T) else EMPTY_CMPD_LIST cmpds[,("known"):=.(the_ifelse(!is.na(SMILES),"structure",the_ifelse(!is.na(Formula),"formula","mz")))] m$input$tab$cmpds <- cmpds m$input$tab$setid <- read_setid(m$conf$compounds$sets, @@ -80,40 +80,34 @@ load_inputs <- function(m) { ##' @export mk_comp_tab <- function(m) { - message("Started assembling the lists of knowns and unknowns into the `comprehensive' table.") setid <- m$input$tab$setid setkey(setid,set) mzml<- m$input$tab$mzml setkey(mzml,set) - unk<-m$input$tab$unknown - setkey(unk,ID) - known<-m$input$tab$known - setkey(known,ID) + cmpds<-m$input$tab$cmpds + setkey(cmpds,ID) mzml[,`:=`(wd=sapply(Files,add_wd_to_mzml,m$conf$project))] - assert(xor(nrow(unk)==0,nrow(known)==0),msg="No compound lists have been provided. At least one of the known, or unknown compound lists is required.") - message("Begin generation of comp table.") - ## knowns - setidKnown<- mzml[setid[origin %in% "known"],.(tag,adduct,ID,set,Files,wd),on="set",allow.cartesian=T] - tab2file(tab=setidKnown,file="setidKnown.csv") - compKnown <- known[setidKnown,on=c("ID"),allow.cartesian=T] - setkey(compKnown,set,ID) - tab2file(tab=compKnown,file="compKnown.csv") - compKnown[,`:=`(mz=mapply(get_mz_from_smiles,SMILES,adduct,USE.NAMES = F))] - message("Generation of comp table: knowns done.") - ## unknows - setidUnk<-mzml[setid[origin %in% "unknown"],.(tag,adduct,ID,set,Files,wd),on="set",allow.cartesian=T] - compUnk <- unk[setidUnk,on="ID"] - message("Generation of comp table: unknowns done.") - df<-rbindlist(l=list(compKnown, compUnk),fill = T) - setnames(df,names(COMP_NAME_MAP), + assert(nrow(cmpds)>0,msg="No compound lists have been provided.") + message("Begin generation of the comprehensive table.") + + comp <- cmpds[setid,on="ID"][mzml,.(tag,adduct,ID,RT,set,Name,Files,wd,SMILES,Formula,mz,known),on="set",allow.cartesian=T] + tab2file(tab=comp,file=paste0("setidmerge",".csv")) + setkey(comp,known,set,ID) + + ## Known structure. + ## comp[,`:=`(mz=mapply(calc_mz_from_smiles,SMILES,adduct,ID,USE.NAMES = F))] + comp[known=="structure",`:=`(mz=calc_mz_from_smiles(SMILES,adduct,ID))] + + ## Known formula. + comp[known=="formula",`:=`(mz=calc_mz_from_formula(Formula,adduct,ID))] + setnames(comp,names(COMP_NAME_MAP), function(o) COMP_NAME_MAP[[o]]) - setcolorder(df,COMP_NAME_FIRST) + setcolorder(comp,COMP_NAME_FIRST) fn_out <- file.path(m$conf$project,FN_COMP_TAB) - - tab2file(tab=df,file=fn_out) + tab2file(tab=comp,file=fn_out) message("Generation of comp table finished.") - setkeyv(df,c("set","tag","mz")) - m$out$tab$comp <- df + setkeyv(comp,c("set","tag","mz")) + m$out$tab$comp <- comp m } diff --git a/R/mix.R b/R/mix.R index 29384e988554e19da43fb281690ff4a7cd3e7273..01003efbedcdb966e250592b0a7659d605b20b92 100644 --- a/R/mix.R +++ b/R/mix.R @@ -31,7 +31,7 @@ get_mz_cmp_l<-function(id,adduct,cmpL) { res } -calc_mz_from_formula <- function(chform,adduct,id) { +calc_mz_from_formula_outer <- function(chform,adduct,id) { check_chform <- enviPat::check_chemform(ISOTOPES,chform) wind <- which(check_chform$warning) if (length(wind) > 0) stop("Cannot understand the following formulas: ", @@ -76,12 +76,50 @@ calc_mz_from_formula <- function(chform,adduct,id) { dt } +calc_mz_from_formula <- function(chform,adduct,id) { + check_chform <- enviPat::check_chemform(ISOTOPES,chform) + wind <- which(check_chform$warning) + if (length(wind) > 0) stop("Cannot understand the following formulas: ", + paste(check_chform$new_formula[wind],collapse = ",")) + mol_form <- check_chform$new_formula + uad <- unique(adduct) + uadds <- lapply(uad,function(a) ADDUCTS[Name==a,.(Name, + add=as.character(Formula_add), + ded=as.character(Formula_ded), + charge=Charge),on=""]) + names(uadds) <- uad + adds <- rbindlist(l=lapply(adduct,function(a) uadds[[a]])) + + merger <- function (mol_form,add,ded) { + res <- numeric(length(mol_form)) + both_ind <- which(add != 'FALSE' & ded != 'FALSE') + add_only_ind <- which(add != 'FALSE' & ded == 'FALSE') + ded_only_ind <- which(ded != 'FALSE' & add == 'FALSE') + ainds <- c(both_ind,add_only_ind) + res[ainds] <- vapply(ainds,function (i) enviPat::mergeform(mol_form[[i]],add[[i]]),FUN.VALUE = character(1), USE.NAMES = F) + dinds <- c(both_ind,ded_only_ind) + res[dinds] <- vapply(dinds,function (i) { + z <- check_ded2(mol_form[[i]],ded[[i]]) + if (z) enviPat::subform(mol_form[[i]],ded[[i]]) else NA_character_ + }, + FUN.VALUE = character(1)) + res + } + forms <- merger(mol_form,adds$add,adds$ded) + mz <- the_ifelse(!is.na(forms), + mapply(function(ff,ch) enviPat::isopattern(ISOTOPES,chemforms = ff, + charge = ch, verbose = F)[[1]][1], + forms, + adds$charge, USE.NAMES = F), + NA_real_) + mz +} + calc_mz_from_smiles <- function(smiles,adduct,id) { - mol <- try(getMolecule(smiles), silent = T) mol <- lapply(smiles,function(s) try(RMassBank::getMolecule(s), silent = T)) check <- which(is.atomic(mol)) if (length(check) > 0) - stop("Errors in SMILES with IDs:",paste(id[which],collapse = T)) + stop("Errors in SMILES with IDs:",paste(id[which],collapse = ',')) mol_form <- sapply(mol,function(x) (rcdk::get.mol2formula(x))@string,USE.NAMES = F) names(mol_form) <- id @@ -90,6 +128,19 @@ calc_mz_from_smiles <- function(smiles,adduct,id) { } +calc_mz_from_smiles_outer <- function(smiles,adduct,id) { + mol <- lapply(smiles,function(s) try(RMassBank::getMolecule(s), silent = T)) + check <- which(is.atomic(mol)) + if (length(check) > 0) + stop("Errors in SMILES with IDs:",paste(id[which],collapse = ',')) + + mol_form <- sapply(mol,function(x) (rcdk::get.mol2formula(x))@string,USE.NAMES = F) + names(mol_form) <- id + calc_mz_from_formula_outer(mol_form,adduct,id) + + +} + @@ -98,15 +149,7 @@ calc_mz_from_smiles <- function(smiles,adduct,id) { ## } -get_mz_from_smiles <- function(smiles,Formula,mz,adduct,id) { - mapply(function (sm,frm,mz) { - if (!is.na(sm)) { - RChemMass::getSuspectMasses(smiles=sm,adduct_list = adduct) - } else if (!is.na(frm)) { - RChemMass::getAdductMassesFromFormula - } }) - RChemMass::getSuspectFormulaMass(smiles)[[ADDUCTMAP[[adduct]]]] -} + get_col_from_cmp_l<-function(id,cname,cmpL) { ind<-match(id,cmpL$ID) @@ -642,7 +685,7 @@ vald_comp_tab<-function(df,ndf,checkSMILES=F,checkMz=F,checkNames=F) { read_setid <- function(fn,cmpds) { assert(file.exists(fn),msg=paste("Please provide valid compounds set table:", fn)) assert(nrow(cmpds) > 0,msg="Please provide at least one compounds list.") - setid <- file2tab(fn) + setid <- file2tab(fn,colClasses=c(ID="character")) x<-cmpds[setid,on='ID'][,.SD,.SDcols=c(colnames(setid),'known')] sids <- unique(setid$ID) @@ -685,8 +728,8 @@ verify_cmpd_l <- function(dt,fn) { msg = paste('Compound list from ',fn, 'does not contain any of "SMILES", "Formula", or "mz". \nThe compound list needs at least one of those to be valid.')) exst <- ess[pres] - x <- lapply(exst,function (nm) all(is.na(dt[[nm]]))) - assert(!all(x), msg = paste('At least one of', paste(exst,collapse = T), + x <- lapply(exst,function (nm) do.call(all,as.list(is.na(dt[[nm]])))) + assert(!do.call(all,x), msg = paste('At least one of', paste(exst,collapse = ','), '\nmust contain some values in compound list from',fn)) invisible(T) diff --git a/R/resources.R b/R/resources.R index 9bb4fc9e0f779ae2b5ae87f796bba227ec4f231b..741a46b82bf471ce71f740f70f2041fec6c79510 100644 --- a/R/resources.R +++ b/R/resources.R @@ -39,15 +39,16 @@ data(isotopes,package = "enviPat", envir = .envp) ADDUCTS <- dtable(.envp$adducts) ISOTOPES <- dtable(.envp$isotopes) .envp <- NULL -ADDUCTMAP <- RChemMass:::adducts$Name -names(ADDUCTMAP) <- apply(ADDUCTS,1,function(row) { - nm <- row[["Name"]] - sgn <- row[["Charge"]] - suff <- if (sgn > 0) "+" else if (sgn < 0) "-" else "" - paste0("[",nm,"]",suff) -}) - -DISP_ADDUCTMAP <- c(c("UNSET"="UNSET_ADDUCT_ERROR"),ADDUCTMAP) +ADDUCTMAP <- ADDUCTS$Name +ADDUCTS$Name <- the_ifelse(ADDUCTS$Charge>0,paste0("[",ADDUCTS$Name,"]+"),paste0("[",ADDUCTS$Name,"]-")) +## names(ADDUCTMAP) <- apply(ADDUCTS,1,function(row) { +## nm <- row[["Name"]] +## sgn <- row[["Charge"]] +## suff <- if (sgn > 0) "+" else if (sgn < 0) "-" else "" +## paste0("[",nm,"]",suff) +## }) +## ADDUCTS$Name <- names(ADDUCTMAP) +DISP_ADDUCTS <- c("UNSET",ADDUCTS$Name) TAG_NA <- "::UNSET::" SET_NA <- "::UNSET::" TAG_DEF <- TAG_NA @@ -134,12 +135,11 @@ EMPTY_CMPD_LIST <- dtable(ID=character(), COMP_LIST_COLS <- c("ID","Name","SMILES","Formula","RT","mz") ## Comprehensive table properties COMP_NAME_MAP <- list(RT="rt") -## COMP_NAMES <-c("ID","mz","rt","adduct","set","origin","Name","SMILES") -COMP_NAME_FIRST <- c("ID","mz","rt","adduct","tag","set","Name","SMILES","Files","wd") +COMP_NAME_FIRST <- c("ID","mz","rt","adduct","tag","set","Name","known","SMILES","Formula","Files","wd") ## File table properties FTAB_KEY=c("set","tag","mz") -FTAB_NAMES=c("ID", "mz", "rt", "tag", "adduct", "set", "Name", "SMILES", "Files" , "wd","origin") +FTAB_NAMES=c("ID", "mz", "rt", "tag", "adduct", "set", "Name", "SMILES", "Files" , "wd","known") EMPTY_UNKNOWN <- dtable(ID=character(0),mz=numeric(0),RT=numeric(0),Name=character(0),CAS=character(0))