Newer
Older
## Constants
FN_FTAB_BASE<-"ftable.base.csv"
FN_FTAB_PP<-"ftable.pp.csv"
FN_PP_OUT_PREF<-"PP.filetable"
FN_FTAB<-"ftable.csv"
MODEMAP=list(pH="MpHp_mass",
mH="MmHm_mass",
pNH4="MpNH4_mass",
pNa="MpNa_mass")
DEFAULT_RT_RANGE=c(NA,NA)
QANAMES <- c("MS1","MS2","Alignment","AboveNoise")
PLOT_DEF_TAGS<-NA
PLOT_DEF_SET<-NA
ppInpFt<-function() {
tempfile(pattern=FN_PP_OUT_PREF,fileext=".csv")
}
stripext<-function(fn) {
bits<-strsplit(fn,split="\\.")[[1]]
if (length(bits)> 1) paste(head(bits,-1),collapse=".") else fn}
idsFromFiles<-function(setDir) {
fls<-list.files(path=setDir,patt=".*eic.*csv",rec=T)
bas<-basename(fls)
res<-strsplit(bas,"\\.")
sapply(res,function (r) as.integer(r[[1]]))
}
##' Create directories without drama.
##'
##'
##' @title Create directories without drama
##' @return The character string containing the input argument `path`.
##' @author Todor Kondić
no_drama_mkdir<-function(path) {
f <- Vectorize(function(path) {
if (! dir.exists(path)) dir.create(path)
path},vectorize.args="path")
f(path)
}
##' Produce the Rmb Settings file
##'
##' Produce the Rmb Settings file based on the customisation file in
##' YAML format.
##'
##' @title Generate RMassBank settings file.
##' @param sett_alist The named list of settings that are different
##' from the RMassBank defaults.
##' @param file The name of the YAML specification that will be merged
##' with the template Rmb settings file.
##' @return NULL
mk_sett_file<-function(sett_alist,file) {
tmp<-tempfile()
RMassBank::RmbSettingsTemplate(tmp)
for (nm in names(sett_alist)) {
sett[[nm]]<-sett_alist[[nm]]
}
##' Combine RMB settings with different collisional energies into one
##' settings file with multiple collisional energy entries.
##' @title Combine RMB Settings With Different Collisional Energies
##' @param sett_fns A list of settings files.
##' @param fname The name of the combined file.
##' @return fname
##' @author Todor Kondić
mk_combine_file<-function(sett_fns,fname) {
all_settings <- lapply(sett_fns,yaml::yaml.load_file)
comb_settings <- all_settings[[1]]
for (n in 1:length(all_settings)) {
comb_settings$spectraList[[n]] <- all_settings[[n]]$spectraList[[1]]
}
yaml::write_yaml(x=comb_settings,fname)
fname
}
fn_data2wd <- function(fn_data,dest) {
f <- Vectorize(function(fn_data) {
noext <- stripext(fn_data)
file.path(dest,basename(noext))
},vectorize.args="fn_data")
f(fn_data)
}
gen_presc_d <- function(wd) dir.create(wd,recursive = T,showWarnings = F)
get_cmpd_l_fn <- function(wd) {
f <- function(wd) file.path(wd,"compounds.csv")
fv <- Vectorize(f,vectorize.args=c("wd"))
fv(wd)
}
get_stgs_fn <- function(wd) {
f <- function(wd) file.path(wd,"settings.ini")
fv <- Vectorize(f,vectorize.args=c("wd"))
fv(wd)
}
get_ftable_fn <- function(wd) {
fv <- Vectorize(f,vectorize.args=c("wd"))
fv(wd)
}
get_inp_stgs_fn<- function(fn_data) {
f <- Vectorize(function(fn_data) {
bnm <- stripext(fn_data)
fn <- paste(bnm,".ini",sep='')},
vectorize.args="fn_data")
f(fn_data)}
get_info_dir <- function(wd) {
file.path(wd,"info")
}
get_info_fn <- function(wd) {
file.path(get_info_dir(wd),"info.csv")
}
gen_info_dir <- function(wd) {
nm <- get_info_dir(wd)
no_drama_mkdir(nm)
nm
}
emptyfield <- function (f) {length(f) == 0 | is.na(f) | f == ""}
##' Generate the RMassBank compound list from the input compound list
##' in CSV file src_fn. The input compound list format is either a
##' Chemical Dashboard csv file with, at least, PREFERRED_ SMILES
##' columns _filled_ out, or just an ordinary CSV file with columns
##' SMILES and Names filled. Argument dest_fn is the destination
##' filename. Returns the number of compounds.
##'
##' @title Generate Compound List File
##' @param src_fn The input compound list CSV filename.
##' @param dest_fn The resulting compound list CSV filename.
##' @return Number of compounds.
##' @author Todor Kondić
df<-read.csv(src_fn,sep=',',stringsAsFactors=F,comment.char='')
## Names
nms<-if ("PREFERRED_NAME" %in% names(df)) df$PREFERRED_NAME else df$Name
## CAS
casvals<-if ("CASRN" %in% names(df)) df$CASRN else df$CAS
## CAS
rt<- df$RT
if (is.null(casvals)) casvals <- rep(NA,sz)
if (is.null(nms)) nms <- rep(NA,nrow(df))
if (is.null(rt)) rt <- rep(NA,nrow(df))
odf <- data.frame(ID=df$ID,Name=nms,SMILES="",mz=NA,RT=rt,Level=3,CAS=casvals,stringsAsFactors=F)
for (ri in 1:nrow(df)) {
if (emptyfield(df$SMILES[ri])) {
if (! emptyfield(df$Mass[ri])) {
odf$mz[ri] <- df$Mass[ri]
odf$Level[ri] <- 5
} else
stop ("At row ",ri," of the input compound list, there are neither SMILES, nor Mass to be found.")
} else odf$SMILES[ri] <- df$SMILES[ri]
}
##' Generates settings file and loads it.
##'
##' @title Generate and Load the RMassBank Settings File
##' @param wd Directory under which results are archived.
##' @return result of RMassBank::loadRmbSettings
##' @author Todor Kondić
stgs<-if (is.character(stgs)) yaml::yaml.load_file(stgs) else stgs
}
##' Generates the RMassBank compound list and loads it.
##'
##' @title Generate and Load the RMassBank Compound List
##' @param wd Directory under which results are archived.
##' @return Named list. The key `fn_cmpdl` is the path of the
##' generated compound list and the key `n` the number of
##' compounds.
##' @author Todor Kondić
gen_cmpdl_and_load <- function(wd,fn_cmpdl) {
fn_comp<-get_cmpd_l_fn(wd)
n_cmpd<-gen_cmpd_l(fn_cmpdl,fn_comp)
RMassBank::loadList(fn_comp,check=F) #reduce universality of this statement!!!
list(fn_cmpdl=fn_comp,n=n_cmpd)
}
##' Generates file table.
##'
##'
##' @title Generate and Load the RMassBank Settings File
##' @param fn_data The mzML filename.
##' @param wd Directory under which results are archived.
f <- Vectorize(function(fn_data,wd) {
df_table<-data.frame(Files=rep(fn_data,n_cmpd),ID=1:n_cmpd)
fn_table<-get_ftable_fn(wd)
write.csv(x=df_table,file=fn_table,row.names=F)
fn_table
}, vectorize.args=c("fn_data","wd"))
f(fn_data,wd)
}
gen_ftable <- function(id,fnData,wd) {
n <- length(id)
files <- rep(fnData,n)
df <- data.frame(Files=files,ID=id,wd=wd,stringsAsFactors=F)
write.csv(x=df,file=get_ftable_fn(wd))
}
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
gen_fn_stgs <- function(fn_inp,fn) {
f <- Vectorize(function(fn_inp,fn) {
stgs <- yaml::yaml.load_file(fn_inp)
mk_sett_file(stgs,fn)
fn}, vectorize.args=c("fn_inp","fn"))
f(fn_inp,fn)
}
conf <- function(fn_data,fn_cmpd_l,dest) {
no_drama_mkdir(dest)
wd <- fn_data2wd(fn_data,dest)
no_drama_mkdir(wd)
fn_inp_stgs <- get_inp_stgs_fn(fn_data)
fn_stgs <- get_stgs_fn(wd)
fn_out_cmpd_l <- get_cmpd_l_fn(wd)
gen_fn_stgs(fn_inp_stgs,fn_stgs)
n_cmpd <- gen_cmpd_l(fn_cmpd_l,fn_out_cmpd_l)
gen_ftable(fn_data,wd,n_cmpd)
}
reconf <- function(wd) {## Load the settings.
fn_stgs <- get_stgs_fn(wd)
RMassBank::loadRmbSettings(fn_stgs)
## Load the compound list.
fn_cmpd_l <- get_cmpd_l_fn(wd)
RMassBank::loadList(fn_cmpd_l)
##' Prescreens. Writes data out. Adapted from ReSOLUTION
##'
##'
##' @title Prescreen
##' @param wd Absolute path to the directory that will contain the
##' resulting data frame.
##' @param RMB_mode ...
##' @param FileList ...
##' @param cmpd_list ...
##' @param ppm_limit_fine ...
##' @param EIC_limit ...
##' @author Emma Schymanski, Todor Kondić
RMB_EIC_prescreen_df_old <- function (wd, RMB_mode, FileList, cmpd_list,
ppm_limit_fine = 10, EIC_limit = 0.001) {
n_spec <- 0
cmpd_RT_maxI <- ""
msms_found <- ""
rts <- 0
max_I_prec <- ""
cmpd_RT_maxI_min <- ""
file_list <- read.csv(FileList, stringsAsFactors = FALSE,comment.char='')
cmpd_info <- read.csv(cmpd_list, stringsAsFactors = FALSE,comment.char='')
get_width <- function(maxid) {log10(maxid)+1}
id_field_width <- get_width(ncmpd)
fn_out<- function(id,suff) {file.path(odir,paste(formatC(id,width=id_field_width,flag=0),suff,".csv",sep=''))}
f <- mzR::openMSfile(file_list$Files[1])
for (i in 1:length(file_list$ID)) {
cpdID <- file_list$ID[i]
n_spec <- n_spec + 1
smiles <- tryCatch(RMassBank::findSmiles(cpdID), error = function(e) NA)
if (!is.na(smiles)) {
mz <- as.numeric(RMassBank::findMz(cpdID, RMB_mode)[3])
}
else {
mz <- as.numeric(RMassBank::findMz(cpdID, RMB_mode, retrieval = "unknown")[3])
}
eic <- RMassBank::findEIC(f, mz, limit = EIC_limit)
msms_found[n_spec] <- FALSE
msms <- RMassBank::findMsMsHR.mass(f, mz, 0.5, RMassBank::ppm(mz, ppm_limit_fine,
max_I_prec_index <- which.max(eic$intensity)
cmpd_RT_maxI[n_spec] <- eic[max_I_prec_index, 1]
max_I_prec[n_spec] <- eic[max_I_prec_index, 2]
cmpd_RT_maxI_min[n_spec] <- as.numeric(cmpd_RT_maxI[n_spec])/60
write.csv(x=eic[c("rt","intensity")],file=fn_out(cpdID,".eic"),row.names=F)
for (specs in msms) {
if (specs@found == TRUE) {
df <- do.call(rbind, lapply(specs@children, function(sp) c(sp@rt,
intensity = max(sp@intensity))))
if (nrow(cpd_df)>0) write.csv(x=cpd_df,file=fn_out(cpdID,".kids"),row.names=F)
write.csv(cbind(file_list$ID, cmpd_info$mz, cmpd_info$Name,
cmpd_RT_maxI, cmpd_RT_maxI_min, max_I_prec, msms_found),
file = file.path(odir,"RTs_wI.csv"),
row.names = F)
}
##' Prescreens. Writes data out. Adapted from ReSOLUTION
##'
##'
##' @title Prescreen
##' @param wd Absolute path to the directory that will contain the
##' resulting data frame.
##' @param RMB_mode ...
##' @param FileList ...
##' @param cmpd_list ...
##' @param ppm_limit_fine ...
##' @param EIC_limit ...
##' @author Emma Schymanski, Todor Kondić
RMB_EIC_prescreen_df <- function (wd, RMB_mode, FileList, cmpd_list,
ppm_limit_fine = 10, EIC_limit = 0.001) {
n_spec <- 0
cmpd_RT_maxI <- ""
msms_found <- ""
rts <- 0
max_I_prec <- ""
cmpd_RT_maxI_min <- ""
file_list <- read.csv(FileList, stringsAsFactors = FALSE,comment.char='')
cmpd_info <- read.csv(cmpd_list, stringsAsFactors = FALSE,comment.char='')
ncmpd <- nrow(cmpd_info)
odir=wd
fid <- file_list$ID
cmpind <- which(cmpd_info$ID %in% fid)
mzCol <- cmpd_info$mz[cmpind]
nmCol <- cmpd_info$Name[cmpind]
get_width <- function(maxid) {log10(maxid)+1}
id_field_width <- get_width(ncmpd)
fn_out<- function(id,suff) {file.path(odir,paste(formatC(id,width=id_field_width,flag=0),suff,".csv",sep=''))}
f <- mzR::openMSfile(file_list$Files[1])
for (i in 1:length(file_list$ID)) {
cpdID <- file_list$ID[i]
n_spec <- n_spec + 1
smiles <- tryCatch(RMassBank::findSmiles(cpdID), error = function(e) NA)
mz<-if (!is.na(smiles)) {
mz <- as.numeric(RMassBank::findMz(cpdID, RMB_mode)[3])
} else {
mzCol[[i]] ## TODOR REMOVE mz <- as.numeric(RMassBank::findMz(cpdID, RMB_mode, retrieval = "unknown")[3])
}
message("Pre findeic")
## TODOR REMOVED if (is.na(mzCol[[i]])) mzCol[[i]] <- mz ## infer from findMz.
msms_found[n_spec] <- FALSE
msms <- RMassBank::findMsMsHR.mass(f, mz, 0.5, RMassBank::ppm(mz, ppm_limit_fine,
p = TRUE))
message("here?")
max_I_prec_index <- which.max(eic$intensity)
cmpd_RT_maxI[n_spec] <- eic[max_I_prec_index, 1]
max_I_prec[n_spec] <- eic[max_I_prec_index, 2]
cmpd_RT_maxI_min[n_spec] <- as.numeric(cmpd_RT_maxI[n_spec])/60 ## conversion to minutes
if (length(eic$rt)>0) eic$rt <- eic$rt/60 ## conversion to minutes
write.csv(x=eic[c("rt","intensity")],file=fn_out(cpdID,".eic"),row.names=F)
bindKids <- function(kids)
do.call(rbind,lapply(kids,function (kid)
c(rt=kid@rt,intensity=max(kid@intensity))))
bindSpec <- function(specLst) {
do.call(rbind,lapply(specLst,function (sp) bindKids(sp@children)))
}
found <- which(vapply(msms,function(sp) sp@found,FUN.VALUE=F))
msmsExst <- msms[found]
## message("found:",found)
## message("Lall:",length(msms))
## message("Lsome:",length(msmsExst))
if (length(found)>0) {
msms_found[n_spec] <- T
msmsTab <- as.data.frame(bindSpec(msmsExst),stringsAsFactors=F)
names(msmsTab) <- c("rt","intensity")
if (nrow(msmsTab)>0) {
msmsTab$rt <- msmsTab$rt/60 ## conversion to minutes
write.csv(x=msmsTab,file=fn_out(cpdID,".kids"),row.names=F)
}
}
rts[i] <- (cmpd_RT_maxI[n_spec])
}
mzR::close(f)
rtwiDf <- data.frame(ID=file_list$ID, mz=mzCol, Name=nmCol,
cmpd_RT_maxI=cmpd_RT_maxI, cmpd_RT_maxI_min=cmpd_RT_maxI_min,
max_I_prec=max_I_prec, msms_found=msms_found,stringsAsFactors=F)
write.csv(rtwiDf, file = file.path(odir,"RTs_wI.csv"), row.names = F)
}
preProcLai <- function (fnFileTab,fnDest=paste(stripext(fnFileTab),"_candidate.csv",sep=''),noiseFac=3,rtDelta=0.5,ms1_intTresh=1e5,ms2_intTresh=1e4,MS1peakWi=0.3) {
ftable <- read.csv(file = fnFileTab, header = T, sep=",", stringsAsFactors = F,comment.char='')
id_field_width <- getWidth(max(ids))
fn_out<- function(id,suff) {paste(formatC(id,width=id_field_width,flag=0),suff,".csv",sep='')}
## for loop through dataframe called file to set tresholds
ftable[c("MS1","MS2","Alignment","MS1Intensity","AboveNoise","MS2Intensity","MS1peakWidth")] <- T
ftable$Comments <- ""
for (ind in 1:nrow(ftable)) {
wd <- ftable$wd[ind]
id <- ftable$ID[ind]
odir=file.path(wd)
fn_eic <- file.path(wd,fn_out(id,".eic"))
eic <- NULL
maxInt <- NULL
eicExists <- T
##does MS1 exist?
if(!file.exists(fn_eic)) {
eic <- read.csv(fn_eic, sep = ",", stringsAsFactors = F,comment.char='')
##is MS1 intensity high enough?
if (maxInt < ms1_intTresh) {
ftable[ind,"MS1Intensity"] = FALSE
##Detect noisy signal. This is a naive implementation, so careful.
mInt <- mean(eic$intensity)
if (maxInt < noiseFac*mInt) ftable[ind,"AboveNoise"] <- F
##Is MS1 peak a proper peak, or just a spike? Check peakwidth.
MS1peakWi
##does MS2 exist? Regardless of quality/alignment.
if(!file.exists(fn_kids)) {
ftable[ind,"MS2"] = FALSE
ftable[ind,"MS2Intensity"] = NA #moot
ftable[ind,"Alignment"] = NA #moot
## Detect RT shifts. Naive implementation, so careful.
if (eicExists) { ################WHY THIS IF CONDITIONAL?? If MS2 exists, then eic MUST exist, no? Seems redundant.
##Is MS2 intensity high enough?
ms2maxInt <- max(msms@intensity)
if (ms2maxInt > ms2_intTresh){
rtInd <- match(maxInt,eic$intensity) #returns position of first match in eic$intensity
rtMax <- eic$rt[rtInd] #fetch the rtmax value (RT with highest int;seconds) using above index
msms <- read.csv(fn_kids, sep = ",", stringsAsFactors = F,comment.char='')
whc <- msms$rt > rtMax - rtDelta #T/F vector: are RT vals of ms2 above rtMax within Delta? Good peaks=TRUE;rt must be retentionTime!!!
whc <- whc < rtMax + rtDelta #T/F vector: are RT vals of ms2 above rtMax within Delta?
#Overwrites whc! In any case, cannot use this as both must be T to give final whc of T (i.e. fall within the window).
ints <- msms$intensity[whc] #builds ints vector with intensities which fall in window
if (! any(ints>0)) ftable[ind,"Alignment"] = FALSE #if none of ints larger than 0, Alignment <-F
} else {
ftable[ind,"MS2Intensity"] = FALSE
ftable[ind,"Alignment"] = NA #neither T or F, the MS2 was not of decent intensity in first place, alignment moot.
}
}
}
}
## get a csv outfile
write.csv(ftable, file = fnDest,row.names=F)
}
preProc <- function (fnFileTab,lCmpdList,fnDest=paste(stripext(fnFileTab),"_candidate.csv",sep=''),noiseFac=3,rtDelta=0.5,intTresh=1e5) {
ftable <- read.csv(file = fnFileTab, header = T, sep=",", stringsAsFactors = F,comment.char='')
getWidth <- function(maxid) {log10(maxid)+1}
ids <- as.numeric(levels(factor(ftable$ID)))
id_field_width <- getWidth(lCmpdList)
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
fn_out<- function(id,suff) {paste(formatC(id,width=id_field_width,flag=0),suff,".csv",sep='')}
## For loop through dataframe called file to set thresholds.
ftable[c("MS1","MS2","Alignment","AboveNoise")] <- T
ftable["MS2rt"] <- NA
## QA check plan:
##
## If MS1 does not exist, set MS1 to F, as well as everything else except MS2.
## If it exists, proceed to noise check.
## If noise check fails, set AboveNoise and Alignment to F.
##
##
## MS2 will be checked independently.
## If MS2 does not exist, set MS2 and Alignment to F.
## If it does, check the Alignment.
## If Alignment is wrong, set Alignment to F.
##
## Terminology: MS1 does not exist if the intensity is below the
## intensity threshold. MS2 does not exist if it was not picked up
## during the dataframe generation stage. In this case, the file
## with the corresponding ID will not be there.
ftable$Comments <- ""
for (ind in 1:nrow(ftable)) {
wd <- ftable$wd[ind]
id <- ftable$ID[ind]
odir=file.path(wd)
fn_eic <- file.path(wd,fn_out(id,".eic"))
eic <- NULL
maxInt <- NULL
if (!file.exists(fn_eic)) {
warning("File ",fn_eic,"does not exist. Skipping.")
next
}
eic <- read.csv(fn_eic, sep = ",", stringsAsFactors = F,comment.char='')
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
maxInt <- max(eic$intensity)
##If MS1 does not exist, set entry to F.
if (maxInt < intTresh) {
ftable[ind,"MS1"] <- F
## Other checks automatically fail, too.
ftable[ind,"Alignment"] <- F
ftable[ind,"AboveNoise"] <- F
} else {
## Noisy?
if (ftable[ind,"AboveNoise"]) {
mInt <- mean(eic$intensity)
if (maxInt < noiseFac*mInt) {
ftable[ind,"AboveNoise"] <- F
ftable[ind,"Alignment"] <- F ## If noisy, this is
## probably meaningles, so
## F.
}
}
}
## MS2 checks.
fn_kids <- file.path(wd,fn_out(id,".kids"))
if (!file.exists(fn_kids)) {
ftable[ind,"MS2"] <- F
ftable[ind,"Alignment"] <- F
} else {
## Alignment still makes sense to be checked?
if (ftable[ind,"Alignment"]) {
rtInd <- match(maxInt,eic$intensity)
rtMS1Peak <- eic$rt[[rtInd]]
msms <- read.csv(fn_kids, sep = ",", stringsAsFactors = F,comment.char='')
rtInd <- which(msms$rt > rtMS1Peak - rtDelta & msms$rt < rtMS1Peak + rtDelta)
msmsRT <- msms$rt[rtInd]
if (length(msmsRT) > 0) {
ftable[ind,"iMS2rt"] <- which.min(abs(msmsRT - rtMS1Peak))
ftable[ind,"MS2rt"] <- msmsRT[ftable[ind,"iMS2rt"]]
}
}
}
}
write.csv(ftable, file = fnDest,row.names=F)
}
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
##' Helper function for rendersmiles2
##'
##' @title Render Compound from an Online Resource
##' @param depictURL The URL of the object to plot.
##' @param coords The positioning of the image (in data coords).
##' @param filename Temp filename.
##' @return Nothing useful.
##' @author Todor Kondić
renderurl <- function(depictURL,coords=c(0,0,100,100), filename=tempfile(fileext=".svg")) {
h <- new_handle()
curl::handle_setopt(h, ssl_verifyhost = 0, ssl_verifypeer=0)
curl::curl_download(url=depictURL,filename,handle=h)
img <- rsvg(filename)
if (length(img)>2) {
rasterImage(img,xleft=coords[1],ybottom=coords[2],xright=coords[3],ytop=coords[4])
}
}
## rendersmiles <- function(smiles, kekulise=TRUE, coords=c(0,0,100,100), width=200, height=200,
## zoom=1.3,style="cow", annotate="off", abbr="on",suppressh=TRUE,
## showTitle=FALSE, smaLimit=100, sma=NULL) {
## dep <- get.depictor(width = width, height = height, zoom = zoom, style = style, annotate = annotate,
## abbr = abbr, suppressh = suppressh, showTitle = showTitle, smaLimit = smaLimit,
## sma = NULL)
## library(rcdk)
## library(RChemMass)
## mol <- getMolecule(smiles)
## img <- view.image.2d(mol, depictor=dep)
## rasterImage(img, coords[1],coords[2], coords[3],coords[4])
## }
##' Render smiles from an online resource.
##'
##' @title Turn SMILES to an Image Using Online Resource
##' @param smiles The SMILES string.
##' @param ... Hand over to renderurl.
##' @return Nothing useful.
##' @author Todor Kondić
rendersmiles2 <- function(smiles,style="cow",...) {
dpurl <- buildCDKdepictURL(smiles,style=style)
renderurl(dpurl,filename=tempfile(fileext=".svg"),...)
}
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
calcLogTics <- function(lims,powUnit=1,linDiv=1,howMany=NULL) {
## Find integer power limits.
llim <- log10(lims)
llim[is.infinite(llim)] <- 0
nlim <- llim/powUnit
ilim <- round(nlim)*powUnit
all <- if (linDiv>1) {
z <- sapply(seq(ilim[1],(ilim[2]-1),by=powUnit),
function(i) {
a <- 10.**i
b <- 10.**(i+1)
st <- b/linDiv
s <- seq(0,b,by=st)
s[2:length(s)]
})
dim(z) <- prod(dim(z))
z
} else
10**seq(ilim[1],ilim[2],by=powUnit)
res <- if (!is.null(howMany)) {
if (howMany<length(all)) {
step <- length(all) %/% howMany
ind <- seq(1,howMany*step,by=step)
rev(rev(all)[ind])
} else
return(NULL)
} else
all
res
}
calcLabels <- function(ticVals) {
pw <- as.integer(log10(abs(ticVals)))
mags <- 10**pw
mags[is.na(mags)] <- 0
pw[is.na(mags)] <- 0
mant <- signif(ticVals/mags,3)
zz <- Map(function (m,p) c(m=m,p=p),mant,pw)
sapply(zz,function (z) {as.expression(bquote(.(z['m']) %*% 10^.(z['p'])))},USE.NAMES = F)
}
arrPlot <- function(xlim,ylim,ytics,xaxis=F,log=NULL,cex=0.2) {
ylim[is.na(ylim)] <- 1
ylim[ylim == 0] <- 1
if (is.null(ylim)) ylim <- c(1,10)
if (xaxis) xaxt="s" else xaxt="n"
if (! is.null(log) && ! any(is.na(ytics)) ) {
plot(1,1,xlab="",ylab="",xlim = xlim,ylim = ylim,type="n",log=log,xaxt=xaxt,yaxt = "n")
message("ytics:",ytics)
ltics <- calcLabels(ytics)
axis(side=2,at=ytics,labels=ltics,las=2,cex=cex,gap.axis = -1)
} else {
plot(1,1,xlab="",ylab="",xlim = xlim,ylim = ylim,type="n",xaxt = xaxt)
axis(side=2,las=2,cex=cex)
}
}
arrPlotStd <- function(xlim,ylim,xaxis=F,log=log,cex=1.5,mar,intTresh) {
if (ylim[1]<intTresh) ylim[1] <- intTresh
if (is.na(ylim[2])) ylim[2] <- 10
if (xaxis) xaxt="s" else xaxt="n"
par(mar=mar)
plot(1,1,xlab="",ylab="",xlim = xlim,ylim = ylim,type="n",log=log,xaxt=xaxt,yaxt = "n",cex.axis=cex)
ytics <- if (log=="y") axTicks(side=2, nintLog = 3) else axTicks(side=2)
ltics <- calcLabels(ytics)
axis(side=2,at=ytics,labels=ltics,las=2,cex.axis=cex,gap.axis = -1)
}
cmpdID2nm_1 <- function(id) paste("id",id,sep='')
cmpdIDnm <- Vectorize(cmpdID2nm_1)
plot_id_aux <- function(i,wd,eics,maybekids,mass,smile,tags,fTab,logYAxis,pal="Dark2",cex=0.75,rt_digits=2,m_digits=4,rtrange=NULL) {
clean_rtrange <- function(def) {
x1 <- rtrange[1]
x2 <- rtrange[2]
if (is.na(x1) || x1 == 0) x1 <- def[1]
if (is.na(x2) || x2 == 0) x2 <- def[2]
c(x1,x2)
}
if (logYAxis == "linear") log = ""
if (logYAxis == "log") log = "y"
LEFT_MARGIN=9
##FIXME: fTab will break presc.plot.
recs <- fTab[fTab$ID %in% as.integer(i),c("wd","MS2rt","iMS2rt")]
## osmesi <- fTab[fTab$ID %in% as.integer(i),"SMILES"]
message("smile arg:",smile)
MS2Peak <- sapply(wd,function(x) recs[recs$wd %in% x,"MS2rt"])
iMS2Peak <- sapply(wd,function(x) recs[recs$wd %in% x,"iMS2rt"])
eic <- eics[[i]]
maybekid <- maybekids[[i]]
dfs <- lapply(file.path(wd,eic),function(fn) {
tryCatch(read.csv(fn,stringsAsFactors = F,comment.char=''),
error=function(e) {message(paste(e,"; offending file:",fn))})
})
dfs <- lapply(dfs,function(x) data.frame(rt=x$rt,intensity=x$intensity))
## Find existing children.
maybes <- file.path(wd,maybekid)
indkids <- which(file.exists(maybes))
kids <- maybes[indkids]
dfs_kids <- lapply(kids,read.csv,stringsAsFactors=F,comment.char='')
MS2Peak <- MS2Peak[indkids]
iMS2Peak <- iMS2Peak[indkids]
#dfs_kids <- lapply(dfs_kids,function(x) data.frame(rt=x$retentionTime,intensity= x$intensity))
## Find max intensities.
w_max <- sapply(dfs,function (x) which.max(x$intensity))
rt_max <- Map(function(df,w) df$rt[[w]],dfs,w_max)
i_max<- Map(function(df,w) df$intensity[[w]],dfs,w_max)
symbs <- LETTERS[1:length(w_max)]
w_max_kids <- sapply(dfs_kids,function (x) which.max(abs(x$intensity)))
rt_near_kids <- Map(function(df,w) {if (!is.na(w) && !is.null(df$rt)) df$rt[[w]] else NA},dfs_kids,iMS2Peak)
i_near_kids <- Map(function(df,w) {if (!is.na(w) && !is.null(df$intensity)) df$intensity[[w]] else NA},dfs_kids,iMS2Peak)
symbs_kids<- letters[indkids]
def_rt_rng <- range(sapply(dfs,function(x) x$rt))
rt_rng <- if (is.null(rtrange)) def_rt_rng else clean_rtrange(def_rt_rng)
int_rng<- range(sapply(dfs,function(x) x$intensity))
int_rng_kids<- if (! is.null(dfs_kids))
range(sapply(dfs_kids,function(x) x$intensity)) else
c(0,1)
cols <- RColorBrewer::brewer.pal(n=length(dfs),name=pal)
lgnd <- Map(function(k,v) paste(k,"= ",formatC(v,format="f",digits=rt_digits),sep=''),symbs,rt_max)
layout(matrix(c(3,3,4,4,1,2), 3, 2, byrow = TRUE))
## par(mar=c(1,2,1,4))
plot(1,1,type="n",xlab="",ylab="",xlim=struc_xr,ylim=struc_yr,xaxt="n",yaxt="n",asp=1,axes = FALSE)
if (!emptyfield(smile))
rendersmiles2(smile,coords=c(struc_xr[1],struc_yr[1],struc_xr[2],struc_yr[2]))
plot(1,1,type="n",xlab="",ylab="",xlim=col_eng,ylim=peak_int,xaxt="n",yaxt="n",axes = FALSE)
linfo <- legend("topleft",horiz=T,legend=tags,col=cols,fill=cols,bty="n",cex=1.5)
legend(x=linfo$rect$left,y=linfo$rect$top-1*linfo$rect$h,horiz=F,legend=lgnd,fill=cols,bty='n',cex=1.5)
lgnd_kids <- Map(function(k,v) paste(k,"= ",tryCatch(formatC(v,digits=rt_digits,format="f"),error=function(e) "NA"),sep=''),symbs_kids,rt_near_kids)
if (length(lgnd_kids)>0) legend(x=linfo$rect$left-14*linfo$rect$left,y=linfo$rect$top-1*linfo$rect$h,horiz=F,legend=lgnd_kids,fill=cols[indkids],bty="n",cex=1.5)
arrPlotStd(xlim=rt_rng,ylim=int_rng,mar=c(0,LEFT_MARGIN,3,0),log=log,intTresh=1e4)
title(main=paste("ID:",i,"Ion m:",formatC(mass,digits=m_digits,format="f")))
for (k in seq(length(w_max))) text(rt_max[[k]],i_max[[k]],labels=symbs[[k]],pos=4,offset=0.5*k)
arrPlotStd(xlim=rt_rng,ylim=int_rng_kids,xaxis=T,log=log,mar=c(4,LEFT_MARGIN,0,0),intTresh=1)
for (k in 1:length(indkids)) {
lines(intensity ~ rt,data=dfs_kids[[k]],type="h",col=cols_kids[[k]])
}
arrPlotStd(xlim=rt_rng,ylim=c(1,10),xaxis=T,log=log,mar=c(4,9,0,0),intTresh=1)
if (length(dfs_kids)>0) for (k in seq(length(w_max_kids))) text(rt_near_kids[[k]],i_near_kids[[k]],labels=symbs_kids[[k]],pos=4,offset=0.5*k)
adornmzMLTab<-function(df,projDir=getwd()) {
pref<-df$set
mask<-is.na(pref)
drop<-df$files[mask]
for (d in drop) warning("Dropping",d,"because no set specified for it.")
df<-df[!mask,]
pref<-df$set
wd<-basename(tools::file_path_sans_ext(df$Files))
wd<-file.path(projDir,pref,wd)
df$wd<-wd
df
}
genSuprFileTbl <- function(fileTbl,IDSet,destFn="ftable.csv") {
genOneFileTbl <- function(id,fileTbl) {
n <- nrow(fileTbl)
K <- length(id)
longid <- rep(id,n)
cols <- lapply(names(fileTbl),function(cn) rep("",n*K))
names(cols) <- names(fileTbl)
bdf <- as.data.frame(cols,stringsAsFactors = F)
rows <- lapply(1:n*K,function(x) NA)
for (j in 1:n) {
for (i in 1:K)
rows[[(j-1)*K+i]] <- fileTbl[j,]
}
bdf <- as.data.frame(do.call(rbind,rows),stringsAsFactors = F)
bdf <- cbind(bdf,data.frame(ID=longid))
bdf
}
sets <- levels(factor(IDSet$set))
setTbl <- lapply(sets,function (s) {
sl1<-IDSet$set %in% s
sl2<-fileTbl$set==s
if (!any(sl2)) stop("Set",s,"does not select anything in the currently processed files.")
genOneFileTbl(IDSet[sl1,]$ID,fileTbl[sl2,])
})
write.csv(x=allTbl,file=destFn,row.names=F)