Newer
Older
## Copyright (C) 2020 by University of Luxembourg
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at
## http://www.apache.org/licenses/LICENSE-2.0
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
ppInpFt<-function() {
tempfile(pattern=FN_PP_OUT_PREF,fileext=".csv")
}
stripext<-function(fn) {
bits<-strsplit(fn,split="\\.")[[1]]
if (length(bits)> 1) paste(head(bits,-1),collapse=".") else fn}
idsFromFiles<-function(setDir) {
fls<-list.files(path=setDir,patt=".*eic.*csv",rec=T)
bas<-basename(fls)
res<-strsplit(bas,"\\.")
sapply(res,function (r) as.integer(r[[1]]))
}
readCmpList<-function(fn) {
read.csv(file=fn,
header=T,
stringsAsFactors = F,
comment.char = '')
}
importCmpList<-function(fn) {
df<-readCmpList(fn)
nRow<-nrow(df)
naCol<-rep(NA,nRow)
blankCol<-rep("",nRow)
if (! "CAS" %in% dfNm) df$CAS<-naCol
if (! "Name" %in% dfNm) df$Name<-blankCol
if (! "RT" %in% dfNm) df$RT<-naCol
mzIn=as.logical(match("mz",dfNm,nomatch=F))
SMILESIn=as.logical(match("SMILES",dfNm,nomatch=F))
if (! (mzIn || SMILESIn)) stop("Either `mz', or `SMILES' columns must be present in the compound list.")
if (! ("mz" %in% dfNm)) df$mz<-naCol
if (! ("SMILES" %in% dfNm)) df$SMILES<-blankCol
if (! ("Level" %in% dfNm)) df$Level<-1
for (ri in 1:nRow) {
if (emptyfield(df$SMILES[[ri]])) {
if (! emptyfield(df$mz[[ri]])) {
df$Level[[ri]]<-5
} else
stop ("At row ",ri," of the input compound list, there are neither SMILES, nor Mass to be found.")
}
}
df
}
getMzFromCmpL<-function(id,mode,cmpL) {
res<-if (!is.null(mz) && !is.na(mz)) {
mde<-as.character(mode)
wh<-MODEMAP[[mde]]
RChemMass::getSuspectFormulaMass(smiles)[[wh]]
} else stop("Both SMILES and mz fields, for ID ",id,", found empty in the compound list. Aborting.")
getSMILESFromCmpL<-function(id,cmpL) {
ind<-match(id,cmpL$ID)
smiles<-cmpL$SMILES[[ind]]
smiles
}
getColFromCmpL<-function(id,cname,cmpL) {
ind<-match(id,cmpL$ID)
cmpL[[cname]][[ind]]
}
##' Create directories without drama.
##'
##'
##' @title Create directories without drama
##' @return The character string containing the input argument `path`.
##' @author Todor Kondić
no_drama_mkdir<-function(path) {
f <- Vectorize(function(path) {
if (! dir.exists(path)) dir.create(path)
path},vectorize.args="path")
f(path)
fn_data2wd <- function(fn_data,dest) {
f <- Vectorize(function(fn_data) {
noext <- stripext(fn_data)
file.path(dest,basename(noext))
},vectorize.args="fn_data")
f(fn_data)
}
gen_presc_d <- function(wd) dir.create(wd,recursive = T,showWarnings = F)
get_cmpd_l_fn <- function(wd) {
f <- function(wd) file.path(wd,"compounds.csv")
fv <- Vectorize(f,vectorize.args=c("wd"))
fv(wd)
}
get_ftable_fn <- function(wd) {
fv <- Vectorize(f,vectorize.args=c("wd"))
fv(wd)
}
get_info_dir <- function(wd) {
file.path(wd,"info")
}
get_info_fn <- function(wd) {
file.path(get_info_dir(wd),"info.csv")
}
gen_info_dir <- function(wd) {
nm <- get_info_dir(wd)
no_drama_mkdir(nm)
nm
}
emptyfield <- function (f) {length(f) == 0 | is.na(f) | f == ""}
##' Generate the RMassBank compound list from the input compound list
##' in CSV file src_fn. The input compound list format is either a
##' Chemical Dashboard csv file with, at least, PREFERRED_ SMILES
##' columns _filled_ out, or just an ordinary CSV file with columns
##' SMILES and Names filled. Argument dest_fn is the destination
##' filename. Returns the number of compounds.
##'
##' @title Generate Compound List File
##' @param src_fn The input compound list CSV filename.
##' @param dest_fn The resulting compound list CSV filename.
##' @return Number of compounds.
##' @author Todor Kondić
df<-read.csv(src_fn,sep=',',stringsAsFactors=F,comment.char='')
if (is.null(casvals)) casvals <- rep(NA,sz)
if (is.null(nms)) nms <- rep(NA,nrow(df))
if (is.null(rt)) rt <- rep(NA,nrow(df))
odf <- data.frame(ID=df$ID,Name=nms,SMILES="",mz=NA,RT=rt,Level=3,CAS=casvals,stringsAsFactors=F)
for (ri in 1:nrow(df)) {
if (emptyfield(df$SMILES[ri])) {
if (! emptyfield(df$Mass[ri])) {
odf$mz[ri] <- df$Mass[ri]
odf$Level[ri] <- 5
} else
stop ("At row ",ri," of the input compound list, there are neither SMILES, nor Mass to be found.")
} else odf$SMILES[ri] <- df$SMILES[ri]
}
##' Generates the RMassBank compound list and loads it.
##'
##' @title Generate and Load the RMassBank Compound List
##' @param wd Directory under which results are archived.
##' @return Named list. The key `fn_cmpdl` is the path of the
##' generated compound list and the key `n` the number of
##' compounds.
##' @author Todor Kondić
gen_cmpdl_and_load <- function(wd,fn_cmpdl) {
fn_comp<-get_cmpd_l_fn(wd)
n_cmpd<-gen_cmpd_l(fn_cmpdl,fn_comp)
RMassBank::loadList(fn_comp,check=F) #reduce universality of this statement!!!
list(fn_cmpdl=fn_comp,n=n_cmpd)
}
gen_ftable <- function(fTab,file) {
df<-fTab[fTab$Files %in% file,]
wd<-unique(df$wd)
tab2file(tab=df,file=get_ftable_fn(wd))
conf <- function(fn_data,fn_cmpd_l,dest) {
no_drama_mkdir(dest)
wd <- fn_data2wd(fn_data,dest)
no_drama_mkdir(wd)
fn_out_cmpd_l <- get_cmpd_l_fn(wd)
n_cmpd <- gen_cmpd_l(fn_cmpd_l,fn_out_cmpd_l)
gen_ftable(fn_data,wd,n_cmpd)
}
reconf <- function(wd) {## Load the settings.
## Load the compound list.
fn_cmpd_l <- get_cmpd_l_fn(wd)
RMassBank::loadList(fn_cmpd_l)
gen_clean_state_ftab<-function(ftable) {
ftable$Comments <- ""
ftable[c("MS1","MS2","Alignment","AboveNoise")] <- T
ftable["MS2rt"] <- NA_real_
ftable["iMS2rt"] <- NA_integer_
ftable["rt"]<-NA_real_
ftable
preProc <- function (ftable,noiseFac=3,rtDelta=0.5,intThresh=1e5,intThreshMS2=0.05) {
fn_spec<-function(wd) readRDS(file.path(wd,FN_SPEC))
message("Loading RDS-es ...")
allData<-lapply(wds,fn_spec)
names(allData)<-wds
message("... done with RDSs")
## QA check plan:
##
## If MS1 does not exist, set MS1 to F, as well as everything else except MS2.
## If it exists, proceed to noise check.
## If noise check fails, set AboveNoise and Alignment to F.
##
##
## MS2 will be checked independently.
## If MS2 does not exist, set MS2 and Alignment to F.
## If it does, check the Alignment.
## If Alignment is wrong, set Alignment to F.
##
## Terminology: MS1 does not exist if the intensity is below the
## intensity threshold. MS2 does not exist if it was not picked up
## during the dataframe generation stage. In this case, the file
## with the corresponding ID will not be there.
for (ind in 1:nrow(ftable)) {
wd <- ftable$wd[ind]
id <- ftable$ID[ind]
## odir=file.path(wd)
## fn_eic <- fn_out(id,".eic",wd)
eics<-allData[[wd]]$eic
nid<-id2name(id)
ii<-match(nid,MSnbase::fData(eics)[["ID"]]) #id, because id-s, not nid-s are in fData for ms1 eics;
eic1<-eics[[ii]]
eic<-data.frame(rt=MSnbase::rtime(eic1)/60.,intensity=MSnbase::intensity(eic1))
colnames(eic)<-c("rt","intensity")
if (nrow(eic)==0) {
warning("No chromatogram for id ",id," found in", wd, " . Skipping.")
ms1MaxInd<-which.max(eic$intensity)
maxInt<-eic$intensity[[ms1MaxInd]]
ftable[ind,"rt"]<-eic$rt[[ms1MaxInd]]
ftable[ind,"MS1"] <- F
## Other checks automatically fail, too.
ftable[ind,"Alignment"] <- F
ftable[ind,"AboveNoise"] <- F
} else {
## Noisy?
if (ftable[ind,"AboveNoise"]) {
mInt <- mean(eic$intensity)
if (maxInt < noiseFac*mInt) {
ftable[ind,"AboveNoise"] <- F
ftable[ind,"Alignment"] <- F ## If noisy, this is
## probably meaningles, so
## F.
}
}
}
ms2nids<-names(ms2)
mInt<-mean(eic$intensity)
if (! (nid %in% ms2nids)) {
ftable[ind,"MS2"] <- F
ftable[ind,"Alignment"] <- F
} else {
## Alignment still makes sense to be checked?
if (ftable[ind,"Alignment"]) {
rtInd <- match(maxInt,eic$intensity)
rtMS1Peak <- eic$rt[[rtInd]]
msms<-MSnbase::fData(sp)[,c("rtm","maxI")]
colnames(msms)<-c("rt","intensity")
rtInd <- which((msms$rt > rtMS1Peak - rtDelta) &
(msms$rt < rtMS1Peak + rtDelta)) #Close enough?
rtInd <- rtInd[which(msms$intensity[rtInd]>intThreshMS2*mInt)] #Intense enough?
msmsRT <- msms$rt[rtInd]
if (length(msmsRT) > 0) {
ftable[ind,"iMS2rt"] <- which.min(abs(msmsRT - rtMS1Peak))
ftable[ind,"MS2rt"] <- msmsRT[ftable[ind,"iMS2rt"]]
##' Helper function for rendersmiles2
##'
##' @title Render Compound from an Online Resource
##' @param depictURL The URL of the object to plot.
##' @param coords The positioning of the image (in data coords).
##' @param filename Temp filename.
##' @return Nothing useful.
##' @author Todor Kondić
renderurl <- function(depictURL,coords=c(0,0,100,100), filename=tempfile(fileext=".svg")) {
h <- new_handle()
curl::handle_setopt(h, ssl_verifyhost = 0, ssl_verifypeer=0)
curl::curl_download(url=depictURL,filename,handle=h)
img <- rsvg(filename)
if (length(img)>2) {
rasterImage(img,xleft=coords[1],ybottom=coords[2],xright=coords[3],ytop=coords[4])
}
}
smiles2img <- function(smiles, kekulise=TRUE, width=300, height=300,
zoom=1.3,style="cow", annotate="off", abbr="on",suppressh=TRUE,
showTitle=FALSE, smaLimit=100, sma=NULL) {
dep <- rcdk::get.depictor(width = width, height = height, zoom = zoom, style = style, annotate = annotate,
abbr = abbr, suppressh = suppressh, showTitle = showTitle, smaLimit = smaLimit,
sma = NULL)
mol <- RMassBank::getMolecule(smiles)
z<-rcdk::view.image.2d(mol, depictor=dep)
grid::rasterGrob(z)
}
##' Render smiles from an online resource.
##'
##' @title Turn SMILES to an Image Using Online Resource
##' @param smiles The SMILES string.
##' @param ... Hand over to renderurl.
##' @return Nothing useful.
##' @author Todor Kondić
rendersmiles2 <- function(smiles,style="cow",...) {
dpurl <- buildCDKdepictURL(smiles,style=style)
renderurl(dpurl,filename=tempfile(fileext=".svg"),...)
}
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
calcLogTics <- function(lims,powUnit=1,linDiv=1,howMany=NULL) {
## Find integer power limits.
llim <- log10(lims)
llim[is.infinite(llim)] <- 0
nlim <- llim/powUnit
ilim <- round(nlim)*powUnit
all <- if (linDiv>1) {
z <- sapply(seq(ilim[1],(ilim[2]-1),by=powUnit),
function(i) {
a <- 10.**i
b <- 10.**(i+1)
st <- b/linDiv
s <- seq(0,b,by=st)
s[2:length(s)]
})
dim(z) <- prod(dim(z))
z
} else
10**seq(ilim[1],ilim[2],by=powUnit)
res <- if (!is.null(howMany)) {
if (howMany<length(all)) {
step <- length(all) %/% howMany
ind <- seq(1,howMany*step,by=step)
rev(rev(all)[ind])
} else
return(NULL)
} else
all
res
}
calcLabels <- function(ticVals) {
pw <- as.integer(log10(abs(ticVals)))
mags <- 10**pw
mags[is.na(mags)] <- 0
pw[is.na(mags)] <- 0
mant <- signif(ticVals/mags,3)
zz <- Map(function (m,p) c(m=m,p=p),mant,pw)
sapply(zz,function (z) {as.expression(bquote(.(z['m']) %*% 10^.(z['p'])))},USE.NAMES = F)
}
arrPlot <- function(xlim,ylim,ytics,xaxis=F,log=NULL,cex=0.2) {
ylim[is.na(ylim)] <- 1
ylim[ylim == 0] <- 1
if (is.null(ylim)) ylim <- c(1,10)
if (xaxis) xaxt="s" else xaxt="n"
if (! is.null(log) && ! any(is.na(ytics)) ) {
plot(1,1,xlab="",ylab="",xlim = xlim,ylim = ylim,type="n",log=log,xaxt=xaxt,yaxt = "n")
message("ytics:",ytics)
ltics <- calcLabels(ytics)
axis(side=2,at=ytics,labels=ltics,las=2,cex=cex,gap.axis = -1)
} else {
plot(1,1,xlab="",ylab="",xlim = xlim,ylim = ylim,type="n",xaxt = xaxt)
axis(side=2,las=2,cex=cex)
}
}
arrPlotStd <- function(xlim,ylim,xaxis=F,log=log,cex=1.5,mar,intThresh) {
if (ylim[1]<intThresh) ylim[1] <- intThresh
if (is.na(ylim[2])) ylim[2] <- 10
if (xaxis) xaxt="s" else xaxt="n"
par(mar=mar)
plot(1,1,xlab="",ylab="",xlim = xlim,ylim = ylim,type="n",log=log,xaxt=xaxt,yaxt = "n",cex.axis=cex)
ytics <- if (log=="y") axTicks(side=2, nintLog = 3) else axTicks(side=2)
ltics <- calcLabels(ytics)
axis(side=2,at=ytics,labels=ltics,las=2,cex.axis=cex,gap.axis = -1)
}
cmpdID2nm_1 <- function(id) paste("id",id,sep='')
cmpdIDnm <- Vectorize(cmpdID2nm_1)
plot_id_aux <- function(i,wd,eics,maybekids,mass,smile,tags,fTab,logYAxis,pal="Dark2",cex=0.75,rt_digits=2,m_digits=4,rtrange=NULL) {
clean_rtrange <- function(def) {
x1 <- rtrange[1]
x2 <- rtrange[2]
if (is.na(x1) || x1 == 0) x1 <- def[1]
if (is.na(x2) || x2 == 0) x2 <- def[2]
c(x1,x2)
}
if (logYAxis == "linear") log = ""
if (logYAxis == "log") log = "y"
LEFT_MARGIN=9
##FIXME: fTab will break presc.plot.
recs <- fTab[fTab$ID %in% as.integer(i),c("wd","MS2rt","iMS2rt")]
## osmesi <- fTab[fTab$ID %in% as.integer(i),"SMILES"]
message("smile arg:",smile)
MS2Peak <- sapply(wd,function(x) recs[recs$wd %in% x,"MS2rt"])
iMS2Peak <- sapply(wd,function(x) recs[recs$wd %in% x,"iMS2rt"])
eic <- eics[[i]]
maybekid <- maybekids[[i]]
dfs <- lapply(file.path(wd,eic),function(fn) {
tryCatch(read.csv(fn,stringsAsFactors = F,comment.char=''),
error=function(e) {message(paste(e,"; offending file:",fn))})
})
dfs <- lapply(dfs,function(x) data.frame(rt=x$rt,intensity=x$intensity))
## Find existing children.
maybes <- file.path(wd,maybekid)
indkids <- which(file.exists(maybes))
kids <- maybes[indkids]
dfs_kids <- lapply(kids,read.csv,stringsAsFactors=F,comment.char='')
MS2Peak <- MS2Peak[indkids]
iMS2Peak <- iMS2Peak[indkids]
#dfs_kids <- lapply(dfs_kids,function(x) data.frame(rt=x$retentionTime,intensity= x$intensity))
## Find max intensities.
w_max <- sapply(dfs,function (x) which.max(x$intensity))
rt_max <- Map(function(df,w) df$rt[[w]],dfs,w_max)
i_max<- Map(function(df,w) df$intensity[[w]],dfs,w_max)
symbs <- LETTERS[1:length(w_max)]
w_max_kids <- sapply(dfs_kids,function (x) which.max(abs(x$intensity)))
rt_near_kids <- Map(function(df,w) {if (!is.na(w) && !is.null(df$rt)) df$rt[[w]] else NA},dfs_kids,iMS2Peak)
i_near_kids <- Map(function(df,w) {if (!is.na(w) && !is.null(df$intensity)) df$intensity[[w]] else NA},dfs_kids,iMS2Peak)
symbs_kids<- letters[indkids]
def_rt_rng <- range(sapply(dfs,function(x) x$rt))
rt_rng <- if (is.null(rtrange)) def_rt_rng else clean_rtrange(def_rt_rng)
int_rng<- range(sapply(dfs,function(x) x$intensity))
int_rng_kids<- if (! is.null(dfs_kids))
range(sapply(dfs_kids,function(x) x$intensity)) else
c(0,1)
cols <- RColorBrewer::brewer.pal(n=length(dfs),name=pal)
lgnd <- Map(function(k,v) paste(k,"= ",formatC(v,format="f",digits=rt_digits),sep=''),symbs,rt_max)
layout(matrix(c(3,3,4,4,1,2), 3, 2, byrow = TRUE))
## par(mar=c(1,2,1,4))
plot(1,1,type="n",xlab="",ylab="",xlim=struc_xr,ylim=struc_yr,xaxt="n",yaxt="n",asp=1,axes = FALSE)
if (!emptyfield(smile))
rendersmiles2(smile,coords=c(struc_xr[1],struc_yr[1],struc_xr[2],struc_yr[2]))
plot(1,1,type="n",xlab="",ylab="",xlim=col_eng,ylim=peak_int,xaxt="n",yaxt="n",axes = FALSE)
linfo <- legend("topleft",horiz=T,legend=tags,col=cols,fill=cols,bty="n",cex=1.5)
legend(x=linfo$rect$left,y=linfo$rect$top-1*linfo$rect$h,horiz=F,legend=lgnd,fill=cols,bty='n',cex=1.5)
lgnd_kids <- Map(function(k,v) paste(k,"= ",tryCatch(formatC(v,digits=rt_digits,format="f"),error=function(e) "NA"),sep=''),symbs_kids,rt_near_kids)
if (length(lgnd_kids)>0) legend(x=linfo$rect$left-14*linfo$rect$left,y=linfo$rect$top-1*linfo$rect$h,horiz=F,legend=lgnd_kids,fill=cols[indkids],bty="n",cex=1.5)
arrPlotStd(xlim=rt_rng,ylim=int_rng,mar=c(0,LEFT_MARGIN,3,0),log=log,intThresh=1e4)
mass<- if (!is.na(mass)) mass else "NA"
title(main=paste("ID:",i,"Ion m:",formatC(mass,digits=m_digits,format="f")))
for (k in seq(length(w_max))) text(rt_max[[k]],i_max[[k]],labels=symbs[[k]],pos=4,offset=0.5*k)
arrPlotStd(xlim=rt_rng,ylim=int_rng_kids,xaxis=T,log=log,mar=c(4,LEFT_MARGIN,0,0),intThresh=1)
for (k in 1:length(indkids)) {
lines(intensity ~ rt,data=dfs_kids[[k]],type="h",col=cols_kids[[k]])
}
arrPlotStd(xlim=rt_rng,ylim=c(1,10),xaxis=T,log=log,mar=c(4,9,0,0),intThresh=1)
if (length(dfs_kids)>0) for (k in seq(length(w_max_kids))) text(rt_near_kids[[k]],i_near_kids[[k]],labels=symbs_kids[[k]],pos=4,offset=0.5*k)
}
multiplot <- function(..., plotlist=NULL, cols=1, layout=NULL) {
# Make a list from the ... arguments and plotlist
plots <- c(list(...), plotlist)
numPlots = length(plots)
# If layout is NULL, then use 'cols' to determine layout
if (is.null(layout)) {
# Make the panel
# ncol: Number of columns of plots
# nrow: Number of rows needed, calculated from # of cols
layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
ncol = cols, nrow = ceiling(numPlots/cols))
}
if (numPlots==1) {
print(plots[[1]])
} else {
# Set up the page
grid::grid.newpage()
grid::pushViewport(grid::viewport(layout = grid::grid.layout(nrow(layout), ncol(layout))))
# Make each plot, in the correct location
for (i in 1:numPlots) {
# Get the i,j matrix positions of the regions that contain this subplot
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
print(plots[[i]], vp = grid::viewport(layout.pos.row = matchidx$row,
layout.pos.col = matchidx$col))
}
}
}
plot_id_msn <- function(ni,
data,
rtMS1,
rtMS2,
rtMS2Ind,
mass,
smile,
tags,
fTab,
prop,
theme,
pal="Dark2",
cex=0.75,
rt_digits=2,
m_digits=4) {
clean_range<-function(def,rng) {
x1 <- rng[1]
x2 <- rng[2]
if (is.na(x1) || x1 == 0) x1 <- def[1]
if (is.na(x2) || x2 == 0) x2 <- def[2]
c(x1,x2)
mk_title<-function() paste("EIC (",
"m/z = ",
formatC(mass,format='f',digits=m_digits),
")",sep='')
mk_leg_lab<-function(tag,rt) {paste(tag,"; rt= ",formatC(rt[[tag]],format='f',digits=rt_digits),"min")}
sci10<-function(x) {ifelse(x==0, "0", parse(text=gsub("[+]", "", gsub("e", " %*% 10^", scales::scientific_format()(x)))))}
dfChrMS1<-NULL
dfChrMS2<-NULL
dfSpecMS2<-NULL
## MS1 time series.
dfschrms1<-lapply(tags,function(tag) {d<-data[[tag]]$eic
data.frame(rt=MSnbase::rtime(cg)/60.,
intensity=MSnbase::intensity(cg),tag=as.character(tag),legend=mk_leg_lab(tag,rtMS1))
dfChrMS1<-do.call(rbind,c(dfschrms1,list(make.row.names=F)))
dfsChrMS2<-lapply(tags,function(tag) {
if (!is.null(d)) {
df<-MSnbase::fData(d)[,c("rtm","maxI")]
colnames(df)<-c("rt","intensity")
df$tag<-as.character(tag)
if (!all(sapply(dfsChrMS2,is.null))) dfChrMS2<-do.call(rbind,c(dfsChrMS2,list(make.row.names=F)))
if (!all(sapply(dfsChrMS2,is.null))) {
dfsSpecMS2<-lapply(tags,function(tag) {
d<-data[[tag]]$ms2[[ni]]
if (!is.null(d)) {
ind<-rtMS2Ind[[tag]]
if (!is.na(ind)) {
x<-data.frame(mz=MSnbase::mz(d[[ind]]),intensity=MSnbase::intensity(d[[ind]]))
x$tag<-tag
x
} else NULL
}
})
dfsSpecMS2<-dfsSpecMS2[!is.null(dfsSpecMS2)]
dfSpecMS2<-do.call(rbind,c(dfsSpecMS2,list(make.row.names=F)))
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
}
## Ranges
if (!is.null(dfChrMS1)) {
rrtMS1<-range(dfChrMS1$rt)
rrtMS2<-rrtMS1
rintMS1<-range(dfChrMS1$intensity)
}
if (!is.null(dfChrMS2)) {
rintMS2<-range(dfChrMS2$intensity)
rintMS2 <- if (is.null(prop$ms2$irng)) rintMS2 else clean_range(rintMS2,prop$ms2$irng)
}
if (!is.null(dfSpecMS2)) {
rmzSpMS2<-range(dfSpecMS2$mz)
rintSpMS2<-range(dfSpecMS2$intensity)
rmzSpMS2<- if (is.null(prop$spec$mzrng)) rmzSpMS2 else clean_range(rmzSpMS2,prop$spec$mzrng)
rintSpMS2<- if (is.null(prop$spec$irng)) rintSpMS2 else clean_range(rintSpMS2,prop$spec$irng)
}
titMS1<-mk_title()
scale_y<-if (prop$ms1$axis=="linear") {
ggplot2::scale_y_continuous
} else {
ggplot2::scale_y_log10
}
plMS1<- if(!is.null(dfChrMS1) && !is.na(dfChrMS1) && !nrow(dfChrMS1)==0) {
ggplot2::ggplot(data=dfChrMS1,ggplot2::aes(x=rt,y=intensity,group=legend))+
ggplot2::geom_line(ggplot2::aes(colour=legend),key_glyph=KEY_GLYPH)+
ggplot2::lims(x=rrtMS1)+
ggplot2::labs(x=CHR_GRAM_X,y=CHR_GRAM_Y,title=titMS1,tag=i,colour=PLOT_MS1_LEG_TIT)+
scale_y(labels=sci10,limits=rintMS1)+theme()
} else NULL
## Empty
plEmpty<-ggplot2::ggplot(data=dfChrMS1,ggplot2::aes(x=rt,y=intensity))+ggplot2::theme_void()
if (!all(sapply(dfsChrMS2,is.null))) {
scale_y<-if (prop$ms2$axis=="linear") {
ggplot2::scale_y_continuous
} else {
ggplot2::scale_y_log10
}
plMS2<-ggplot2::ggplot(data=dfChrMS2,ggplot2::aes(x=rt,ymin=0,ymax=intensity,group=legend))+
ggplot2::geom_linerange(ggplot2::aes(colour=legend),key_glyph=KEY_GLYPH)+
ggplot2::labs(x=CHR_GRAM_X,y=CHR_GRAM_Y,title=NULL,subtitle = "MS2",tag = " ")+
ggplot2::lims(x=rrtMS2)+ggplot2::labs(colour=PLOT_MS2_LEG_TIT)+
scale_y(labels=sci10,limits = rintMS2)+theme()
} else {
plMS2<-plEmpty
}
## Structure
if (!is.null(smile) && !is.na(smile) && !nchar(smile)<1) {
g<-smiles2img(smile,width=500,height=500,zoom=4.5)
plStruc<-ggplot2::ggplot(data=dfChrMS1,ggplot2::aes(x=rt,y=intensity))+
ggplot2::geom_blank()+ggplot2::annotation_custom(g)+ggplot2::theme_void()
} else plStruc<-plEmpty
## MS2 Spectrum
if (!all(sapply(dfsChrMS2,is.null))) {
plSpecMS2<-if (is.data.frame(dfSpecMS2)) { #sometimes
#dfSpecMS2 ends up
#as a list of
#logicals; this
#probably happens
#when either MS2 is
#bad in some way,
#or the RT
#intervals are
#mismatched.
scale_y<-if (prop$spec$axis=="linear") {
ggplot2::scale_y_continuous
} else {
ggplot2::scale_y_log10
}
ggplot2::ggplot(data=dfSpecMS2,ggplot2::aes(x=mz,ymin=0,ymax=intensity,group=tag))+
ggplot2::geom_linerange(ggplot2::aes(colour=tag),key_glyph=KEY_GLYPH)+
ggplot2::lims(x=rmzSpMS2)+
ggplot2::labs(subtitle="MS2",y="intensity")+
scale_y(labels=sci10,limits= rintSpMS2)+theme()
## Lucky N the most intense N TODO
## lckN<-if (is.data.frame(dfSpecMS2)) {
## ord<-order(dfSpecMS2$intensity,decreasing=T)
## ll<-length(ord)
## theL<-min(ll,MS2_1ST_N)
## mzN<-dfSpecMS2$mz[ord][1:theL]
## inN<-dfSpecMS2$intensity[ord][1:theL]
## df<-data.frame("m/z"=mzN,"intensity"=inN)
## message("DF:")
## str(df)
## message("---DF")
## gridExtra::tableGrob(df) #+ggplot2::labs(subtitle="Top m/z")
## } else NULL
res<- if (!is.null(plMS1)) cowplot::plot_grid(plMS1,plStruc,plMS2,plEmpty,plSpecMS2,align = "hv",axis='l',ncol = 2,nrow=3,rel_widths=c(3,1)) else NULL
res
adornmzMLTab<-function(df,projDir=getwd()) {
pref<-df$set
mask<-is.na(pref)
drop<-df$files[mask]
for (d in drop) warning("Dropping",d,"because no set specified for it.")
df<-df[!mask,]
pref<-df$set
wd<-basename(tools::file_path_sans_ext(df$Files))
wd<-file.path(projDir,pref,wd)
df$wd<-wd
df
}
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
## genSuprFileTblOld <- function(fileTbl,compTab) {
## genOneFileTbl <- function(id,fileTbl) {
## n <- nrow(fileTbl)
## K <- length(id)
## longid <- rep(id,n)
## cols <- lapply(names(fileTbl),function(cn) rep("",n*K))
## names(cols) <- names(fileTbl)
## bdf <- as.data.frame(cols,stringsAsFactors = F)
## rows <- lapply(1:n*K,function(x) NA)
## for (j in 1:n) {
## for (i in 1:K)
## rows[[(j-1)*K+i]] <- fileTbl[j,]
## }
## bdf <- as.data.frame(do.call(rbind,rows),stringsAsFactors = F)
## bdf <- cbind(bdf,data.frame(ID=longid))
## bdf
## }
## sets <- levels(factor(compTab$set))
## setTbl <- lapply(sets,function (s) {
## sl1<-compTab$set %in% s
## sl2<-fileTbl$set==s
## if (!any(sl2)) stop("Set",s,"does not select anything in the currently processed files.")
## genOneFileTbl(compTab[sl1,]$ID,fileTbl[sl2,])
## })
## allTbl <- do.call(rbind,setTbl)
## allTbl
## }
genSuprFileTab <- function(fileTab,compTab) {
genOne<-function(ids,fn) {
K<-length(ids)
fTabRow<-fileTab[fileTab$Files == fn,]
cols<-lapply(names(fileTab),function(n) rep(fTabRow[[n]],K))
names(cols)<-NULL
cols<-c(cols,list(ids))
names(cols)<-c(names(fileTab),"ID")
df<-as.data.frame(cols,stringsAsFactors = F)
df
}
tabs<-lapply(fileTab$Files,function(fn)
{
wh<-which(fileTab$Files==fn)
set<-fileTab$set[[wh]]
md<-fileTab$mode[[wh]]
sel<-(compTab$set %in% set) & (compTab$mode %in% md)
ids<-compTab$ID[sel]
getEntryFromComp<-function(entry,id,set,mode,compTab) {
ind <- which(compTab$ID %in% id &
compTab$set %in% set &
compTab$mode %in% mode)
res<- if (length(ind)==1) compTab[ind,entry] else {
if (length(ind)>1) {
stop("Nonunique entry selection in comprehensive table.")
} else {
stop("Entries not found for id ", id,"set ",set, "and mode ", mode, " .")
}
}
res
}
addCompColsToFileTbl<-function(ft,compTab) {
entries<-getEntryFromComp(c("mz","Name","rt"),id,set,m,compTab)
mzCol[[ir]]<- entries[["mz"]]
nm<-entries[["Name"]]
rtCol[[ir]]<- entries[["rt"]]
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
vald_comp_tab<-function(df,ndf,checkSMILES=F,checkMz=F,checkNames=F) {
## Fields.
if (is.null(df$ID)) stop("Column ID missing in ",ndf," .")
if (checkMz && is.null(df$mz)) stop("Column mz missing in ", ndf, " .")
if (checkSMILES && is.null(df$SMILES)) stop("Column SMILES missing in", ndf, " .")
if (checkNames && is.null(df$Name)) warning("Column Name missing in ", ndf," , continuing without.")
if (is.null(df$RT) && is.null(df$rt)) {
warning("Column RT (alternatively, rt) missing in ", ndf, ", continuing without.")
} else {
if (is.null(df$rt)) {
df$rt<-df$RT
df$RT<-NULL
}
}
## Missing IDs?
ind<-which(is.na(df$ID))
if (length(ind)>0) {
for (i in ind) {
warning("ID missing at row: ",i," .")
}
stop("Missing IDs found.")
}
## Unique IDs?
luids<-length(unique(df$ID))
if (length(df$ID) > luids) stop("Duplicate IDs in ", ndf, " are not allowed.")
## Missing SMILES?
if (checkSMILES) {
ind<-which(is.na(df$SMILES))
if (length(ind)>0) {
for (i in ind) {
warning("SMILES missing at row: ",i, "; ID: ",df$ID[[i]]," .")
}
stop("Missing SMILES found.")
}
}
## Missing mz?
if (checkMz) {
ind<-which(is.na(df$mz))
if (length(ind)>0) {
for (i in ind) {
warning("mz missing at row: ",i, "; ID: ",df$ID[[i]]," .")
}
stop("Missing mz-s found.")
}
}
df
}