Skip to content
Snippets Groups Projects
Commit dc229aaf authored by Todor Kondic's avatar Todor Kondic
Browse files

Merge branch 'devel' into 'master'

Devel

See merge request !4
parents ccf614a9 a978228c
No related branches found
No related tags found
No related merge requests found
Package: shinyscreen Package: shinyscreen
Title: Pre-screening of Mass Spectrometry Data Title: Pre-screening of Mass Spectrometry Data
Version: 0.1 Version: 0.3
Author: Todor Kondić Author: Todor Kondić
Maintainer: Todor Kondić <todor.kondic@uni.lu> Maintainer: Todor Kondić <todor.kondic@uni.lu>
Authors@R: Authors@R:
person(given = "Todor", c(person(given = "Todor",
family = "Kondić", family = "Kondić",
role = c("aut", "cre"), role = c("aut", "cre"),
email = "todor.kondic@uni.lu", email = "todor.kondic@uni.lu",
comment = c(ORCID = "https://orcid.org/0000-0001-6662-4375")) comment = c(ORCID = "https://orcid.org/0000-0001-6662-4375")),
person(given = "Hiba Mohammed",
family = "Taha",
role = c("ctb"),
email = "hiba.mohammed-taha@ext.uni.lu"),
person(given = "Jessy",
family = "Krier",
role = c("ctb"),
email = "jessy.krier@ext.uni.lu"),
person(given = "Mira",
family = "Narayanan",
role = c("ctb"),
email = "mira.narayanan@ext.uni.lu"),
person(given = "Anjana",
family = "Elapavalore",
role = c("ctb"),
email = "anjana.elapavalore@ext.uni.lu"))
Description: Pre-screening of Mass Spectrometry Data. Description: Pre-screening of Mass Spectrometry Data.
License: Apache License (>= 2.0) License: Apache License (>= 2.0)
Encoding: UTF-8 Encoding: UTF-8
......
# Generated by roxygen2: do not edit by hand # Generated by roxygen2: do not edit by hand
export(mb.do)
export(mb.prep)
export(presc.do) export(presc.do)
export(presc.plot) export(presc.plot)
export(sw.do)
export(presc.shiny) export(presc.shiny)
...@@ -352,37 +352,135 @@ renderurl <- function(depictURL,coords=c(0,0,100,100), filename=tempfile(fileext ...@@ -352,37 +352,135 @@ renderurl <- function(depictURL,coords=c(0,0,100,100), filename=tempfile(fileext
##' ##'
##' @title Turn SMILES to an Image Using Online Resource ##' @title Turn SMILES to an Image Using Online Resource
##' @param smiles The SMILES string. ##' @param smiles The SMILES string.
##' @param style Structure style.
##' @param ... Hand over to renderurl. ##' @param ... Hand over to renderurl.
##' @return Nothing useful. ##' @return Nothing useful.
##' @author Todor Kondić ##' @author Todor Kondić
rendersmiles2 <- function(smiles,...) { rendersmiles2 <- function(smiles,style="cow",...) {
dpurl <- buildCDKdepictURL(smiles) dpurl <- buildCDKdepictURL(smiles,style=style)
renderurl(dpurl,filename=tempfile(fileext=".svg"),...) renderurl(dpurl,filename=tempfile(fileext=".svg"),...)
} }
## Draw the three-panel prescreening figure for the compound at index
## `i`: the structure rendered from its SMILES (top left), the legends
## (top right) and the chromatogram traces (bottom).
##
## Arguments:
##   i          Index into `eics`, `maybekids`, `masses` and `osmesi`.
##   wd         Directories searched for the EIC and kids CSV files;
##              one trace is drawn per directory.
##   eics       Base names of the EIC CSV files (columns `rt`,
##              `intensity` are read).
##   maybekids  Base names of the candidate "kids" CSV files (columns
##              `retentionTime`, `intensity` are read); only files
##              that exist are used.
##   masses     Per-compound masses, printed in the title.
##   osmesi     Per-compound SMILES strings handed to rendersmiles2.
##   tags       Labels for the first legend row.
##   pal        ColorBrewer palette name.
##   cex        Legend text size.
##   rt_digits  Decimal places for retention times in the legends.
##   m_digits   Decimal places for the mass in the title.
##   rtrange    Optional x axis limits; NA or 0 entries fall back to
##              the range found in the EIC data.
##
## Returns the value of gc(); the function is called for its plotting
## side effects.
plot_id_aux <- function(i,wd,eics,maybekids,masses,osmesi,tags,pal="Dark2",cex=0.75,rt_digits=2,m_digits=4,rtrange=NULL) {
    ## Substitute the default limit for every NA or zero entry of
    ## `rtrange`.
    clean_rtrange <- function(def) {
        x1 <- rtrange[1]
        x2 <- rtrange[2]
        if (is.na(x1) || x1 == 0) x1 <- def[1]
        if (is.na(x2) || x2 == 0) x2 <- def[2]
        c(x1,x2)
    }

    eic <- eics[[i]]
    maybekid <- maybekids[[i]]

    ## NOTE(review): a file that cannot be read only produces a
    ## message and yields NULL here; the code below does not special
    ## case that, so confirm the intended handling of unreadable files.
    dfs <- lapply(file.path(wd,eic),function(fn) {
        tryCatch(read.csv(fn,stringsAsFactors = FALSE),
                 error=function(e) {message(paste(e,"; offending file:",fn))})
    })
    ## Retention time is rescaled by 1/60 (the axis is labelled in
    ## minutes below, so the files presumably store seconds).
    dfs <- lapply(dfs,function(x) data.frame(rt=x$rt/60.,intensity=x$intensity))

    ## Find existing children.
    maybes <- file.path(wd,maybekid)
    indkids <- which(file.exists(maybes))
    kids <- maybes[indkids]
    dfs_kids <- lapply(kids,read.csv,stringsAsFactors=FALSE)
    ## Intensities are negated so that the kids are drawn below the
    ## zero line, mirroring the EIC traces.
    dfs_kids <- lapply(dfs_kids,function(x) data.frame(rt=x$retentionTime/60.,intensity= -x$intensity))

    ## Find max intensities.
    w_max <- sapply(dfs,function (x) which.max(x$intensity))
    rt_max <- Map(function(df,w) df$rt[[w]],dfs,w_max)
    i_max <- Map(function(df,w) df$intensity[[w]],dfs,w_max)
    symbs <- LETTERS[seq_along(w_max)]

    ## Find max intensities in children
    w_max_kids <- sapply(dfs_kids,function (x) which.max(abs(x$intensity)))
    rt_max_kids <- Map(function(df,w) df$rt[[w]],dfs_kids,w_max_kids)
    i_max_kids <- Map(function(df,w) df$intensity[[w]],dfs_kids,w_max_kids)
    symbs_kids <- letters[indkids]

    def_rt_rng <- range(sapply(dfs,function(x) x$rt))
    rt_rng <- if (is.null(rtrange)) def_rt_rng else clean_rtrange(def_rt_rng)
    int_rng <- range(sapply(append(dfs_kids,dfs),function(x) x$intensity))

    ## brewer.pal() never returns fewer than 3 colours and caps the
    ## count at the palette maximum; recycle so that there is exactly
    ## one colour per trace and `cols[[n]]` below cannot run out of
    ## bounds.
    cols <- rep_len(RColorBrewer::brewer.pal(n=length(dfs),name=pal),length(dfs))
    lgnd <- Map(function(k,v) paste0(k,"= ",formatC(v,format="f",digits=rt_digits)),symbs,rt_max)

    layout(matrix(c(1,2,3,3), 2, 2, byrow = TRUE),
           widths=c(7,8), heights=c(4,6))

    ## Panel 1: the structure.
    struc_xr <- c(0,100)
    struc_yr <- c(0,100)
    par(mar=c(1,1,1,1))
    plot(1,1,type="n",xlab="",ylab="",xlim=struc_xr,ylim=struc_yr,xaxt="n",yaxt="n")
    rendersmiles2(osmesi[i],coords=c(struc_xr[1],struc_yr[1],struc_xr[2],struc_yr[2]))

    ## Panel 2: an empty canvas that only carries the legends.
    col_eng <- c(0,100)
    peak_int <- c(0,100)
    par(mar=c(1,1,1,1))
    plot(1,1,type="n",xlab="",ylab="",xlim=col_eng,ylim=peak_int,xaxt="n",yaxt="n",axes = FALSE)
    linfo <- legend("topleft",horiz=TRUE,legend=tags,col=cols,fill=cols,bty="n",cex=cex)
    legend(x=linfo$rect$left,y=linfo$rect$top-0.5*linfo$rect$h,horiz=TRUE,legend=lgnd,fill=cols,bty='n',cex=cex)
    cols_kids <- cols[indkids]
    lgnd_kids <- Map(function(k,v) paste0(k,"= ",formatC(v,digits=rt_digits,format="f")),symbs_kids,rt_max_kids)
    if (length(lgnd_kids)>0) legend(x=linfo$rect$left,y=linfo$rect$top-1*linfo$rect$h,horiz=TRUE,legend=lgnd_kids,fill=cols_kids,bty="n",cex=cex)

    ## Panel 3: the traces.
    par(mar=c(4,4,1,1))
    plot(1,1,xlab="",ylab="",xlim = rt_rng,ylim = int_rng,type="n")

    ## Plot eic across the directory set.
    for (n in seq_along(dfs)) {
        df <- dfs[[n]]
        col <- cols[[n]]
        lines(df$intensity ~ df$rt,col=col)
    }

    for (k in seq_along(dfs_kids)) {
        lines(intensity ~ rt,data=dfs_kids[[k]],type="h",col=cols_kids[[k]])
    }

    title(main=paste("ID:",i,"Ion m:",formatC(masses[[i]],digits=m_digits,format="f")),xlab="retention time [min]",ylab="intensity")
    ## Label each maximum; the growing offset staggers the labels.
    for (k in seq_along(w_max)) text(rt_max[[k]],i_max[[k]],labels=symbs[[k]],pos=4,offset=0.5*k)
    for (k in seq_along(w_max_kids)) text(rt_max_kids[[k]],i_max_kids[[k]],labels=symbs_kids[[k]],pos=4,offset=0.5*k)
    axis(1)
    axis(2)
    ## RChemMass::renderSMILES.rcdk(smiles[[i]],coords=c(x1,y1,x2,y2))
    gc()
}
##' Plot the output of prescreen. ##' Plot the output of prescreen.
##' ##'
##' @title Plot the Output of Prescreen ##' @title Plot the Output of Prescreen
##' @param wd Sequence of data dirs containing the prescreen subdir. ##' @param prescdf File table data-frame. See presc.shiny for details.
##' @param mode RMB mode. ##' @param mode RMB mode.
##' @param out The name of the output file. ##' @param out The name of the output file.
##' @param fn_cmpd_l The compound list name.
##' @param pal ColorBrewer palette name. ##' @param pal ColorBrewer palette name.
##' @param cex As in legend. ##' @param cex As in legend.
##' @param rt_digits Number of digits after the point for the retention time. ##' @param rt_digits Number of digits after the point for the retention time.
##' @param m_digits Number of digits after the point for the mass. ##' @param m_digits Number of digits after the point for the mass.
##' @param wd Sequence of data dirs containing the prescreen subdir.
##' @param digits Number of significant digits for peak ret times. ##' @param digits Number of significant digits for peak ret times.
##' @return Nothing useful. ##' @return Nothing useful.
##' @author Todor Kondić ##' @author Todor Kondić
##' @author Mira Narayanan
##' @author Anjana Elapavalore
##' @export ##' @export
presc.plot <- function(wd,mode,out="prescreen.pdf",pal="Dark2",cex=0.75,rt_digits=2,m_digits=4) { presc.plot <- function(prescdf,mode,out="prescreen.pdf",fn_cmpd_l,pal="Dark2",cex=0.75,rt_digits=2,m_digits=4) {
modemap=list(pH="MpHp_mass", modemap=list(pH="MpHp_mass",
mH="MmHm_mass", mH="MmHm_mass",
blahnh4="MpNH4_mass", blahnh4="MpNH4_mass",
blahna="MpNa_mass") blahna="MpNa_mass")
dfdir <- file.path(wd,"prescreen")
tags <- levels(factor(prescdf$tag))
wd <- prescdf$wd[match(tags,prescdf$tag)]
wd1 <- wd[[1]] wd1 <- wd[[1]]
df <- read.csv(file=get_cmpd_l_fn(wd1),stringsAsFactors = F) df <- read.csv(file=fn_cmpd_l,stringsAsFactors = F)
osmesi <- df$SMILES osmesi <- df$SMILES
no_cmpds <- length(osmesi) no_cmpds <- length(osmesi)
# reconf(wd1) # reconf(wd1)
...@@ -395,180 +493,28 @@ presc.plot <- function(wd,mode,out="prescreen.pdf",pal="Dark2",cex=0.75,rt_digit ...@@ -395,180 +493,28 @@ presc.plot <- function(wd,mode,out="prescreen.pdf",pal="Dark2",cex=0.75,rt_digit
# return(osmesi) # return(osmesi)
## Get the basenames of eic files. ## Get the basenames of eic files.
eics <- list.files(path=dfdir[[1]],patt=".*eic.csv") eics <- list.files(path=wd[[1]],patt=".*eic.csv")
maybekids <- sapply(strsplit(eics,split="\\."),function(x) {paste(x[[1]][1],'.kids.csv',sep='')}) maybekids <- sapply(strsplit(eics,split="\\."),function(x) {paste(x[[1]][1],'.kids.csv',sep='')})
pdf(out) pdf(out)
for (i in seq(length(eics))) { for (i in 1:length(osmesi)) plot_id_aux(i=i,wd=wd,eics=eics,maybekids=maybekids,masses=masses,osmesi=osmesi,tags=tags,rtrange=rtrange,cex=cex,pal=pal,rt_digits=rt_digits,m_digits=m_digits)
eic <- eics[[i]]
maybekid <- maybekids[[i]]
fn_ini <- lapply(wd,get_stgs_fn)
lbls <- lapply(fn_ini,function(x) {s <- yaml::yaml.load_file(x);s$prescreen$tag})
dfs <- lapply(file.path(dfdir,eic),function(fn) {
tryCatch(read.csv(fn,stringsAsFactors = F),
error=function(e) {message(paste(e,"; offending file:",fn))})
})
dfs <- lapply(dfs,function(x) data.frame(rt=x$rt/60.,intensity=x$intensity))
## Find existing children.
maybes <- file.path(dfdir,maybekid)
indkids <- which(file.exists(maybes))
kids <- maybes[indkids]
dfs_kids <- lapply(kids,read.csv,stringsAsFactors=F)
dfs_kids <- lapply(dfs_kids,function(x) data.frame(rt=x$retentionTime/60.,intensity= -x$intensity))
## Find max intensities.
w_max <- sapply(dfs,function (x) which.max(x$intensity))
rt_max <- Map(function(df,w) df$rt[[w]],dfs,w_max)
i_max<- Map(function(df,w) df$intensity[[w]],dfs,w_max)
symbs <- LETTERS[1:length(w_max)]
## Find max intensities in children
w_max_kids <- sapply(dfs_kids,function (x) which.max(abs(x$intensity)))
rt_max_kids <- Map(function(df,w) df$rt[[w]],dfs_kids,w_max_kids)
i_max_kids <- Map(function(df,w) df$intensity[[w]],dfs_kids,w_max_kids)
symbs_kids<- letters[indkids]
rt_rng <- range(sapply(dfs,function(x) x$rt))
int_rng <- range(sapply(append(dfs_kids,dfs),function(x) x$intensity))
cols <- RColorBrewer::brewer.pal(n=length(dfs),name=pal)
lgnd <- Map(function(k,v) paste(k,"= ",formatC(v,format="f",digits=rt_digits),sep=''),symbs,rt_max)
layout(matrix(c(1,2,3,3), 2, 2, byrow = TRUE),
widths=c(7,8), heights=c(6,6))
struc_xr <- c(0,100)
struc_yr <- c(0,100)
plot(1,1,type="n",xlab="",ylab="",xlim=struc_xr,ylim=struc_yr,xaxt="n",yaxt="n")
rendersmiles2(osmesi[i],coords=c(struc_xr[1],struc_yr[1],struc_xr[2],struc_yr[2]))
col_eng <- c(0,100)
peak_int <- c(0,100)
plot(1,1,type="n",xlab="",ylab="",xlim=col_eng,ylim=peak_int,xaxt="n",yaxt="n",axes = FALSE)
linfo <- legend("topleft",horiz=T,legend=lbls,col=cols,fill=cols,bty="n",cex=cex)
legend(x=linfo$rect$left,y=linfo$rect$top-0.5*linfo$rect$h,horiz=T,legend=lgnd,fill=cols,bty='n',cex=cex)
cols_kids <- cols[indkids]
lgnd_kids <- Map(function(k,v) paste(k,"= ",formatC(v,digits=rt_digits,format="f"),sep=''),symbs_kids,rt_max_kids)
if (length(lgnd_kids)>0) legend(x=linfo$rect$left,y=linfo$rect$top-1*linfo$rect$h,horiz=T,legend=lgnd_kids,fill=cols[indkids],bty="n",cex=cex)
plot(1,1,xlab="",ylab="",xlim = rt_rng,ylim = int_rng,type="n")
## Plot eic across the directory set.
for (n in seq(length(dfs))) {
df <- dfs[[n]]
col <- cols[[n]]
lines(df$intensity ~ df$rt,col=col)
}
if (length(dfs_kids) >0) {
for (k in 1:length(indkids)) {
lines(intensity ~ rt,data=dfs_kids[[k]],type="h",col=cols_kids[[k]])
}
}
title(main=paste("ID:",i,"Ion m:",formatC(masses[[i]],digits=m_digits,format="f")),xlab="retention time [min]",ylab="intensity")
for (k in seq(length(w_max))) text(rt_max[[k]],i_max[[k]],labels=symbs[[k]],pos=4,offset=0.5*k)
if (length(dfs_kids)>0) for (k in seq(length(w_max_kids))) text(rt_max_kids[[k]],i_max_kids[[k]],labels=symbs_kids[[k]],pos=4,offset=0.5*k)
axis(1)
axis(2)
## RChemMass::renderSMILES.rcdk(smiles[[i]],coords=c(x1,y1,x2,y2))
gc()
}
dev.off() dev.off()
} }
##' Interface to vectorised Mass Bank workflow. mkUI <- function(idSliderRange,setName,rtRange,tags) {
##'
##'
##' @title Vectorised Mass Bank Workflow
##' @param mb List of mass bank workflow objects
##' @param infodir List of subdirs containing info lists.
##' @param fn_stgs List of settings files.
##' @return A named list of mbWorkspace objects. The names are derived
##' from the input mb sequence.
##' @author Todor Kondić
mb.v<-function(mb,infodir,fn_stgs) {
    ## Apply mb.single element-wise over the three argument
    ## sequences. SIMPLIFY=FALSE keeps the result a list.
    f<-Vectorize(mb.single,vectorize.args=c("mb","infodir","fn_stgs"),SIMPLIFY=FALSE)
    x<-f(mb,infodir,fn_stgs)
    ## The result carries the names of the input mb sequence.
    names(x)<-names(mb)
    x
}
##' Interface to parallelised Mass Bank workflow.
##'
##' Calls mb.single on corresponding elements of \code{mb},
##' \code{infodir} and \code{fn_stgs}.
##'
##' @title Parallel Mass Bank Workflow
##' @param mb List of mass bank workflow objects
##' @param infodir List of subdirs containing info lists.
##' @param fn_stgs List of settings files.
##' @param cl Cluster. If FALSE (the default), no cluster is used and
##'     the elements are processed one after another in the current
##'     process.
##' @return A named list of mbWorkspace objects. The names are derived
##' from the input mb sequence.
##' @author Todor Kondić
mb.p<-function(mb,infodir,fn_stgs,cl=FALSE) {
    x <- if (identical(cl,FALSE)) {
        ## FALSE is not a cluster object, so handing it to
        ## clusterMap would only raise an error; run sequentially.
        Map(mb.single,mb,infodir,fn_stgs)
    } else {
        parallel::clusterMap(cl=cl,mb.single,mb,infodir,fn_stgs)
    }
    names(x)<-names(mb)
    x
}
##' Prescreening using shiny interface.
##'
##' @title Prescreening with Shiny
##' @return Nothing useful.
##' @author Jessy Krier
##' @author Mira Narayanan
presc.shiny <-function(wd,mode,pal="Dark2",cex=0.75,rt_digits=2,m_digits=4,prescdf){
modemap=list(pH="MpHp_mass",
mH="MmHm_mass",
blahnh4="MpNH4_mass",
blahna="MpNa_mass")
DEFAULT_RT_RANGE=c(0,40)
default_min_rt=DEFAULT_RT_RANGE[1]
default_max_rt=DEFAULT_RT_RANGE[2]
dfdir <- file.path(wd,"prescreen")
wd1 <- wd[[1]]
df <- read.csv(file=get_cmpd_l_fn(wd1),stringsAsFactors = F)
osmesi <- df$SMILES
no_cmpds <- length(osmesi)
# reconf(wd1)
masses <- lapply(osmesi,function (smile) {
#osmesi <- tryCatch(RMassBank::findSmiles(i), error = function(e) NA)
zz <- RChemMass::getSuspectFormulaMass(smile)
zz[[modemap[[mode]]]]
})
for (col in c("MS1","MS2","Alignment","Intensity","AboveNoise","Comments")) {
if (is.null(prescdf[[col]])) prescdf[[col]] <- rep(T,length(prescdf$ID))
}
#message("Masses:",masses)
# return(osmesi)
## Get the basenames of eic files.
eics <- list.files(path=dfdir[[1]],patt=".*eic.csv")
maybekids <- sapply(strsplit(eics,split="\\."),function(x) {paste(x[[1]][1],'.kids.csv',sep='')})
tags <- levels(factor(prescdf$tag))
spectProps <- sapply(tags,function (tag) paste("spectProps",tag,sep=""))
idsliderrange <- range(df$ID)
tabPanelList <- lapply(tags, function(tag) { tabPanelList <- lapply(tags, function(tag) {
shiny::tabPanel(tag, shiny::checkboxGroupInput(paste("spectProps",tags,sep=""), "Quality Control", shiny::tabPanel(tag, shiny::checkboxGroupInput(paste("spectProps",tag,sep=""), "Quality Control",
c("MS1" = T, c(MS1 = "MS1",
"MS2" = T, MS2 = "MS2",
"Alignment" = T, Alignment = "Alignment",
"Intensity" = T, Intensity = "Intensity",
"AboveNoise" = T)), AboveNoise = "AboveNoise")),
shiny::textAreaInput(paste("caption",tag,sep=""), "Comments:", "Insert your comment here..."), shiny::textAreaInput(paste("caption",tag,sep=""), "Comments:", "Insert your comment here..."),
shiny::verbatimTextOutput(paste("value",tag,sep="")))}) shiny::verbatimTextOutput(paste("value",tag,sep=""))
)})
nvp <- do.call(shiny::navlistPanel, tabPanelList) nvPanel <- do.call(shiny::navlistPanel, tabPanelList)
ui <- shinydashboard::dashboardPage( ui <- shinydashboard::dashboardPage(
shinydashboard::dashboardHeader(title = "Prescreening"), shinydashboard::dashboardHeader(title = "Prescreening"),
...@@ -600,119 +546,115 @@ presc.shiny <-function(wd,mode,pal="Dark2",cex=0.75,rt_digits=2,m_digits=4,presc ...@@ -600,119 +546,115 @@ presc.shiny <-function(wd,mode,pal="Dark2",cex=0.75,rt_digits=2,m_digits=4,presc
), ),
shinydashboard::box( shinydashboard::box(
title = "Compounds", width=5,solidHeader = TRUE, collapsible = TRUE, "", shiny::br(), title = "Compounds", width=5,solidHeader = TRUE, collapsible = TRUE, "", shiny::br(),
shiny::sliderInput("idslider", "Compound number:", idsliderrange[1], idsliderrange[2], value=1,step=1) shiny::sliderInput("idslider", "Compound number:", idSliderRange[1], idSliderRange[2], value=1,step=1)
), ),
shinydashboard::box( shinydashboard::box(
title = "Plot x axis range", width = 5, solidHeader = TRUE, collapsible = TRUE, title = "Plot x axis range", width = 5, solidHeader = TRUE, collapsible = TRUE,
shiny::numericInput("min_val", "Minimum x Axis Value", default_min_rt), shiny::numericInput("min_val", "Minimum x Axis Value", rtRange[1]),
shiny::numericInput("max_val", "Maximum x Axis Value", default_max_rt) shiny::numericInput("max_val", "Maximum x Axis Value", rtRange[2])
), ),
shinydashboard::box( shinydashboard::box(
title = "Prescreening analysis", width = 5, solidHeader = TRUE, collapsible = TRUE, title = "Prescreening analysis", width = 5, solidHeader = TRUE, collapsible = TRUE,
shiny::titlePanel(prescdf$set_name), shiny::titlePanel(setName),
shiny::uiOutput("nvp"), nvPanel,
shiny::actionButton("submitQA", "Submit", icon = shiny::icon("save")) shiny::actionButton("submitQA", "Submit", icon = shiny::icon("save")),
shiny::textInput("fn_ftable", "File table Name",value="ftable.csv"),
shiny::actionButton("savefiletable", "Save File Table")
) )
) )
) )
) )}
plotall <- function(i,rtrange) { ##' Prescreening using shiny interface.
eic <- eics[[i]] ##'
maybekid <- maybekids[[i]] ##' @title Prescreening with Shiny
fn_ini <- lapply(wd,get_stgs_fn) ##' @return Nothing useful.
##' @author Jessy Krier
lbls <- lapply(fn_ini,function(x) {s <- yaml::yaml.load_file(x);s$prescreen$tag}) ##' @author Mira Narayanan
dfs <- lapply(file.path(dfdir,eic),function(fn) { ##' @author Hiba Mohammed Taha
tryCatch(read.csv(fn,stringsAsFactors = F), ##' @author Anjana Elapavalore
error=function(e) {message(paste(e,"; offending file:",fn))}) ##' @author Todor Kondić
}) ##' @param prescdf File table data-frame. Columns: Files,ID,wd,tag,set_name ...
##' @param mode RMassBank mode.
dfs <- lapply(dfs,function(x) data.frame(rt=x$rt/60.,intensity=x$intensity)) ##' @param fn_cmpd_l Compound list file name.
##' @param pal ColorBrewer palette.
##' @param cex Size of fonts.
##' @param rt_digits Number of decimal places for the retention time.
##' @param m_digits Number of decimal places for the mass.
##' @export
presc.shiny <-function(prescdf,mode,fn_cmpd_l,pal="Dark2",cex=0.75,rt_digits=2,m_digits=4){
## Helper functions
queryFileTable <- function(df,id) {
df[df$ID %in% id,]
}
## Find existing children.
maybes <- file.path(dfdir,maybekid)
indkids <- which(file.exists(maybes))
kids <- maybes[indkids]
dfs_kids <- lapply(kids,read.csv,stringsAsFactors=F)
dfs_kids <- lapply(dfs_kids,function(x) data.frame(rt=x$retentionTime/60.,intensity= -x$intensity))
updateFileTable <- function(df,id,linput) {
for (tag in names(linput)) {
entries <- names(linput[[tag]])
cond <- (df$ID %in% id) & (df$tag == tag)
df[cond,entries] <- linput[[tag]]
}
df
}
## Find max intensities. ## Constants
w_max <- sapply(dfs,function (x) which.max(x$intensity)) MODEMAP=list(pH="MpHp_mass",
rt_max <- Map(function(df,w) df$rt[[w]],dfs,w_max) mH="MmHm_mass",
i_max<- Map(function(df,w) df$intensity[[w]],dfs,w_max) blahnh4="MpNH4_mass",
symbs <- LETTERS[1:length(w_max)] blahna="MpNa_mass")
DEFAULT_RT_RANGE=c(NA,NA)
## Find max intensities in children QANAMES <- c("MS1","MS2","Alignment","Intensity","AboveNoise")
w_max_kids <- sapply(dfs_kids,function (x) which.max(abs(x$intensity)))
rt_max_kids <- Map(function(df,w) df$rt[[w]],dfs_kids,w_max_kids)
i_max_kids <- Map(function(df,w) df$intensity[[w]],dfs_kids,w_max_kids)
symbs_kids<- letters[indkids]
tags <- levels(factor(prescdf$tag))
wd <- prescdf$wd[match(tags,prescdf$tag)]
rt_rng <- rtrange #range(sapply(dfs,function(x) x$rt))
int_rng <- range(sapply(append(dfs_kids,dfs),function(x) x$intensity)) wd1 <- wd[[1]]
cols <- RColorBrewer::brewer.pal(n=length(dfs),name=pal) cmpd_l_df <- read.csv(file=fn_cmpd_l,stringsAsFactors = F)
lgnd <- Map(function(k,v) paste(k,"= ",formatC(v,format="f",digits=rt_digits),sep=''),symbs,rt_max) osmesi <- cmpd_l_df$SMILES
no_cmpds <- length(osmesi)
layout(matrix(c(1,2,3,3), 2, 2, byrow = TRUE), # reconf(wd1)
widths=c(7,8), heights=c(6,6)) masses <- lapply(osmesi,function (smile) {
struc_xr <- c(0,100) #osmesi <- tryCatch(RMassBank::findSmiles(i), error = function(e) NA)
struc_yr <- c(0,100) zz <- RChemMass::getSuspectFormulaMass(smile)
plot(1,1,type="n",xlab="",ylab="",xlim=struc_xr,ylim=struc_yr,xaxt="n",yaxt="n") zz[[MODEMAP[[mode]]]]
rendersmiles2(osmesi[i],coords=c(struc_xr[1],struc_yr[1],struc_xr[2],struc_yr[2])) })
col_eng <- c(0,100)
peak_int <- c(0,100)
plot(1,1,type="n",xlab="",ylab="",xlim=col_eng,ylim=peak_int,xaxt="n",yaxt="n",axes = FALSE)
linfo <- legend("topleft",horiz=T,legend=lbls,col=cols,fill=cols,bty="n",cex=cex)
legend(x=linfo$rect$left,y=linfo$rect$top-0.5*linfo$rect$h,horiz=T,legend=lgnd,fill=cols,bty='n',cex=cex)
cols_kids <- cols[indkids]
lgnd_kids <- Map(function(k,v) paste(k,"= ",formatC(v,digits=rt_digits,format="f"),sep=''),symbs_kids,rt_max_kids)
if (length(lgnd_kids)>0) legend(x=linfo$rect$left,y=linfo$rect$top-1*linfo$rect$h,horiz=T,legend=lgnd_kids,fill=cols[indkids],bty="n",cex=cex)
plot(1,1,xlab="",ylab="",xlim = rt_rng,ylim = int_rng,type="n")
## Plot eic across the directory set.
for (n in seq(length(dfs))) {
df <- dfs[[n]]
col <- cols[[n]]
lines(df$intensity ~ df$rt,col=col)
}
if (length(dfs_kids) >0) { for (col in c("MS1","MS2","Alignment","Intensity","AboveNoise","Comments")) {
for (k in 1:length(indkids)) { if (is.null(prescdf[[col]])) prescdf[[col]] <- T
lines(intensity ~ rt,data=dfs_kids[[k]],type="h",col=cols_kids[[k]])
}
}
title(main=paste("ID:",i,"Ion m:",formatC(masses[[i]],digits=m_digits,format="f")),xlab="retention time [min]",ylab="intensity")
for (k in seq(length(w_max))) text(rt_max[[k]],i_max[[k]],labels=symbs[[k]],pos=4,offset=0.5*k)
if (length(dfs_kids)>0) for (k in seq(length(w_max_kids))) text(rt_max_kids[[k]],i_max_kids[[k]],labels=symbs_kids[[k]],pos=4,offset=0.5*k)
axis(1)
axis(2)
## RChemMass::renderSMILES.rcdk(smiles[[i]],coords=c(x1,y1,x2,y2))
gc()
} }
clean_rtrange <- function(rtrange) {
x1 <- rtrange[1]
x2 <- rtrange[2]
if (is.na(x1)) x1 <- default_min_rt
if (is.na(x2)) x2 <- default_max_rt
c(x1,x2)
}
captureQA <- function() {
QAlist <- list() ## Get the basenames of eic files.
list(add=function (entry) QAlist[[length(QAlist)+1]]<<-entry, eics <- list.files(path=wd[[1]],patt=".*eic.csv")
get=function() QAlist) maybekids <- sapply(strsplit(eics,split="\\."),function(x) {paste(x[[1]][1],'.kids.csv',sep='')})
plot_id <- function (i,rtrange) plot_id_aux(i=i,wd=wd,eics=eics,maybekids=maybekids,masses=masses,osmesi=osmesi,tags=tags,rtrange=rtrange,cex=cex,pal=pal,rt_digits=rt_digits,m_digits=m_digits)
spectProps <- sapply(tags,function (tag) paste("spectProps",tag,sep=""))
idSliderRange <- range(cmpd_l_df$ID)
ui <- mkUI(idSliderRange=idSliderRange,setName=prescdf$set_name,rtRange=DEFAULT_RT_RANGE,tags=tags)
getCheckboxValues <- function(tag,input) {
chkbox <- input[[spectProps[[tag]]]]
q <- sapply(QANAMES,function (qn) if (qn %in% chkbox) T else F)
names(q) <- QANAMES
q
} }
server <- function(input, output, session) { server <- function(input, output, session) {
rv <- shiny::reactiveValues(prescList=list(), rv <- shiny::reactiveValues(prescList=list(),
prescdf=prescdf, prescdf=prescdf,
...@@ -726,7 +668,7 @@ presc.shiny <-function(wd,mode,pal="Dark2",cex=0.75,rt_digits=2,m_digits=4,presc ...@@ -726,7 +668,7 @@ presc.shiny <-function(wd,mode,pal="Dark2",cex=0.75,rt_digits=2,m_digits=4,presc
i=input$idslider i=input$idslider
rtrange <- c(input$min_val,input$max_val) rtrange <- c(input$min_val,input$max_val)
plotall(i,rtrange=clean_rtrange(rtrange)) plot_id(i,rtrange=rtrange)
}) })
output$value <- renderText( output$value <- renderText(
...@@ -742,48 +684,50 @@ presc.shiny <-function(wd,mode,pal="Dark2",cex=0.75,rt_digits=2,m_digits=4,presc ...@@ -742,48 +684,50 @@ presc.shiny <-function(wd,mode,pal="Dark2",cex=0.75,rt_digits=2,m_digits=4,presc
shiny::observeEvent(input$saveplot, shiny::observeEvent(input$saveplot,
{ {
i=input$idslider i=input$idslider
message("Save plot button pressed.")
pfn <-input$plotname pfn <-input$plotname
if (is.na(pfn)) pfn <- "plotCpdID_%i.pdf" if (is.na(pfn)) pfn <- "plotCpdID_%i.pdf"
fn <- sprintf(pfn,i) fn <- sprintf(pfn,i)
rtrange <- c(input$min_val,input$max_val) rtrange <- c(input$min_val,input$max_val)
pdf(file=fn, width=12, height=8) pdf(file=fn, width=12, height=8)
plotall(i,rtrange=clean_rtrange(rtrange)) plot_id(i,rtrange=rtrange)
dev.off() dev.off()
}) })
shiny::observeEvent(input$saveallplots, shiny::observeEvent(input$saveallplots,
{ {
i=input$idslider i=input$idslider
message("Save plot button pressed.")
pfn <-input$plotname pfn <-input$plotname
if (is.na(pfn)) pfn <- "plotCpdID_%i.pdf" if (is.na(pfn)) pfn <- "plotall.pdf"
fn <- sprintf(pfn,i) fn <- sprintf(pfn,i)
rtrange <- c(input$min_val,input$max_val)
pdf(file=fn, width=12, height=8) pdf(file=fn, width=12, height=8)
for (i in 1:rv$no_cmpds) { for (i in 1:rv$no_cmpds) {
plotall(i,rtrange=rv$default_range) plot_id(i)
message("Compound ID ",i," done.") message("Compound ID ",i," done.")
} }
dev.off() dev.off()
}) })
shiny::observeEvent(input$idslider,{
i <- input$idslider
tag <- rv$prescdf$tag[[i]]
})
shiny::observeEvent(input$submitQA,{ shiny::observeEvent(input$submitQA,{
res <- lapply(rv$tags,getCheckboxValues,input)
names(res) <- rv$tags
rv$prescdf <- updateFileTable(df=rv$prescdf,id=input$idslider,linput=res)
}) })
shiny::observe({
i <- input$idslider
sdf <- queryFileTable(df=rv$prescdf,id=i)
for (t in sdf$tag) {
sprop <- rv$spectProps[[t]]
sel <- as.logical(sdf[sdf$tag %in% t,QANAMES])
choices <- QANAMES[sel]
names(choices) <- QANAMES[sel]
shiny::updateCheckboxGroupInput(session = session,inputId = sprop,selected=choices)
}
})
output$nvp <- shiny::renderUI( shiny::observeEvent(input$savefiletable,
{ {
nvp write.csv(file=input$fn_ftable,x=rv$prescdf,row.names = F)
}) })
......
...@@ -7,7 +7,6 @@ ...@@ -7,7 +7,6 @@
##' @author Todor Kondić ##' @author Todor Kondić
attch<-function(...) paste(...,sep='') attch<-function(...) paste(...,sep='')
##' Do the prescreening. ##' Do the prescreening.
##' ##'
##' @title Prescreening on bunch of files. ##' @title Prescreening on bunch of files.
...@@ -44,140 +43,3 @@ presc.do<-function(fn_data,fn_cmpd_l,mode,dest=".",proc=F,...) { ...@@ -44,140 +43,3 @@ presc.do<-function(fn_data,fn_cmpd_l,mode,dest=".",proc=F,...) {
} }
} }
##' Performs massbank workflow on multiple mzML files:
##'
##'
##' 1. Create rdir if it does not exist.
##'
##' 2. Generate data subdirectories inside `rdir` that will be used to
##' keep the processed data belonging to a particular mzML file in
##' the fileset.
##'
##' 3. Generate the full RMassBank settings files by merging the
##' sections from the `fn_cmpd_list` with the defaults and place those
##' new settings files in the appropriate data subdirs.
##'
##' 4. Finally perform the spectral workflow on each data file and
##' place the results in the data subdirs.
##'
##' @title Perform MassBank Workflow on Multiple Compound Mixtures
##' @param fn_data List of mzML data filenames to be processed.
##' @param fn_cmpd_l Compound list.
##' @param mode as in msmsRead.
##' @param dest The destination data directory.
##' @param combine If TRUE, use combineMultiplicies to merge
##' workspaces corresponding to different collisional energies.
##' @param proc Split work between this amount of processes. If FALSE
##' (or, 1), run sequential.
##' @param split This is the last step before combine
##' @return A named list of msmsWorkspace objects.
##' @author Todor Kondić
##' @export
sw.do <- function(fn_data, fn_cmpd_l, mode, dest=".", combine=FALSE,
                  proc=FALSE,split=3) {

    conf(fn_data,fn_cmpd_l,dest)

    ## Read one data file into a fresh msmsWorkspace.
    fread <- function(fn_data) {
        wd <- fn_data2wd(fn_data,dest)
        reconf(wd)
        w <- RMassBank::newMsmsWorkspace()
        RMassBank::msmsRead(w=w,filetable = get_ftable_fn(wd),
                            mode=mode,readMethod = "mzR")
    }

    ## Run the requested msmsWorkflow steps on each workspace, using
    ## an `archive` subdirectory of its data dir for the archive files.
    fwork <- Vectorize(function(w,wd,steps) {
        archdir <- file.path(wd,"archive")
        no_drama_mkdir(archdir)
        fn_arch <- file.path(archdir,"archive")
        reconf(wd)
        RMassBank::msmsWorkflow(w=w,mode=mode,steps=steps,archivename = fn_arch)
    }, vectorize.args = c("w","wd"),SIMPLIFY=FALSE)

    ## NOTE(review): the documentation says proc = 1 runs
    ## sequentially, but any truthy `proc` (including 1) starts a
    ## cluster here.
    w <- if (proc) {
             cl <- parallel::makeCluster(proc)
             ## Shut the worker processes down when sw.do returns or
             ## fails; they were previously left running.
             on.exit(parallel::stopCluster(cl),add=TRUE)
             parallel::clusterEvalQ(cl,library(shinyscreen))
             parallel::clusterMap(cl,fread,fn_data)
         } else {
             lapply(fn_data,fread)
         }
    wd <- fn_data2wd(fn_data,dest)
    w <- fwork(w,wd,steps=2:split)
    if (combine) {
        ## Combined workflow is not based on a single file, but the
        ## functions that generate config are. Therefore, lets create
        ## a fake filename.
        fakefile <- "combine.mzML"
        cwd <- fn_data2wd(fakefile,dest)
        xx <- get_stgs_fn(wd[[1]])
        file.copy(xx,"combine.ini",overwrite = TRUE)
        # mk_combine_file(get_stgs_fn(wd),"combine.ini")
        conf(fakefile,fn_cmpd_l,dest)
        reconf(cwd)
        w <- list(RMassBank::combineMultiplicities(w))
        wd <- list(cwd)
    }
    w <- fwork(w,wd,steps=(split+1):8)
    names(w) <- wd
    w
}
##' Creates and prepares mbWorkspace objects before the full workflow
##' is performed on them. In process, create directory `info` as a
##' subdir of any particular data dir and place the starting info list
##' there.
##'
##'
##' @title Prepare mbWorkspace objects
##' @param w A list of spectral workspace inputs.
##' @return Named list of prepared mbWorkspace objects.
##' @author Todor Kondić
##' @export
mb.prep<-function(w) {
    ## The data directories are carried as the names of `w`.
    wd <- names(w)
    fwork <- Vectorize(function(w,wd) {
        reconf(wd)
        ## Called before the workflow; the returned value itself is
        ## not used any further.
        idir <- gen_info_dir(wd)
        mb <- RMassBank::newMbWorkspace(w)
        ## resetInfolists returns the modified workspace; without the
        ## assignment the reset has no effect.
        mb <- RMassBank::resetInfolists(mb)
        RMassBank::mbWorkflow(mb,infolist_path = get_info_fn(wd))
    },vectorize.args = c("w","wd"),SIMPLIFY=FALSE)
    mb <- fwork(w,wd)
    names(mb) <- wd
    mb
}
##' Perform the Mass Bank workflow on the prepared mbWorkspace
##' objects. FOR THE RECORD: writing documentation is boring. Anyway,
##' record results in the info subfolder of each data subdir.
##'
##'
##' @title Perform the Mass Bank workflow
##' @param mb The list of prepared mbWorkspace objects.
##' @param proc Split work between this amount of processes. If FALSE
##' (or, 1), run sequential.
##' @return The named list of processed mbWorkspace objects.
##' @author Todor Kondić
##' @export
mb.do<-function(mb,proc=FALSE) {
    ## The data directories are carried as the names of `mb`.
    wd <- names(mb)
    infodir <- get_info_dir(wd)
    fwork <- Vectorize(function(mb,id,wd) {
        reconf(wd)
        mb <- RMassBank::resetInfolists(mb)
        mb <- RMassBank::loadInfolists(mb,id)
        ## The workflow runs with the info dir as working directory;
        ## restore the previous one even if the workflow fails.
        dcur <- setwd(id)
        on.exit(setwd(dcur),add=TRUE)
        ## NOTE(review): `step` relies on partial argument matching,
        ## presumably against `steps` -- confirm against RMassBank.
        mb <- RMassBank::mbWorkflow(mb,step=1:8)
        mb},vectorize.args = c("mb","id","wd"))

    if (proc) {
        cl<-parallel::makeCluster(proc)
        ## Shut the worker processes down when mb.do returns or
        ## fails; they were previously left running.
        on.exit(parallel::stopCluster(cl),add=TRUE)
        parallel::clusterEvalQ(cl,library("shinyscreen"))
        parallel::clusterMap(cl,fwork,mb,infodir,wd)
    } else {
        fwork(mb,infodir,wd)
    }
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/run.R
\name{mb.do}
\alias{mb.do}
\title{Perform the Mass Bank workflow}
\usage{
mb.do(mb, proc = F)
}
\arguments{
\item{mb}{The list of prepared mbWorkspace objects.}
\item{proc}{Split work between this amount of processes. If FALSE
(or, 1), run sequential.}
}
\value{
The named list of processed mbWorkspace objects.
}
\description{
Perform the Mass Bank workflow on the prepared mbWorkspace
objects. FOR THE RECORD: writing documentation is boring. Anyway,
record results in the info subfolder of each data subdir.
}
\author{
Todor Kondić
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mix.R
\name{mb.p}
\alias{mb.p}
\title{Parallel Mass Bank Workflow}
\usage{
mb.p(mb, infodir, fn_stgs, cl = F)
}
\arguments{
\item{mb}{List of mass bank workflow objects}
\item{infodir}{List of subdirs containing info lists.}
\item{fn_stgs}{List of settings files.}
\item{cl}{Cluster.}
}
\value{
A named list of mbWorkspace objects. The names are derived
from the input mb sequence.
}
\description{
Interface to parallelised Mass Bank workflow.
}
\author{
Todor Kondić
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/run.R
\name{mb.prep}
\alias{mb.prep}
\title{Prepare mbWorkspace objects}
\usage{
mb.prep(w)
}
\arguments{
\item{w}{A list of spectral workspace inputs.}
}
\value{
Named list of prepared mbWorkspace objects.
}
\description{
Creates and prepares mbWorkspace objects before the full workflow
is performed on them. In the process, creates the directory
\code{info} as a subdir of each data dir and places the starting
info list there.
}
\author{
Todor Kondić
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mix.R
\name{mb.v}
\alias{mb.v}
\title{Vectorised Mass Bank Workflow}
\usage{
mb.v(mb, infodir, fn_stgs)
}
\arguments{
\item{mb}{List of mass bank workflow objects}
\item{infodir}{List of subdirs containing info lists.}
\item{fn_stgs}{List of settings files.}
}
\value{
A named list of mbWorkspace objects. The names are derived
from the input mb sequence.
}
\description{
Interface to vectorised Mass Bank workflow.
}
\author{
Todor Kondić
}
...@@ -4,16 +4,18 @@ ...@@ -4,16 +4,18 @@
\alias{presc.plot} \alias{presc.plot}
\title{Plot the Output of Prescreen} \title{Plot the Output of Prescreen}
\usage{ \usage{
presc.plot(wd, mode, out = "prescreen.pdf", pal = "Dark2", presc.plot(prescdf, mode, out = "prescreen.pdf", fn_cmpd_l,
cex = 0.75, rt_digits = 2, m_digits = 4) pal = "Dark2", cex = 0.75, rt_digits = 2, m_digits = 4)
} }
\arguments{ \arguments{
\item{wd}{Sequence of data dirs containing the prescreen subdir.} \item{prescdf}{File table data-frame. See presc.shiny for details.}
\item{mode}{RMB mode.} \item{mode}{RMB mode.}
\item{out}{The name of the output file.} \item{out}{The name of the output file.}
\item{fn_cmpd_l}{The compound list name.}
\item{pal}{ColorBrewer palette name.} \item{pal}{ColorBrewer palette name.}
\item{cex}{As in legend.} \item{cex}{As in legend.}
...@@ -22,6 +24,8 @@ presc.plot(wd, mode, out = "prescreen.pdf", pal = "Dark2", ...@@ -22,6 +24,8 @@ presc.plot(wd, mode, out = "prescreen.pdf", pal = "Dark2",
\item{m_digits}{Number of digits after the point for the mass.} \item{m_digits}{Number of digits after the point for the mass.}
\item{wd}{Sequence of data dirs containing the prescreen subdir.}
\item{digits}{Number of significant digits for peak ret times.} \item{digits}{Number of significant digits for peak ret times.}
} }
\value{ \value{
...@@ -32,4 +36,8 @@ Plot the output of prescreen. ...@@ -32,4 +36,8 @@ Plot the output of prescreen.
} }
\author{ \author{
Todor Kondić Todor Kondić
Mira Narayanan
Anjana Elapavalore
} }
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mix.R
\name{presc.shiny}
\alias{presc.shiny}
\title{Prescreening with Shiny}
\usage{
presc.shiny(prescdf, mode, fn_cmpd_l, pal = "Dark2", cex = 0.75,
rt_digits = 2, m_digits = 4)
}
\arguments{
\item{prescdf}{File table data-frame. Columns: Files,ID,wd,tag,set_name ...}
\item{mode}{RMassBank mode.}
\item{fn_cmpd_l}{Compound list file name.}
\item{pal}{ColorBrewer palette.}
\item{cex}{Size of fonts.}
\item{rt_digits}{Number of decimal places for the retention time.}
\item{m_digits}{Number of decimal places for the mass.}
}
\value{
Nothing useful.
}
\description{
Prescreening using shiny interface.
}
\author{
Jessy Krier
Mira Narayanan
Hiba Mohammed Taha
Anjana Elapavalore
Todor Kondić
}
...@@ -4,11 +4,13 @@ ...@@ -4,11 +4,13 @@
\alias{rendersmiles2} \alias{rendersmiles2}
\title{Turn SMILES to an Image Using Online Resource} \title{Turn SMILES to an Image Using Online Resource}
\usage{ \usage{
rendersmiles2(smiles, ...) rendersmiles2(smiles, style = "cow", ...)
} }
\arguments{ \arguments{
\item{smiles}{The SMILES string.} \item{smiles}{The SMILES string.}
\item{style}{Structure style.}
\item{...}{Hand over to renderurl.} \item{...}{Hand over to renderurl.}
} }
\value{ \value{
......
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/run.R
\name{sw.do}
\alias{sw.do}
\title{Perform MassBank Workflow on Multiple Compound Mixtures}
\usage{
sw.do(fn_data, fn_cmpd_l, mode, dest = ".", combine = F, proc = F,
split = 3)
}
\arguments{
\item{fn_data}{List of mzML data filenames to be processed.}
\item{fn_cmpd_l}{Compound list.}
\item{mode}{as in msmsRead.}
\item{dest}{The destination data directory.}
\item{combine}{If TRUE, use combineMultiplicities to merge
workspaces corresponding to different collisional energies.}
\item{proc}{Split work between this number of processes. If FALSE
(or, 1), run sequential.}
\item{split}{This is the last step before combine}
}
\value{
A named list of msmsWorkspace objects.
}
\description{
Performs massbank workflow on multiple mzML files:
}
\details{
\enumerate{
\item Create rdir if it does not exist.
\item Generate data subdirectories inside \code{rdir} that will be used to
keep the processed data belonging to a particular mzML file in
the fileset.
\item Generate the full RMassBank settings files by merging the
sections from the \code{fn_cmpd_list} with the defaults and place those
new settings files in the appropriate data subdirs.
\item Finally perform the spectral workflow on each data file and
place the results in the data subdirs.
}
}
\author{
Todor Kondić
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment