Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • eci/shinyscreen
  • miroslav.kratochvil/shinyscreen
2 results
Show changes
Commits on Source (1063)
Showing with 2611 additions and 27 deletions
.git
/.Renviron
/.Rhistory
# Copyright (C) 2023 by University of Luxembourg
# Summary
#
# This is a file which describes CI/CD pipeline for Shinyscreen. The
# main goal is to test Shinyscreen and create appropriately versioned
# Docker images on whichever container registry is used.
#
# There are three stages: _build_, _test_ and _release_. Jobs
# associated with build and test stages should run on all
# commits. Release jobs run exclusively on tagged commits of vX.Y.Z
# format.
#
# Job tags _docker_ and _$RUNNER_TAG_ serve to inform GitLab which
# gitlab-runners (programs which perform jobs) are suitable to pick up
# the pipeline.
#
# The pipeline itself is following the paradigm of docker-in-docker
# build. In short, it helps having reproducible builds and
# tests. There is more documentation available online.
image: docker:23.0.1
services:
- docker:23.0.1-dind
stages:
- dep_images
- build
- test
- release
variables:
DOCKER_TLS_CERTDIR: "/certs"
DEP_ROOT: $CI_REGISTRY_IMAGE/dep
BASE_IMAGE: $CI_REGISTRY_IMAGE/dep/base:latest
SSUSER_IMAGE: $CI_REGISTRY_IMAGE/dep/ssuser:latest
ISB401_BASE_IMAGE: $CI_REGISTRY_IMAGE/dep/isb401_base:latest
TEST_IMAGE: $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG
RELEASE_IMAGE: $CI_REGISTRY_IMAGE:$CI_COMMIT_TAG
ISB401_TEST_IMAGE: $CI_REGISTRY_IMAGE:isb401_$CI_COMMIT_REF_SLUG
ISB401_RELEASE_IMAGE: $CI_REGISTRY_IMAGE:isb401_$CI_COMMIT_TAG
base-image:
stage: dep_images
rules:
- if: $CI_COMMIT_TAG == ""
changes:
- docker/base/Dockerfile
script:
- cd docker/base
- docker build --pull -t $BASE_IMAGE .
- docker push $BASE_IMAGE
ssuser-image:
tags:
- docker
- $RUNNER_TAG
stage: dep_images
needs:
- job: base-image
optional: true
rules:
- if: $CI_COMMIT_TAG == ""
changes:
- docker/ssuser/Dockerfile
script:
- cd docker/ssuser
- docker build --pull -t $SSUSER_IMAGE .
- docker push $SSUSER_IMAGE
isb401-base-image:
tags:
- docker
- $RUNNER_TAG
stage: dep_images
needs:
- job: ssuser-image
optional: true
rules:
- if: $CI_COMMIT_TAG == ""
changes:
- docker/isb401_base/Dockerfile
script:
- cd docker/isb401_base
- docker build --pull -t $ISB401_BASE_IMAGE .
- docker push $ISB401_BASE_IMAGE
build:
tags:
- docker
- $RUNNER_TAG
stage: build
script:
- docker build --pull -t $TEST_IMAGE .
- docker push $TEST_IMAGE
build-isb401:
tags:
- docker
- $RUNNER_TAG
stage: build
needs:
- job: build
script:
- cp docker/isb401/Dockerfile .
- docker build --pull -t $ISB401_TEST_IMAGE .
- docker push $ISB401_TEST_IMAGE
test:
tags:
- docker
- $RUNNER_TAG
stage: test
script:
- docker pull $TEST_IMAGE
- docker run $TEST_IMAGE test
test-isb401:
tags:
- docker
- $RUNNER_TAG
stage: test
script:
- docker pull $ISB401_TEST_IMAGE
- docker run $ISB401_TEST_IMAGE test
release-image:
tags:
- docker
- $RUNNER_TAG
stage: release
script:
- docker pull $TEST_IMAGE
- docker tag $TEST_IMAGE $RELEASE_IMAGE
- docker push $RELEASE_IMAGE
rules:
- if: $CI_COMMIT_TAG =~ /^v[0-9]+.[0-9]+.[0-9]+/
release-isb401:
tags:
- docker
- $RUNNER_TAG
stage: release
script:
- docker pull $ISB401_TEST_IMAGE
- docker tag $ISB401_TEST_IMAGE $ISB401_RELEASE_IMAGE
- docker push $ISB401_RELEASE_IMAGE
rules:
- if: $CI_COMMIT_TAG =~ /^v[0-9]+.[0-9]+.[0-9]+/
Package: rmbmix
Title: RMassBank Mixtures Workflow
Version: 0.1
Author: Todor Kondić
Maintainer: Todor Kondić <todor.kondic@uni.lu>
Package: shinyscreen
Title: Pre-screening of Mass Spectrometry Data
Version: 1.3.21
Author: Todor Kondić
Maintainer: Todor Kondić <todor.kondic@uni.lu>
Authors@R:
person(given = "Todor",
family = "Kondić",
role = c("aut", "cre"),
email = "todor.kondic@uni.lu",
comment = c(ORCID = "https://orcid.org/0000-0001-6662-4375"))
Description: Perform RMassBank workflow on (possibly) multiple
compound mixture mzML data-files.
c(person(given = "Todor",
family = "Kondić",
role = c("aut", "cre"),
email = "todor.kondic@uni.lu",
comment = c(ORCID = "https://orcid.org/0000-0001-6662-4375")),
person(given = "Anjana",
family = "Elapavalore",
role = c("ctb"),
email = "anjana.elapavalore@uni.lu"),
person(given = "Jessy",
family = "Krier",
role = c("ctb"),
email = "jessy.krier@uni.lu"),
person(given = "Adelene",
family = "Lai",
email = "adelene.lai@uni.lu",
role = c("ctb")),
person(given = "Mira",
family = "Narayanan",
role = c("ctb"),
email = "mira.narayanan@uni.lu"),
person(given = "Hiba Mohammed",
family = "Taha",
role = c("ctb"),
email = "hiba.mohammed-taha@uni.lu"),
person(given = "Marc",
family = "Warmoes",
role = c("ctb"),
email = "marc.warmoes@uni.lu"),
person(given = "Emma",
family = "Schymanski",
email = "emma.schymanski@uni.lu",
role = c("ctb")))
Description: Pre-screening of Mass Spectrometry Data.
License: Apache License (>= 2.0)
Encoding: UTF-8
LazyData: true
RoxygenNote: 6.1.1
RoxygenNote: 7.2.3
Roxygen: list(markdown = TRUE)
Collate:
'base.R'
'resources.R'
'inputs_metfrag.R'
'errors.R'
'mix.R'
'run.R'
Imports:
'envopts.R'
'data-model.R'
'state.R'
'metfrag.R'
'plotting.R'
'extraction.R'
'api.R'
'shiny-state.R'
'shiny-ui-base.R'
Depends:
R (>= 4.0.0),
RMassBank,
parallel,
yaml,
RChemMass,
mzR,
RColorBrewer
MSnbase
Imports:
assertthat,
cowplot,
curl,
data.table,
DT,
future,
ggplot2,
grid,
htmltools,
processx,
promises,
RColorBrewer,
readxl,
rmarkdown,
scales,
shiny,
tools,
withr,
yaml
Suggests:
testthat (>= 3.0.0)
Config/testthat/edition: 3
---
title: "shinyscreen Docker Documentation"
author: "Anjana Elapavalore"
date: '`r Sys.Date()`'
output: pdf_document
---
The objective of this documentation is to provide a detailed procedure for running the shinyScreen application within a Docker container. shinyScreen is a web-based tool developed for visualizing and analyzing high-throughput screening data using Shiny, a web application framework for R. Docker is employed to containerize the application, ensuring reproducibility and simplifying the deployment process by packaging the application with all its dependencies.
This document provides step-by-step instructions on how to install Docker, pull the ShinyScreen Docker image, and run the application.
## Prerequisites
Before proceeding, the following prerequisites must be met:
- Docker Desktop must be installed and properly configured on the system.
- Sufficient system resources, including at least 4 GB of RAM, are recommended for optimal performance.
## Step 1: Install Docker Desktop on Windows
If Docker Desktop is not already installed on your system, follow these steps to set it up:
1. **Download Docker Desktop**:
- Visit the [Docker Desktop for Windows](https://docs.docker.com/desktop/install/windows-install/) download page and download the installer.
2. **Install Docker Desktop**:
- Run the downloaded installer.
- Follow the installation prompts, and make sure the option to "Use the WSL 2-based engine" is selected during setup (if available).
3. **Verify Docker Installation**:
- After installation, launch Docker Desktop.
- Ensure Docker is running by checking the Docker icon in the system tray (it should be active).
- Open a command prompt (CMD) or PowerShell and type the following command to check the Docker version:
```bash
docker --version
```
This should return the installed Docker version.
## Step 2: Search for the ShinyScreen Docker Image in Docker Desktop
Docker Desktop provides a graphical interface to search for Docker images. To search and pull the **shinyScreen** Docker image:
1. **Open Docker Desktop**:
- Launch Docker Desktop from the Windows Start menu or by clicking the Docker icon in the system tray.
2. **Search for the ShinyScreen Image**:
- In Docker Desktop, navigate to the **search** tab as shown in the image below.
**Docker Image : anjanae/shinyscreen:v21_latest**
```{r, echo=FALSE, out.width="70%"}
knitr::include_graphics("images/searchtab.png")
```
3. **Pull the Image**:
- Click on the **Pull** button next to the desired ShinyScreen image. This will download the image to your local system.
You can monitor the download progress in Docker Desktop’s **Images** tab.
## Step 3: Run the ShinyScreen Docker Container
Once the ShinyScreen image has been pulled successfully, the next step is to run the container. This can be done directly from Docker Desktop or via the command line:
### Running the Container via Docker Desktop:
1. **Navigate to the Images Tab**:
- In Docker Desktop, go to the **Images** tab where your downloaded ShinyScreen image will be listed.
2. **Launch the Container**:
- Click the **Run** button next to the ShinyScreen image.
- In the pop-up dialog, ensure that port `3838` is exposed. To do this, add the following port configuration in the **Optional Settings**:
```
Host Port: 3838
Container Port: 3838
```
3. **To create the volume mounts**:
- To share files between the host and the container, you can use Docker’s volume mounting feature.
### Local Directory Structure
Before launching the Docker container, the following directory structure should be set up on your local system. These directories will be mounted to specific locations inside the container, enabling the application to access the required input data and store the output files.
### Root Directory
The **root directory** is the top-level folder where the entire project is organized. This root directory will contain the following subdirectories:
1. **Project Directory**
2. **Data Directory**
3. **MetFrag Databases Directory**
Below is an overview of each subdirectory and its purpose.
### Project Directory
The **Project Directory** should initially contain only the **compound list** file, which is an essential input for the ShinyScreen application.
After the application runs, this directory will also store various output files generated during the analysis.
### Data Directory
The **Data Directory** should contain only the **mzML files**. These files serve as the primary data input for the ShinyScreen application and represent mass spectrometry data in mzML format.
### MetFrag Databases Directory
The **MetFrag Databases Directory** must include the latest **PubChemLite_Exposomics.csv** file, which will be used by the ShinyScreen application for MetFrag analysis.
### Configure Volume Mounts in Docker Desktop
Now that the local directories are set up, you can configure **volume mounts** in Docker Desktop to map these local directories to the corresponding paths inside the Docker container.
For the volume mounts, each directory on the host machine (local system) will be mapped to a specific location within the Docker container, allowing the application to access the input data and save output files.
In the **Optional Settings** pop-up window, you will configure the following **three volume mounts** (Please refer to the table and image below):
- **Local Directory on Windows**:
- This is the folder on your local machine where the data will reside.
- **Container Directory**:
- This is the path inside the Docker container where the files will be mounted.
| Local Directory (Host) | Container Directory (Mount) | Description |
| ------------------------------ | -------------------------------------- | ------------------------------- |
| `C:\path\to\rootdirectory` | `/home/ssuser/projects` | Directory for project files. |
| `C:\path\to\rootdirectory` | `/home/ssuser/top_data_dir` | Directory for top-level data. |
| `C:\path\to\metfrag_dbs` | `/home/ssuser/metfrag_dbs` | Directory for MetFrag databases. |
```{r, echo=FALSE, out.width="70%"}
knitr::include_graphics("images/runtab.png")
```
By setting up the volume mounts, any changes made to files in the mapped directories on the host machine will automatically reflect inside the container.
Once the **port** and **volume configurations** are confirmed, click **Run** to start the container. The ShinyScreen application will now be running inside the container.
Now click on the port mapping to access the application in the web browser
```{r, echo=FALSE, out.width="85%"}
knitr::include_graphics("images/containertab.png")
```
File added
Docker_Documentation/images/containertab.png

198 KiB

Docker_Documentation/images/runtab.png

97.9 KiB

Docker_Documentation/images/searchtab.png

141 KiB

FROM gitlab.lcsb.uni.lu:4567/eci/shinyscreen/dep/ssuser:latest
MAINTAINER todor.kondic@uni.lu
EXPOSE 3838
ENV SS_MF_DB="PubChemLite_exposomics.csv"
ENV SS_CPU 2
ADD . shinyscreen/
RUN R CMD build shinyscreen
USER root
RUN R CMD INSTALL shinyscreen
USER ssuser
RUN cp shinyscreen/runme /home/ssuser/runme
RUN chmod u+x /home/ssuser/runme
# RUN chown ssuser /home/ssuser/runme
# RUN chown -R ssuser /home/ssuser/shinyscreen
RUN R -e 'library(shinyscreen);setwd("~");init(top_data_dir="~/top_data_dir",projects="~/projects",users_dir="~/users",metfrag_db_dir=Sys.getenv("SS_MF_DB_DIR"),metfrag_jar="/usr/local/bin/MetFragCommandLine.jar",no_structure_plots=T,save=T,merge=F)'
ENTRYPOINT ["/home/ssuser/runme"]
CMD ["app"]
......@@ -179,7 +179,7 @@ recommend that a file or class name and description of purpose be included on
the same “printed page” as the copyright notice for easier identification within
third-party archives.
Copyright 2019 Todor Kondić
Copyright 2020,2021 University of Luxembourg
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
......
# Generated by roxygen2: do not edit by hand
export(mb.do)
export(mb.prep)
export(presc.do)
export(presc.p)
export(presc.plot)
export(presc.single)
export(presc.v)
export(sw.do)
export(app)
export(concurrency)
export(conf_trans)
export(create_plots)
export(create_stub_gui)
export(extr_data)
export(get_fn_comp)
export(get_fn_conf)
export(get_fn_extr)
export(get_fn_ftab)
export(get_fn_summ)
export(get_rt_interval)
export(get_val_unit)
export(grab_unit)
export(import_project)
export(init)
export(list2rev)
export(load_compound_input)
export(load_data_input)
export(load_inputs)
export(merge2rev)
export(mk_comp_tab)
export(mk_tol_funcs)
export(mz_input)
export(new_empty_project)
export(new_project)
export(new_runtime_state)
export(new_rv_state)
export(new_state)
export(pack_app_state)
export(plot_struct)
export(plot_struct_nowrap)
export(prescreen)
export(r2datatab)
export(read_rt)
export(report)
export(rev2list)
export(rt_input)
export(run)
export(run_in_dir)
export(serve)
export(setup_phase)
export(sort_spectra)
export(subset_summary)
export(tk_save_file)
import(data.table)
importFrom(promises,"%...>%")
importFrom(promises,future_promise)
importFrom(shiny,HTML)
importFrom(shiny,numericInput)
importFrom(shiny,selectInput)
importFrom(shiny,textInput)
importFrom(shiny,validate)
This diff is collapsed.
## Copyright (C) 2020,2021 by University of Luxembourg
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at
## http://www.apache.org/licenses/LICENSE-2.0
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
##' @import data.table
## Redirections
## Short local aliases for frequently used data.table constructors,
## so the rest of the package can write `dtable(...)'/`the_ifelse(...)'.
the_ifelse <- data.table::fifelse
dtable <- data.table::data.table
## Normalise a character vector of paths, passing empty strings through.
##
## The vector arrives via `...'; expressions like `nchar(...)' and
## `...[test]' work only because the single vector is forwarded as the
## first argument of nchar()/`[' — a quirk, but functional. Non-empty
## entries are normalised with forward slashes; empty entries are
## copied through untouched (normalizePath() would warn on them).
norm_path <- function(...) {
    test = nchar(...) > 0L
    res = character(length(test))
    res[test] = normalizePath(...[test],winslash='/')
    res[!test] = ...[!test]
    res
}
## Write table `tab' to `file' as delimited text.
## Thin forwarding wrapper over data.table::fwrite(); any extra
## arguments are passed straight through.
tab2file <- function(tab, file, ...) {
    data.table::fwrite(x = tab, file = file, ...)
}
## Read a delimited text file into a data.table.
## Wrapper over data.table::fread() with a default set of strings
## that should be treated as NA; extra arguments are forwarded.
file2tab <- function(file, na.strings = c("", "NA", "\"\""), ...) {
    data.table::fread(file = file, na.strings = na.strings, ...)
}
## Check whether `fn' names an existing file.
##
## @param fn Candidate file name(s); any type is accepted.
## @return Logical: file.exists() result for a non-empty character
##     input, FALSE otherwise. (Fix: spell out FALSE instead of the
##     reassignable shorthand `F'.)
isThingFile <- function(fn) {
    if (length(fn) > 0 && is.character(fn)) {
        file.exists(fn)
    } else FALSE
}
## Split a path into its components, deepest component first,
## e.g. "a/b/c" -> c("c", "b", "a"). (Borrowed from Stack Overflow.)
split_path <- function(path) {
    parts <- character(0)
    while (!(dirname(path) %in% c(".", path))) {
        parts <- c(parts, basename(path))
        path <- dirname(path)
    }
    c(parts, basename(path))
}
## Render a data frame as comma-separated rows joined by newlines.
print_table <- function (df) {
    rows <- apply(df, 1, paste, collapse = ',')
    paste(rows, collapse = "\n")
}
## Shiny-flavoured assertion: aborts reactive evaluation with `msg'
## when `expr' is not "truthy" in the shiny::need() sense.
assert <- function(expr,msg) shiny::validate(shiny::need(expr,message=msg))
## Append a new unique label to the character vector `prev'.
##
## Candidates are paste0(pref, <random int>, suff) with the integer
## drawn from [l, 2l+1], l = length(prev), so a free label always
## exists; retries until the candidate is not already in `prev'.
## Returns `prev' with the new label appended. Note: advances the RNG
## state (runif).
gen_uniq_lab <- function(prev,pref='',suff='') {
    l <- length(prev)
    gen <- function() paste0(pref,as.integer(runif(1,min=l,max=2L*l+1L)),suff)
    cand <- gen()
    while (cand %in% prev) cand <- gen()
    c(prev,cand)
}
## Map a vector of yes/no symbols to logical.
##
## @param yesno Vector containing SYM_YES / SYM_NO entries (package
##     constants defined elsewhere).
## @return Logical vector: TRUE for SYM_YES, FALSE for SYM_NO, NA for
##     anything else. (Fixes: TRUE/FALSE over `T'/`F', seq_along over
##     `1:length'.)
yesno2log <- function(yesno) {
    yes <- which(yesno == SYM_YES)
    no <- which(yesno == SYM_NO)
    res <- logical(length(yesno))
    res[yes] <- TRUE
    res[no] <- FALSE
    res[!(seq_along(res) %in% c(yes, no))] <- NA
    res
}
## Map a logical vector to yes/no factor symbols (inverse of
## yesno2log): TRUE -> SYM_YES, FALSE -> SYM_NO, NA -> literal "NA".
##
## Bug fix: the original computed `wna <- log[is.na(log)]', i.e. the NA
## *values* rather than their *positions*; assigning `res[wna] <- "NA"'
## then fails with "NAs are not allowed in subscripted assignments"
## whenever the input actually contains NA. We need which(is.na(log)).
log2yesno <- function (log) {
    wna <- which(is.na(log))
    wyes <- which(log)
    wno <- !(seq_along(log) %in% c(wna, wyes))
    res <- factor(character(length(log)), levels = c(SYM_YES, SYM_NO, "NA"))
    res[wyes] <- SYM_YES
    res[wno] <- SYM_NO
    res[wna] <- "NA"
    res
}
## TODO: Remove calls to this once the glitch with prefiltering in
## datatables is fixed.
## Character-typed variant of log2yesno() used as a workaround.
fixlog2yesno <- function(log) as.character(log2yesno(log))
##' Split a "value unit" entry such as "10 ppm" into its two parts.
##'
##' @param entry `character(1)`, whitespace-separated value/unit pair.
##' @return Named character vector `c(val=..., unit=...)`.
##' @export
get_val_unit <- function(entry) {
    cntnt <- strsplit(entry,split = "[[:space:]]+")[[1]]
    cntnt <- cntnt[nchar(cntnt) > 0]
    ## Anything other than exactly two fields is malformed.
    ## (Fix: error tag said "(upd_unit)", a stale function name.)
    if (length(cntnt)!=2) stop("(get_val_unit) ","Unable to interpret ", entry)
    val <- cntnt[[1]]
    unit <- cntnt[[2]]
    c(val=val,unit=unit)
}
## Write a named list to `fname' as "name = value" lines.
##
## @param namedl Named list of scalar values.
## @param fname Output file path.
## @return NULL, invisibly; called for its side effect.
##
## Fixes: the connection is now closed via on.exit() so it is not
## leaked when cat() fails; the meaningless `append = T' on an already
## open connection was dropped.
write_keyval_file <- function(namedl,fname) {
    con = file(fname,open="w")
    on.exit(close(con), add = TRUE)
    for (n in names(namedl)) {
        cat(file=con,
            paste0(n," = ",namedl[[n]]),
            sep = "\n")
    }
    invisible(NULL)
}
## Collapse a named list of key values into one "k1v1_k2v2_..." string.
##
## @param kval Named list; each name is pasted directly to its value.
## @return `character(1)` underscore-joined name.
##
## Fix: vapply() with a declared result type replaces sapply(), whose
## return type silently changes on empty input.
gen_1d_name <- function(kval) {
    nms = names(kval)
    chunks = vapply(nms, function(x) paste0(x, kval[[x]]), character(1))
    paste0(chunks, collapse = "_")
}
## Build a one-dimensional key table for a keyed data.table `dt'.
##
## For each distinct combination of dt's key columns, fills a `key1d'
## column of the form "<col1><val1>_<col2><val2>_...". The bquote()
## splice builds the expression
##     paste(paste0(<key names>, c(<key symbols>)), collapse = "_")
## which is then evaluated as a grouped `:=' update, so each key group
## gets its own collapsed label. Returns the key table.
gen_1d_keytab <- function(dt) {
    dkey = data.table::key(dt)
    s = dt[,.(key1d=""),by=dkey]
    ## Key column names as symbols, for splicing into the expression.
    nms = sapply(dkey, as.name,simplify=F,USE.NAMES=F)
    ex=bquote(paste(paste0(.(dkey),c(..(nms))),collapse="_"),splice=T)
    eval(bquote(s[,`:=`(key1d=.(ex)),by=key(s)]))
}
## Turn a file name into a slug: directory part and extension are
## stripped and runs of blanks become single underscores.
gen_fname_slug <- function(fname) {
    slug = basename(fname)                 # drop the path
    slug = gsub("\\.[^.]*$", "", slug)     # drop a trailing extension
    slug = gsub("[[:blank:]]+", "_", slug) # blanks -> underscores
    gsub("_+", "_", slug)                  # collapse underscore runs
}
## Make duplicate slugs unique by suffixing each member of a duplicated
## group with "_<k>"; singletons are returned untouched.
uniqy_slugs <- function(slugs) {
    dt = data.table::data.table(slug=slugs)
    ## Per identical-slug group of size .N: keep the slug when the group
    ## is a singleton, otherwise append _1.._N to its members.
    dt[,slug:=fifelse(rep(.N==1L,.N),slug,paste0(slug,"_",seq(1L,.N))),by="slug"]$slug
}
## Format values with uncertainties in concise "m(u) x 10^p" notation.
##
## @param x Numeric vector of values. Does not handle values <= 0 well
##     (log10 of zero/negatives).
## @param dx Numeric vector of absolute uncertainties, same length.
## @return Character vector like "1.23(1) x 10^2".
##
## Fix: removed the leftover debugging call `message("p ", p)' which
## polluted the console on every invocation.
gen_val_unc <- function(x,dx) {
    p = floor(log10(x))
    dp = floor(log10(dx))
    ## Zero values give -Inf exponents; clamp those to 0.
    w = which(is.infinite(p))
    p[w] = 0
    ## Normalise x and dx to single-digit mantissas.
    main = x/10**p
    unc = round(dx/10**dp,0)
    place = p - dp
    main = mapply(function (m,d) formatC(m,digits=d,format='f',flag="#"),main,place,USE.NAMES=F)
    ## Rounding can push the mantissa to "10."; renormalise to 1 x 10^(p+1).
    w = which(main=='10.')
    main[w]='1'
    p[w]=p[w]+1
    paste0(main,"(",unc,") x 10^",p)
}
#Copyright (C) 2023 by University of Luxembourg
## Shinyscreen works off an internal relational database implemented
## using `data.table' package. Implementation is here.
make_db_catalogue <- function(m) {
    ## Takes comprehensive database from state `m' and generates a
    ## catalogue with a unique key. This catalogue is based on
    ## inputs. Each entry in the catalogue corresponds to a single
    ## target mass from a single experimental run.
    ##
    ## The result is keyed by DB_CATALOGUE_KEY (a package-level
    ## constant) with a secondary index on the generated row id
    ## `catid', stored as m$db$cat; the modified state is returned.
    res = m$out$tab$comp[,unique(.SD),.SDcols=c("set","tag","adduct","ID")]
    res[,catid:=.I]
    setkeyv(res,DB_CATALOGUE_KEY)
    setindex(res,catid)
    m$db$cat = res
    m
}
## Merge precursor ids of isobaric (mass-overlapping) entries.
##
## `masses' is expected sorted ascending with `up_masses' holding each
## entry's upper tolerance bound (see the sorted callers in
## make_db_precursors). Entry i claims every later entry whose mass is
## below up_masses[[i]]; those entries inherit i's id, and the scan
## resumes after the claimed run. NOTE(review): the jump
## `i + length(w) + 1L' assumes matches form a contiguous run, which
## holds only for sorted masses — confirm ordering at call sites.
merge_precid_4_isobars <- function(orig_precids,masses,up_masses) {
    ## NOTE(review): `start' and `themz' are assigned but never used.
    start = head(orig_precids,1L)
    n = length(orig_precids)
    precid = orig_precids
    i = 1L
    while (i < n) {
        theprecid = orig_precids[[i]]
        themz = masses[[i]]
        mzup = up_masses[[i]]
        ## Later entries falling inside entry i's tolerance window.
        w = which(masses[(i+1L):n]<mzup)
        precid[(i+1L):n][w] = theprecid
        ## Skip past the merged run.
        i = i + length(w) + 1L
    }
    precid
}
## Build the precursor table (m$db$precursors) from the catalogue.
##
## For every catalogue entry this computes a retention-time window and
## "fine"/"coarse" m/z windows from the configured tolerances, merges
## overlapping (isobaric) entries into shared ids via
## merge_precid_4_isobars(), and attaches the source mzML file.
## Returns the modified state `m'.
make_db_precursors <- function(m) {
    ## Generate masses and label isobars.
    cat = m$db$cat
    masses = m$out$tab$comp[cat,.(tag=tag,catid=catid,mz=mz,rt=rt),on=key(cat)]
    ## Sorted by (tag, mz): merge_precid_4_isobars relies on this order.
    setkey(masses,tag,mz)
    ## Retention time tolerance; internal unit is minutes, "s" converts.
    tmp = get_val_unit(m$conf$tolerance[['rt']])
    rttol = as.numeric(tmp[['val']])
    rtunit = tmp[['unit']]
    if (rtunit == "s") {
        rttol = rttol/60.
    } else if (rtunit != "min") {
        stop('make_db_precursors: Unknown retention time unit.')
    }
    masses[!is.na(rt),`:=`(rt_min=rt-rttol,rt_max=rt+rttol)]
    ## Fine mass error: relative "ppm" or absolute "Da".
    tmp = get_val_unit(m$conf$tolerance[['ms1 fine']])
    ms1tol = as.numeric(tmp[['val']])
    ms1unit = tmp[['unit']]
    if (ms1unit == "ppm") {
        masses[,`:=`(mz_fine_min=mz-ms1tol*mz*1e-6,mz_fine_max=mz+ms1tol*mz*1e-6)]
    } else if (ms1unit == "Da") {
        masses[,`:=`(mz_fine_min=mz-ms1tol,mz_fine_max=mz+ms1tol)]
    } else {
        stop('make_db_precursors: Unknown mass unit (fine).')
    }
    ## Coarse mass error, same unit convention as above.
    tmp = get_val_unit(m$conf$tolerance[['ms1 coarse']])
    ms1tol = as.numeric(tmp[['val']])
    ms1unit = tmp[['unit']]
    if (ms1unit == "ppm") {
        masses[,`:=`(mz_coarse_min=mz-ms1tol*mz*1e-6,mz_coarse_max=mz+ms1tol*mz*1e-6)]
    } else if (ms1unit == "Da") {
        masses[,`:=`(mz_coarse_min=mz-ms1tol,mz_coarse_max=mz+ms1tol)]
    } else {
        stop('make_db_precursors: Unknown mass unit (coarse).')
    }
    ## Assign "fine" isobars to same isocoarse number.
    masses[,precid:=merge_precid_4_isobars(catid,mz,mz_fine_max),by="tag"]
    ## Assign "coarse" isobars to same isocoarse number.
    masses[,isocoarse:=merge_precid_4_isobars(catid,mz,mz_coarse_max),by="tag"]
    ## Widen each merged group's window to cover all of its members.
    masses[,`:=`(iso_coarse_min=min(mz_coarse_min),
                 iso_coarse_max=max(mz_coarse_max)),
           by=isocoarse]
    masses[,`:=`(iso_fine_min=min(mz_fine_min),
                 iso_fine_max=max(mz_fine_max)),
           by=precid]
    setindex(masses,isocoarse,precid)
    ## Add files: map each catalogue entry to its mzML data file.
    filetab = m$input$tab$mzml[m$db$cat,
                               .(catid=i.catid,file=file),
                               on=c("set","tag"),nomatch=NULL]
    masses[filetab,file:=i.file,on="catid"]
    m$db$precursors = masses
    m
}
## Allocate an empty MS1 chromatogram table with `n' rows, keyed by
## (precid, rt).
empty_cgram_ms1 <- function(n=0L) {
    tab = data.table(file=character(n),
                     cgmidx=integer(n),
                     precid=integer(n),
                     scan=integer(n),
                     rt=numeric(n),
                     intensity=numeric(n))
    setkey(tab,precid,rt)
    tab
}
## Allocate an empty MS2 chromatogram table with `n' rows, keyed by
## (precid, ce, idx).
empty_cgram_ms2 <- function(n=0L) {
    tab = data.table(precid=integer(n),
                     ce=numeric(n),
                     scan=character(n),
                     idx=integer(n),
                     rt=numeric(n),
                     intensity=numeric(n))
    setkey(tab,precid,ce,idx)
    tab
}
## Allocate an empty spectra table with `n' rows, keyed by
## (precid, scan).
empty_spectra_table <- function(n=0L) {
    tab = data.table(precid=integer(n),
                     scan=character(n),
                     mz=numeric(n),
                     intensity=numeric(n))
    setkey(tab,precid,scan)
    tab
}
## Summary-table contribution from the catalogue: currently the
## catalogue columns are taken verbatim.
summ_needs_from_cat <- function(cat) cat
## Join precursor-level mass columns onto the summary skeleton `res'
## (matched by catid): precid, mz, and the fine m/z window renamed to
## mz_l/mz_r. `by=.EACHI' keeps one output group per `res' row.
summ_needs_from_precursors <- function(res,precursors) {
    ## Mass columns.
    precursors[res,on=.(catid),.(precid,
                                 mz,
                                 set,
                                 adduct,
                                 tag,
                                 ID,
                                 mz_l=mz_fine_min,
                                 mz_r=mz_fine_max),by=.EACHI]
}
## Join the QA/quality columns onto the summary skeleton by precid.
## A precid can match many QA rows (per collision energy and scan),
## hence allow.cartesian on the join.
summ_needs_from_qa <- function(res,qa) {
    needs = qa[,.SD,.SDcols=c("precid",
                              "ce",
                              "scan",
                              "ms1_rt",
                              "ms1_int",
                              "ms2_rt",
                              "ms2_int",
                              "ms1_mean",
                              "ms2_sel",
                              "qa_pass",
                              "qa_ms1_exists",
                              "qa_ms2_exists",
                              "qa_ms1_good_int",
                              "qa_ms1_above_noise",
                              "qa_ms2_near",
                              "qa_ms2_good_int",
                              "qlt_ms1",
                              "qlt_ms2")]
    res = needs[res,on=.(precid),allow.cartesian=T]
    ## TODO: additional processing?
    res
}
## Attach compound metadata (Name, SMILES) to `res', matched by
## (set, ID). Updates `res' by reference (data.table `:=') and returns
## it — note `:=' returns its table invisibly.
summ_needs_from_comp <- function(res,comp) {
    needs = comp[,.(set,ID,Name,SMILES)]
    setkey(needs,set,ID)
    res[needs,on=.(set,ID),`:=`(Name=i.Name,
                                SMILES=i.SMILES)]
}
## Assemble the `summ' table from catalogue, precursor, QA and
## compound information; keyed by the package constant SUMM_KEY.
gen_summ <- function(db,qa,comp) {
    ## Catalogue basics first.
    summ = summ_needs_from_cat(db$cat)
    ## Then masses and precursor ids.
    summ = summ_needs_from_precursors(summ,db$precursors)
    ## Then the QA columns.
    summ = summ_needs_from_qa(summ,qa)
    setkeyv(summ,SUMM_KEY)
    ## Finally compound names/SMILES (by-reference update, returned).
    summ_needs_from_comp(summ,comp)
}
## Copyright (C) 2020,2021,2023 by University of Luxembourg
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at
## http://www.apache.org/licenses/LICENSE-2.0
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
## Description
##
## This file contains functions which manipulate per-system and
## per-run parameters. For example, paths that may change from
## computer to computer, such as the location of the MetFrag JAR file.
#' @title Create an Empty `envopts` Object
#' @details An `envopts` object is the Shinyscreen way to store settings
#'     related to a specific computing environment. Information such
#'     as the run time path to a MetFrag JAR will vary from one setup
#'     to another and we need to convey this to the `shinyscreen`
#'     pipeline.
#'
#' @param projects `character(1)`, a directory which contains all
#'     shinyscreen projects directories. A single project directory
#'     contains input and output files.
#' @param top_data_dir `character(1)`, a directory which contains all
#'     `data` directories. A single `data` directory contains `mzML`
#'     spectrometry data files.
#' @param metfrag_db_dir `character(1)`, a path to the directory which
#'     contains MetFrag databases.
#' @param metfrag_jar `character(1)`, a path to MetFrag JAR file.
#' @param users_dir `character(1)`, a location on the server side
#'     containing individual user directories.
#' @param java_bin `character(1)`, a path to java runtime. If no path
#'     has been supplied, we will try to detect it.
#' @param metfrag_max_proc `integer(1)`, maximum number of CPU cores
#'     available for MetFrag. If no number has been supplied, we will
#'     try to detect the number of logical CPUs and go with that.
#' @param no_structure_plots `logical(1)`, if T, structures will not
#'     be plotted, even when it is possible.
#' @return An `envopts` object.
#' @author Todor Kondić
empty_envopts <- function(projects=NULL,
                          top_data_dir=NULL,
                          users_dir=NULL,
                          metfrag_db_dir=NULL,
                          metfrag_jar=NULL,
                          java_bin=NULL,
                          metfrag_max_proc=NULL,
                          no_structure_plots=NULL) {
    ## Creates an empty `envopts' object. Works in conjunction with
    ## shinyscreen::init(). All slots default to NULL here; defaults
    ## are filled in later by seal_envopts().
    res = list(projects=projects,
               top_data_dir=top_data_dir,
               users_dir=users_dir,
               metfrag=list(db_dir=metfrag_db_dir,
                            jar=metfrag_jar,
                            java_bin=java_bin,
                            max_proc=metfrag_max_proc),
               no_structure_plots=no_structure_plots)
    ## Tag the list so S3 checks such as inherits(x, "envopts") work.
    class(res) = c("envopts","list")
    res
}
## Assign defaults to an `envopts' object and validate its paths.
## Works in conjunction with `shinyscreen::init()'.
##
## @param o An `envopts' list as produced by empty_envopts().
## @return The sealed object with NULL slots defaulted and all
##     directory/file paths checked and normalised.
##
## Fix: spell out FALSE instead of the reassignable shorthand `F'.
seal_envopts <- function(o) {
    ## Default the plain string slots to the empty string.
    zero_str = c("projects","top_data_dir","users_dir")
    for (z in zero_str) {
        if (is.null(o[[z]])) o[[z]]=""
    }
    ## MetFrag defaults: detect java and the CPU count when unset.
    if (is.null(o$metfrag$db_dir)) o$metfrag$db_dir=""
    if (is.null(o$metfrag$java_bin)) o$metfrag$java_bin=Sys.which("java")
    if (is.null(o$metfrag$jar)) o$metfrag$jar = ""
    if (is.null(o$metfrag$max_proc)) o$metfrag$max_proc = parallel::detectCores()
    if (is.null(o$no_structure_plots)) o$no_structure_plots = FALSE
    ## Validate and normalise each directory/file setting.
    check_dir_absent(o$projects,what="projects-dir")
    o$projects = norm_path(o$projects)
    check_dir_absent(o$top_data_dir,what="top-data-dir")
    o$top_data_dir=norm_path(o$top_data_dir)
    check_dir_absent(o$users_dir,what="users-dir")
    o$users_dir=norm_path(o$users_dir)
    check_dir_absent(o$metfrag$db_dir,what="mf-db-dir")
    o$metfrag$db_dir = norm_path(o$metfrag$db_dir)
    check_file_absent(o$metfrag$jar,what="mf-jar")
    if (nchar(o$metfrag$jar)>0) o$metfrag$jar = norm_path(o$metfrag$jar)
    check_not_integer(value=o$metfrag$max_proc,
                      what="metfrag-max-proc")
    ## A java runtime is only required when MetFrag is configured.
    if (nchar(o$metfrag$jar)>0L) {
        check_file_absent(o$metfrag$java_bin,"java-bin")
    }
    check_not_logical(value=o$no_structure_plots,
                      what="no-structure-plots")
    o
}
## TRUE when the `envopts' object `e' has a MetFrag JAR configured.
is_metfrag_available <- function(e) {
    jar <- e$metfrag$jar
    nchar(jar) > 0L
}
## TRUE when both the MetFrag JAR and a local database directory are
## configured in `e'.
is_metfrag_local_available <- function(e) {
    if (!is_metfrag_available(e)) return(FALSE)
    nchar(e$metfrag$db_dir) > 0L
}
## Full path of the persisted `envopts' file inside the per-user R
## config directory (FN_ENVOPTS is a package-level constant).
get_envopts_fn <- function(dir=tools::R_user_dir(package="shinyscreen",
                                                 which="config")) {
    file.path(dir, FN_ENVOPTS)
}
## Load saved environment options; when no config file has been
## written yet, fall back to a fresh empty `envopts'.
load_envopts <- function(dir=tools::R_user_dir(package="shinyscreen",
                                               which="config")) {
    cfgfile = get_envopts_fn(dir=dir)
    if (!file.exists(cfgfile)) return(empty_envopts())
    readRDS(cfgfile)
}
## Persist an `envopts' object to the per-user R config directory,
## creating the directory when needed.
##
## @param o The `envopts' object to serialise.
## @param dir Target configuration directory.
##
## Fix: spell out FALSE/TRUE instead of `F'/`T'.
save_envopts <- function(o,dir=tools::R_user_dir(package="shinyscreen",
                                                 which="config")) {
    cfgfile = get_envopts_fn(dir=dir)
    dr = dirname(cfgfile)
    dir.create(path = dr, showWarnings = FALSE, recursive = TRUE)
    saveRDS(o,cfgfile)
}
## Copyright (C) 2020,2021,2023 by University of Luxembourg
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at
## http://www.apache.org/licenses/LICENSE-2.0
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
## Reusable condition object signalled when a project directory lacks
## its configuration file.
errc_conf_file_absent <- errorCondition("There is no config file in the project directory.",class="conf-file-absent")
## Validate that `value' is a character vector; otherwise signal a
## classed error "<what>-notastring".
##
## @param value The value to check.
## @param what Label used in the message and in the condition class.
## @param strict When TRUE, additionally reject NULL, zero-length, NA
##     and empty-string values.
##
## Fixes: misplaced comma in the default message ("of, X" -> "of X");
## TRUE/FALSE spelled out instead of `T'/`F'.
check_notastring <- function(value, what, strict=FALSE) {
    cond = !is.character(value)
    msg = paste0("The value (",value,") of ",what," is not a character vector (or, it is, maybe, a missing value).")
    if (strict) {
        if (is.null(value)) {
            cond=TRUE
            msg = paste0("The value of ",what," is NULL.")
        } else if (length(value)==0L) {
            cond=TRUE
            msg = paste0("The variable ",what," is a zero-length object.")
        } else if (is.na(value)) {
            cond=TRUE
            msg = paste0("The value of ",what," is NA.")
        } else if (nchar(value)==0L) {
            cond=TRUE
            msg = paste0("The size of character variable ",what," is zero.")
        }
    }
    if (cond) stop(errorCondition(msg,class=paste0(what,'-notastring')))
}
## Signal a classed error "<what>-absent" when directory `dir' does
## not exist. Unless `strict', an empty string counts as "not
## configured" and passes silently.
check_dir_absent <- function(dir,what,strict=F) {
    check_notastring(dir,what,strict=strict)
    missing_dir <- !dir.exists(dir)
    if (!strict) missing_dir <- missing_dir && nchar(dir)>0L
    if (missing_dir) {
        stop(errorCondition(paste0("The ", what, " directory --- ", dir, "--- does not exist, or cannot be found."),
                            class=paste0(what,'-absent')))
    }
}
## Like check_dir_absent(), but a zero-length string always passes.
check_dir_absent_nz <- function(dir,what) {
    check_notastring(dir,what)
    if (nchar(dir) == 0L) return(invisible(NULL))
    check_dir_absent(dir,what)
}
## Signal a classed error "<what>-absent" when a non-empty `file' path
## does not exist on disk; an empty string counts as "not configured".
check_file_absent <- function(file,what) {
    check_notastring(file,what)
    if (nchar(file) == 0L) return(invisible(NULL))
    if (!file.exists(file)) {
        stop(errorCondition(paste0("The ", what, " file --- ", file, "--- does not exist, or cannot be found."),
                            class=paste0(what,'-absent')))
    }
}
## Like check_file_absent(), but a zero-length string always passes.
check_file_absent_nz <- function(file,what) {
    check_notastring(file,what)
    if (nchar(file) == 0L) return(invisible(NULL))
    check_file_absent(file,what)
}
## Signal a classed error "<what>-not-one" unless `value' has length 1.
## Fix: missing space in the message ("Size ofx" -> "Size of x").
check_not_one <- function(value,what) {
    if (length(value)!=1L) stop(errorCondition(paste0("Size of ", what, " is not one."), class=paste0(what,'-not-one')))
}
## Verify that an extension was actually extracted. `extfileval' is a
## pair (original, stripped); equality of the two means no extension
## was found, which raises "<what>-no-ext-found".
check_extension <- function(extfileval,what) {
    check_notastring(extfileval[[1]],what = what)
    no_ext <- extfileval[[1]]==extfileval[[2]]
    if (no_ext) {
        stop(errorCondition(paste0("We could not find the extension for ",what, ". The returned value was: ", extfileval[[2]]),
                            class = paste0(what,'-no-ext-found')))
    }
}
## Signal a classed error "<what>-not-an-int" unless `value' is
## integer-typed.
check_not_integer <- function(value,what) {
    if (is.integer(value)) return(invisible(NULL))
    stop(errorCondition(paste0("The value (",value,") of `", what,"' must be an integer."), class = paste0(what,'-not-an-int')))
}
## Signal a classed error "<what>-not-a-logical" unless `value' is
## logical.
check_not_logical <- function(value,what) {
    if (is.logical(value)) return(invisible(NULL))
    stop(errorCondition(paste0("The value (",value,") of `", what,"' must be logical."), class = paste0(what,'-not-a-logical')))
}
## Signal a classed error "<what>-absent" when any of `keys' is missing
## from the names of list `l'.
##
## Bug fix: the membership test previously ran against the list
## *values* (`keys %in% l'); it must run against the names (`nms'),
## which were computed but never used.
check_key_absent <- function(keys,l,what) {
    nms = names(l)
    keys_in = keys %in% nms
    keys_absent = keys[!keys_in]
    hv = if (length(keys_absent)>1L) "have" else "has"
    if (length(keys_absent)>0L) stop(errorCondition(paste0("Keys [",paste0(keys_absent,collapse=', '), "] ",hv," not been found for ", what),
                                                    class = paste0(what,'-absent')))
}
check_conf_absent <- function(cfgfile) {
    ## Ensure the Shinyscreen system configuration file exists; abort
    ## with an `envopts-file-absent' condition otherwise.
    check_notastring(cfgfile,"envopts")
    if (file.exists(cfgfile)) return(invisible(NULL))
    stop(errorCondition("The system configuration file does not exist. Please initialise shinyscreen by calling `shinyscreen::init' function.", class="envopts-file-absent"))
}
check_len_zero <- function(value,what) {
    ## Abort with a `<what>-len-zero' condition when `value' is empty.
    if (length(value)>0L) return(invisible(NULL))
    stop(errorCondition(paste0("The length of variable ",what," is zero."),class=paste0(what,"-len-zero")))
}
check_same_len <- function(value1,what1,value2,what2) {
    ## Abort unless `value1' and `value2' have equal length.
    ## `what1'/`what2' name the variables both in the message and in
    ## the condition class (`<what1>-<what2>-lens-not-equal').
    if (length(value1)==length(value2)) return(invisible(NULL))
    msg = paste0("The lengths of variables ", what1, " and ", what2," are not the same.")
    stop(errorCondition(msg, class=paste0(what1,"-",what2,"-lens-not-equal")))
}
## Copyright (C) 2020,2021 by University of Luxembourg
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at
## http://www.apache.org/licenses/LICENSE-2.0
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
create_fine_table <- function(m) {
    ## Select fine mz-ranges and split them into those with rt entries
    ## and those without.
    ##
    ## From the precursors table of state `m', return the unique fine
    ## isolation-window limits (iso_fine_min/max) together with the
    ## (possibly NA) retention-time limits, one row per distinct
    ## combination, keyed by (file, precid).
    precs = m$db$precursors
    precs[,unique(.SD),.SDcols=c("iso_fine_min",
                                 "iso_fine_max",
                                 "rt_min",
                                 "rt_max",
                                 "file"),
          keyby=c("file","precid")]
}
create_coarse_table <- function(m) {
    ## Select coarse mz-ranges and split them into those with rt entries
    ## and those without.
    ##
    ## Same shape as create_fine_table, but for the coarse isolation
    ## windows (iso_coarse_min/max), keyed by (file, isocoarse,
    ## precid).
    precs = m$db$precursors
    precs[,unique(.SD),.SDcols=c("iso_coarse_min",
                                 "iso_coarse_max",
                                 "rt_min",
                                 "rt_max",
                                 "file"),
          keyby=c("file","isocoarse","precid")]
}
read_data_file <- function(file) {
    ## Lazily open a raw MS data file: both MS1 and MS2 levels, in
    ## MSnbase "onDisk" mode so spectra are only read on demand.
    levels = c(1,2)
    MSnbase::readMSData(file=file, msLevel=levels, mode="onDisk")
}
extr_cgrams_ms1 <- function(ms,tab,fdata) {
    ## Extract MS1 chromatograms from MS data object `ms' for the fine
    ## mz windows in `tab' (one row per precursor). Rows carrying rt
    ## limits are extracted over that rt range (minutes converted to
    ## seconds for MSnbase::chromatogram); rows without rt limits over
    ## the full run. `fdata$ms1' maps scan names to spectrum indices.
    ## Returns a long table with columns precid, rt (minutes),
    ## intensity, scan, idx.
    ## Some helpers.
    new_restab <- function(intab,cgm) {
        ## Flatten the chromatogram object `cgm' into a long
        ## data.table, one row per retention-time point, tagged with
        ## the originating precid (rows of `intab' and of `cgm'
        ## correspond one-to-one via cgmidx).
        base = intab[,.(precid=precid,cgmidx=.I)]
        cgm = base[,{
            rt = rtime(cgm[cgmidx,1])
            inte = intensity(cgm[cgmidx,1])
            .(precid=precid,
              rt = rt,
              intensity = inte,
              scan = names(rt))},
            by="cgmidx"]
        setkey(cgm,scan)
        ## Attach each scan's spectrum index from the feature data.
        cgm[fdata$ms1,idx:=i.idx,on="scan"]
        cgm
    }
    ## Split the request table by presence of rt limits.
    trt = tab[!is.na(rt_min)]
    tnort = tab[is.na(rt_min)]
    resrt = if (nrow(trt)>0L) {
        ## Call with rt argument (in seconds).
        mzrng = as.matrix(trt[,.(iso_fine_min,iso_fine_max)])
        rtrng = as.matrix(trt[,.(rt_min*60,rt_max*60)])
        new_restab(trt,MSnbase::chromatogram(ms,mz = mzrng, rt = rtrng))
    } else data.table()
    resnort = if (nrow(tnort)>0L) {
        mzrng = as.matrix(tnort[,.(iso_fine_min,iso_fine_max)])
        new_restab(tnort,MSnbase::chromatogram(ms,mz = mzrng))
    } else data.table()
    res = rbind(resnort,resrt,fill=T)
    ## Back to minutes for downstream consumers.
    res[,rt:=rt/60]
}
get_fdata <- function(ms) {
    ## Split the feature data (fData) of MS object `ms' into MS1 and
    ## MS2 lookup tables keyed by scan name. MS1 rows keep only the
    ## spectrum index; MS2 rows additionally carry the acquisition
    ## number, retention time (converted to minutes), base-peak
    ## intensity, collision energy, precursor m/z and the precursor's
    ## scan number.
    fdata = as.data.table(fData(ms),keep.rownames="scan")
    setkey(fdata,scan)
    res = list()
    res$ms1 = fdata[msLevel==1L,.(scan,
                                  idx=spIdx)]
    res$ms2 = fdata[msLevel==2L,.(scan,
                                  idx=spIdx,
                                  an=acquisitionNum,
                                  rt=retentionTime/60.,
                                  intensity=basePeakIntensity,
                                  ce=collisionEnergy,
                                  prec_mz=precursorMZ,
                                  prec_idx=precursorScanNum)]
    res
}
relate_ms2_to_precid <- function(coarse,ms2,cgram_ms1) {
    ## Take `coarse' table (the one with coarse mass limits), ms2
    ## fData and ms1 chromatogram, then relate precids from cgram_ms1
    ## to ms2 data. Returns the MS2 chromatogram table (precid, ce,
    ## scan, idx, rt, intensity).
    ## Select those MS2 entries the parents of which coarsely match
    ## compound lists masses (non-equi join on the coarse mz window).
    res = ms2[coarse,on=.(prec_mz>iso_coarse_min,prec_mz<iso_coarse_max),.(prec_mz=x.prec_mz,precid,prec_idx,scan,idx,ce,rt,intensity),nomatch=NULL]
    setkey(res,precid,prec_idx)
    ## Now, make sure those coarsely matched MS2 actually have a
    ## parent that finely matches something in the chromatogram (and
    ## this is by ensuring that a `precid' with the correct scan (idx)
    ## shows up in the chromatogram.
    ## Only chromatogram points with real signal count as evidence.
    x = cgram_ms1[!is.na(intensity)]
    ## Join: the MS2 entry's precursor scan index must appear as an
    ## MS1 point of the same precid.
    x[res,on=.(precid,idx==prec_idx),
      .(precid,ce,scan=i.scan,
        idx=i.idx,rt=i.rt,
        intensity=i.intensity),nomatch=NULL]
}
extract_spectra <- function(ms,cgram_ms2) {
    ## Extract the full MS2 spectra for every (precid, scan) entry in
    ## the MS2 chromatogram table `cgram_ms2' and return them in long
    ## format: one row per (scan, mz, intensity), keyed by scan, with
    ## the owning precid attached.
    indices = cgram_ms2[,.SD,.SDcol=c("precid","scan","idx")]
    ## Removed a dead `res = empty_spectra_table()' assignment that
    ## was unconditionally overwritten below.
    ## Read each scan from `ms' only once, even when several precids
    ## share it.
    selind = indices[,unique(.SD),.SDcol=c("scan","idx")]
    sel = ms[selind$idx]
    masses = mz(sel)
    intensities = intensity(sel)
    res = selind
    setkey(res,scan)
    ## Expand each scan into its (mz, intensity) peak list.
    res = res[,data.table(mz=masses[[scan]],
                          intensity=intensities[[scan]]),
              keyby=c("scan")]
    ## Map precids back onto the expanded peaks.
    res[indices,on=.(scan),precid:=i.precid]
}
## PRESCREENING
## This function extracts intensity maxima on intervals given by
## RT vectors rt_1 and rt_2.
find_ms1_max <- function(rt,intensity,rt_1,rt_2)
{
    ## For each retention-time window [rt_1[i], rt_2[i]], locate the
    ## maximum of `intensity' over the points of `rt' that fall inside
    ## the window. Returns a 2 x n matrix: row 1 holds the rt at the
    ## maximum, row 2 the maximal intensity. `rt' is assumed sorted
    ## increasingly (findInterval requirement).
    one_window = function (lo,hi) {
        ## findInterval codes each rt point as 0 (before lo),
        ## 1 (inside [lo,hi)) or 2 (at/after hi).
        codes = findInterval(rt,c(lo,hi))
        n = length(codes)
        first = codes[1]
        last = codes[n]
        if (first==0L && last == 2L) {
            ## Window lies strictly inside the rt range.
            pos = match(c(1L,2L),codes)
        } else if (first==1L && last!=1L) {
            ## Window starts at/before the first rt point.
            pos = c(1L,match(2L,codes))
        } else if (first==0L && last!=0L) {
            ## Window extends to (or past) the last rt point.
            pos = c(match(1L,codes),n)
        } else {
            ## Window covers the whole range (or degenerate cases).
            pos = c(1L,n)
        }
        imax = pos[[1]] + which.max(intensity[pos[[1]]:pos[[2]]]) - 1L
        ## which.max yields integer(0) on all-NA input; fall back to
        ## the window's left edge.
        if (length(imax)==0L) imax = pos[[1]]
        c(rt[imax],intensity[imax])
    }
    mapply(one_window, rt_1, rt_2, USE.NAMES=F)
}
analyse_extracted_data <- function(db,prescreen_param) {
    ## Prescreening QA: combine the MS1 chromatograms, the MS2
    ## chromatogram entries and the extracted spectra from `db' into a
    ## single quality-assessment table keyed by precid. Computes
    ## per-file MS2 noise thresholds, locates the MS1 maximum near
    ## each MS2 event, derives the qa_* flags and the qlt_ms1/qlt_ms2
    ## quality scores used downstream.
    ## Note
    ##
    ## I am working on this two days before the group meeting. The
    ## point of a meeting is to have something to show, so I will just
    ## minimally adapt the old `analyse_extracted_data' to the new
    ## `db' entries in the state. I suspect, even this is not going to
    ## be very easy.
    ##
    ## If no meeting was happening, then I'd create a nice, sleek
    ## function that fully adheres to the new `data model'
    ## philosophy. Alas, ...
    ms1 = db$extr$cgm$ms1
    ms2 = db$extr$cgm$ms2
    spectra = db$extr$spectra
    precursors = db$precursors
    ## Get file info.
    ms2_noise_table = precursors[spectra,.(file,intensity),on="precid",by=.EACHI,nomatch=NULL]
    ## Calculate threshold: per-file MS2 noise floor is one third of
    ## the mean spectral intensity in that file.
    ms2_noise_table[,threshold:=0.33333333*mean(intensity),by="file"]
    ## Reduce table.
    ms2_noise_table = ms2_noise_table[,.(threshold=first(threshold,1L)),keyby="precid"]
    ## Parameters.
    presconf = conf_trans_pres(prescreen_param)
    rt_shift = presconf$ret_time_shift_tol
    det_ms2_noise = presconf$det_ms2_noise
    ms2_int_thresh = presconf$ms2_int_thresh
    ms1_int_thresh = presconf$ms1_int_thresh
    ## We start populating the ms2 qa table.
    tab_ms2 = copy(ms2)
    ## Calculate noise.
    tab_ms2[ms2_noise_table,qa_ms2_good_int:=intensity>threshold,on=.(precid)]
    ## Rename as downstream wants it.
    setnames(tab_ms2,c("rt","intensity"),c("ms2_rt","ms2_int"))
    ## rt window (in minutes) around each MS2 event inside which the
    ## MS1 maximum is looked up.
    tab_ms2[,`:=`(rt_left = ms2_rt - rt_shift,rt_right = ms2_rt + rt_shift)]
    ## Get mean ms1 value.
    tab_ms1 = copy(ms1)
    ## To (artificially) differentiate beween ms1 and ms2 (because,
    ## they get stapled together later on, set scan to NA_character_.
    tab_ms1[,scan:=NA_character_]
    tab_ms1_mean = tab_ms1[,.(ms1_mean=mean(intensity,na.rm=T)),keyby="precid"]
    ## Perform MS1 maxima calculation in the neighbourhood of each
    ## MS2 result (find_ms1_max returns a 2 x n matrix: rt row, then
    ## intensity row).
    tmp = tab_ms1[tab_ms2,
    {
        xx = find_ms1_max(rt,intensity,i.rt_left,i.rt_right)
        .(scan=i.scan,
          ms1_rt = xx[1,],
          ms1_int = xx[2,])
    },on=.(precid),
    by=.EACHI,
    nomatch=NULL]
    ## Calculate QA values.
    tab_ms2[tmp,on=.(precid,scan),c("ms1_rt","ms1_int"):=.(i.ms1_rt,i.ms1_int)]
    ## NOTE(review): `:=c(NULL,NULL)' is presumably meant to delete
    ## the helper columns rt_left/rt_right — confirm it actually does.
    tab_ms2[,c("rt_left","rt_right"):=c(NULL,NULL)]
    ## NOTE(review): this join gives no `on' clause, so it relies on
    ## tab_ms1_mean being keyed by precid (set via keyby above).
    tab_ms2[tab_ms1_mean,ms1_mean:=i.ms1_mean]
    tab_ms2[,`:=`(qa_ms1_good_int=fifelse(ms1_int>ms1_int_thresh,T,F),
                  qa_ms1_above_noise=F,
                  qa_ms2_near=F)]
    ## TODO: I wonder if so stupidly auto-calculated ms1 noise should
    ## be taken into account at all? My recollection from earlier
    ## times was that it was helpful, at least sometimes.
    tab_ms2[qa_ms1_good_int==T,qa_ms1_above_noise:=fifelse(ms1_int>ms1_mean/3.,T,F)]
    tab_ms2[qa_ms1_good_int==T & qa_ms1_above_noise==T & qa_ms2_good_int==T,qa_ms2_near:=T]
    tab_ms2$qa_ms2_exists=T
    ## Reduce tab_ms1 to only to MS1 with no children.
    precs_ms1 = tab_ms1[,unique(precid)]
    precs_ms2 = tab_ms2[,unique(precid)]
    precs_noms = data.table(precid=precs_ms1[!(precs_ms1 %in% precs_ms2)])
    tab_noms2 = tab_ms1[precs_noms,
                        on=.(precid),
                        .SD,nomatch=NULL]
    tab_noms2 = tab_noms2[,.(ms1_mean=mean(intensity,na.rm=T),
                             ms1_rt=rt[which.max(intensity)],
                             ms1_int=max(intensity, na.rm=T)),
                          keyby="precid"]
    ## QA for the above (lazy qa ... take only the max peak into account).
    tab_noms2[,c("qa_ms1_good_int","qa_ms1_above_noise"):=.(ms1_int>ms1_int_thresh,ms1_int>ms1_mean/3.)]
    ## MS2 QA criteria all fail.
    tab_noms2[,c("qa_ms2_exists","qa_ms2_good_int","qa_ms2_near"):=.(F,F,F)]
    ## Bind MS1-only and MS1/MS2 entries together.
    res = rbind(tab_ms2,tab_noms2,fill=T,use.names=T)
    ## If ms1_int has been calculated as a Na(N) value, this means
    ## that no MS1 has been found for that precid.
    res[,qa_ms1_exists:=F]
    res[!is.na(ms1_int),qa_ms1_exists:=T]
    data.table::setkey(res,precid)
    ## QA_FLAGS and BASE_KEY_MS2 are package-level constants.
    qflg = QA_FLAGS[!(QA_FLAGS %in% "qa_pass")]
    ## qa_pass is the conjunction of all the other QA flags.
    res[,qa_pass:=apply(.SD,1,all),.SDcols=qflg]
    ## Distance between the MS2 event and its MS1 maximum (passing
    ## rows only).
    res[.(T),del_rt:=abs(ms2_rt - ms1_rt),on="qa_pass",by='scan']
    resby = BASE_KEY_MS2[! (BASE_KEY_MS2 %in% 'scan')]
    res[.(T),qa_tmp_ms1_max:= ms1_int==max(ms1_int),on="qa_pass",by=resby]
    ## Per group, select the MS2 scan closest in rt to the strongest
    ## MS1 signal.
    res[,ms2_sel:=F]
    res[.(T,T),ms2_sel:= del_rt == del_rt[which.min(del_rt)],on=c("qa_pass","qa_tmp_ms1_max"),by=resby]
    ## Weighted quality scores: exists=5, above-noise/near=3,
    ## good intensity=2 (max 10).
    res[,qlt_ms1:=apply(.SD,1,function(rw) sum(c(5L,3L,2L)*rw)),.SDcol=c("qa_ms1_exists",
                                                                         "qa_ms1_above_noise",
                                                                         "qa_ms1_good_int")]
    res[,qlt_ms2:=apply(.SD,1,function(rw) sum(c(5L,3L,2L)*rw)),.SDcol=c("qa_ms2_exists",
                                                                         "qa_ms2_near",
                                                                         "qa_ms2_good_int")]
    res[is.na(qlt_ms1),qlt_ms1:=0L]
    res[is.na(qlt_ms2),qlt_ms2:=0L]
    ## Set all other flags to false when qa_ms1_exists == F by decree.
    flgs = c(QA_FLAGS,"ms2_sel")
    res[qa_ms1_exists == F,(flgs):=F]
    res
}
mk_shiny_input <- function(name,fun,args) {
    ## Build a shiny input descriptor: call constructor `fun' (given
    ## by name) with inputId=name plus `args', and keep the recipe
    ## (name/fun/args) alongside the rendered widget (`content') so
    ## the input can be re-created later.
    full_args = c(list(inputId=name),args)
    list(name=name,
         fun=fun,
         args=args,
         content=do.call(what=fun,args=full_args))
}
inject_inputs <- function(collection,which) {
    ## Gather the rendered `content' of the named inputs in
    ## `collection' into a single shiny tag list.
    contents = lapply(which,function(nm) collection[[nm]]$content)
    do.call(shiny::tagList,contents)
}
inputs_label <- function(collection,which) {
    ## Return the display label of input `which' in `collection'.
    entry = collection[[which]]
    entry$args$label
}
inputs_html_out <- function(collection, which, inline=T,...) {
    ## Render the label of input `which' wrapped in <code> markup.
    ## `inline' and `...' are accepted for interface compatibility but
    ## not used in the body.
    label_txt = inputs_label(collection, which)
    paste0('<code>',label_txt,'</code>')
}
## ---- MetFrag UI input declarations ----
## Each entry is built by mk_shiny_input and carries both the rendered
## shiny widget (`content') and its construction recipe (fun/args).
## Defaults come from the package-level METFRAG_* constants.
## Numeric MetFrag parameters.
INPUTS_METFRAG_NUMERIC = list(mk_shiny_input(name="mf_database_search_relative_mass_deviation",
                                             fun="numericInput",
                                             args=list(label="Database search relative mass deviation",
                                                       value=5)),
                              mk_shiny_input(name="mf_fragment_peak_match_absolute_mass_deviation",
                                             fun="numericInput",
                                             args=list(label="Fragment peak match absolute mass deviation",
                                                       value=METFRAG_DEFAULT_ABSMASSDEV)),
                              mk_shiny_input(name="mf_fragment_peak_match_relative_mass_deviation",
                                             fun="numericInput",
                                             args=list(label="Fragment peak match relative mass deviation",
                                                       value=METFRAG_DEFAULT_RELMASSDEV)),
                              mk_shiny_input(name="mf_maximum_tree_depth",
                                             fun="numericInput",
                                             args=list(label="MaximumTreeDepth",
                                                       value=METFRAG_DEFAULT_MAX_TREE_DEPTH)),
                              mk_shiny_input(name="mf_proc",
                                             fun="numericInput",
                                             args=list(label="Number of MetFrag Processes",
                                                       value=METFRAG_DEFAULT_PROC)))
## Standard select inputs: candidate filters, result writer, database
## type and scoring terms.
INPUTS_METFRAG_SELECT_STANDARD = list(mk_shiny_input(name="mf_pre_processing_candidate_filter",
                                                     fun="selectInput",
                                                     args=list(label="Preprocessing candidate filter",
                                                               choices=shinyscreen:::METFRAG_PREPFLT_CHOICES,
                                                               selected=shinyscreen:::METFRAG_PREPFLT_DEFAULT,
                                                               multiple=T)),
                                      mk_shiny_input(name="mf_post_processing_candidate_filter",
                                                     fun="selectInput",
                                                     args=list(label="Postprocessing candidate filter",
                                                               choices=shinyscreen:::METFRAG_POSTPFLT_CHOICES,
                                                               selected=shinyscreen:::METFRAG_POSTPFLT_DEFAULT,
                                                               multiple=T)),
                                      mk_shiny_input(name="mf_metfrag_candidate_writer",
                                                     fun="selectInput",
                                                     args=list(label="MetFrag Candidate Writer",
                                                               choices=shinyscreen:::METFRAG_WRITER_CHOICES,
                                                               selected=shinyscreen:::METFRAG_DEFAULT_WRITER)),
                                      mk_shiny_input(name="mf_database_type",
                                                     fun="selectInput",
                                                     args=list(label="Database type",
                                                               choices=METFRAG_DATABASE_TYPE,
                                                               selected=METFRAG_DEFAULT_DATABASE_TYPE)),
                                      mk_shiny_input(name="mf_scores_intrinsic",
                                                     fun="selectInput",
                                                     args=list(label="Select Scoring Types",
                                                               choices = METFRAG_INTRINSIC_SCORES,
                                                               multiple = T,
                                                               selected = names(METFRAG_DEFAULT_SCORES))))
## Index the list by input name for lookup by inputs_label & co.
names(INPUTS_METFRAG_SELECT_STANDARD) = sapply(INPUTS_METFRAG_SELECT_STANDARD,function(x) x$name)
## Local-database column selectors; choices start empty and are filled
## at runtime once a local database file is known.
INPUTS_METFRAG_SELECT_LOCAL_DBCH = list(mk_shiny_input(name="mf_local_db_col_ident",
                                                       fun="selectInput",
                                                       args=list(label="Select Identifiers",
                                                                 multiple = T,
                                                                 choices=character(0))),
                                        mk_shiny_input(name="mf_local_db_col_coll",
                                                       fun="selectInput",
                                                       args=list(label="Identifiers for sets of results",
                                                                 multiple = T,
                                                                 choices=character(0))),
                                        mk_shiny_input(name="mf_local_db_col_scores",
                                                       fun="selectInput",
                                                       args=list(label="Select local scoring terms",
                                                                 multiple = T,
                                                                 choices=character(0))))
names(INPUTS_METFRAG_SELECT_LOCAL_DBCH) = sapply(INPUTS_METFRAG_SELECT_LOCAL_DBCH,function(x) x$name)
## Selector for the local database file itself.
INPUTS_METFRAG_SELECT_LOCAL_OTHER = list(mk_shiny_input(name="mf_local_database",
                                                        fun="selectInput",
                                                        args=list(label="Local Database",
                                                                  choices=character(0))))
names(INPUTS_METFRAG_SELECT_LOCAL_OTHER) = sapply(INPUTS_METFRAG_SELECT_LOCAL_OTHER,function(x) x$name)
## Combined collections, all indexed by input name.
INPUTS_METFRAG_SELECT_LOCAL = c(INPUTS_METFRAG_SELECT_LOCAL_OTHER,
                                INPUTS_METFRAG_SELECT_LOCAL_DBCH)
names(INPUTS_METFRAG_SELECT_LOCAL) = sapply(INPUTS_METFRAG_SELECT_LOCAL,function(x) x$name)
INPUTS_METFRAG = c(INPUTS_METFRAG_NUMERIC, INPUTS_METFRAG_SELECT_STANDARD, INPUTS_METFRAG_SELECT_LOCAL)
names(INPUTS_METFRAG) = sapply(INPUTS_METFRAG, function(x) x[["name"]])
## Copyright (C) 2020,2021,2023 by University of Luxembourg
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at
## http://www.apache.org/licenses/LICENSE-2.0
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
## Description:
##
## Functions supporting MetFrag.
## Overall concept:
##
## For some input entries from `summ':
##
## 1. Generate unique "stags" based on key of `summ'.
## 2. Deduce file names based on stags.
## 3. Run MetFrag on them.
## 4. Summarise Results.
metfrag_gen_entry_fname <- function(kv) {
    ## File name for one summ-entry result table, derived from the
    ## entry's key/value pairs via their unique 1-d tag.
    tag = gen_1d_name(kv)
    paste0("mf_summ_entry_",tag,".csv")
}
metfrag_get_stag_tab <- function(summ) {
    ## Argument summ can be a subset of actual `summ' table.
    ## Generate a unique string tag ("stag") per summ row (via
    ## gen_1d_keytab) and attach collision energy and ion m/z to it.
    x = gen_1d_keytab(summ)
    data.table::setnames(x,old="key1d",new="stag")
    ## NOTE(review): `ion_mz=mz' takes `mz' from the joined result; if
    ## `mz' lives only in `summ', `i.mz' may have been intended —
    ## verify against gen_1d_keytab's output columns.
    res = x[summ,`:=`(ce=i.ce,ion_mz=mz)]
    res
}
metfrag_get_ms2_spec <- function(ms2,stag_entry) {
    ## Return the (mz, intensity) peak list of the MS2 spectrum in
    ## `ms2' matching the named key set `stag_entry'.
    ## BUG FIX: the original joined on `kval_tab', a name defined
    ## nowhere in scope, so the function always failed; the parameter
    ## `stag_entry' (otherwise unused, and supplying the join columns
    ## via names(stag_entry)) was clearly intended.
    x = ms2[stag_entry,.(mz,intensity),on=names(stag_entry)]
    x
}
get_mf_res_ext <- function(fn) {
    ## Extract the file extension of a MetFrag result file name `fn'.
    ## sub() returns `fn' unchanged when nothing matches, which
    ## check_extension() detects and converts into a classed error.
    pat = r"(^.*\.([[:alnum:]]+)$)"
    ext = sub(pattern = pat, r"(\1)", fn)
    check_extension(c(ext=ext,file=fn),what="mf-res-file")
    ext
}
metfrag_run <- function(param,path,subpaths,db_file,stag_tab,ms2,runtime,java_bin,nproc = 1L) {
    ## Drive a full MetFrag batch: join spectra to the stag entries,
    ## write one config + peak-list pair per entry, execute the
    ## MetFrag jar (`runtime') under `java_bin' with up to `nproc'
    ## parallel processes, then derive and record the result file
    ## names. Returns the file table (f_conf/f_log/f_spec/f_res plus
    ## stag), keyed like stag_tab.
    keys = intersect(colnames(stag_tab),colnames(ms2))
    rms2 = ms2[stag_tab,on=keys,nomatch=NULL]
    message("Generating MetFrag configs.")
    ## One config per key group; the group's peaks form the spectrum.
    file_tab = rms2[,{
        r = write_metfrag_config(param = ..param,
                                 path = ..path,
                                 subpaths = ..subpaths,
                                 db_file = ..db_file,
                                 stag = first(stag),
                                 adduct = first(adduct),
                                 ion_mz = first(ion_mz),
                                 spec = data.table(mz=mz,intensity=intensity))
        c(r,stag = first(stag))
    },keyby=keys]
    message("Done generating MetFrag configs.")
    ## Paths inside file_tab are relative to `path'.
    withr::with_dir(path,{
        metfrag_run_many(fn_jar = runtime,
                         file_tab = file_tab,
                         java_bin = java_bin,
                         nproc = nproc)
    })
    ## We don't know (so well) in advance what are the endings of the
    ## results files. Lets find this out by inspecting one produced
    ## result file.
    pth = file.path(path,subpaths[["results"]])
    a_res_f = list.files(path = pth,
                         pattern = param$SampleName)[[1]]
    ext = get_mf_res_ext(a_res_f)
    file_tab[,f_res:=paste0(param$SampleName,"_",stag,".",(ext))]
}
mf_narrow_summ <- function(summ,kv,ms2_rt_i=NA_integer_,ms2_rt_f=NA_integer_) {
    ## Narrow `summ' to the QA-passing rows matching the key/value
    ## pairs `kv', optionally restricted to the open MS2
    ## retention-time interval (ms2_rt_i, ms2_rt_f). NA bounds mean
    ## "unbounded" on that side.
    skey = data.table::key(summ)
    ## BUG FIX: data.table::key() returns an *unnamed* character
    ## vector, so the original `names(skey)' was always NULL and the
    ## key columns were silently dropped from the selection; use the
    ## vector itself.
    cols = union(skey,c("adduct","tag","ID","ce","precid","scan","mz","qa_pass","ms2_rt"))
    dtkv = as.data.table(kv)
    nsumm = summ[dtkv,on=names(kv),.SD,.SDcols=cols]
    nsumm = nsumm[qa_pass==T] # Those that make sense.
    nsumm_key = intersect(union(SUMM_KEY,"ms2_rt"),colnames(nsumm))
    data.table::setkeyv(nsumm,nsumm_key)
    ## Open-ended defaults for the rt window.
    ms2_rt_i = if (!is.na(ms2_rt_i)) ms2_rt_i else 0.
    ms2_rt_f = if (!is.na(ms2_rt_f)) ms2_rt_f else Inf
    nsumm[ms2_rt > (ms2_rt_i) & ms2_rt < (ms2_rt_f)]
}
get_metfrag_targets <- function(stag_tab,ms2) {
    ## Assemble the per-candidate MS2 peak table used as MetFrag
    ## input: selected entries with CE, precursor ion m/z and peaks.
    ## NOTE(review): the body references `summ', which is neither a
    ## parameter nor defined in this function — as written it only
    ## works if a global `summ' exists; the `stag_tab' argument is
    ## unused. This looks like stale code — verify before use.
    ## Take the columns we need from summ.
    x = summ[ms2_sel==T,.SD,.SDcols=c(key(summ),"mz")]
    mrg_keys = c(intersect(key(ms2),key(summ)),"scan")
    x=ms2[x,.(CE=CE,ion_mz=i.mz,mz,intensity),on=mrg_keys,by=.EACHI]
    ## Get column order so that `an' follows `CE'.
    resnms = setdiff(mrg_keys,"scan")
    nms = union(union(resnms,"CE"),c("scan","ion_mz","mz","intensity"))
    data.table::setcolorder(x,neworder = nms)
    setkeyv(x,unique(c(resnms,"CE","scan")))
    x
}
write_metfrag_config <- function(param,path,subpaths,db_file,stag,adduct,ion_mz,spec) {
    ## Write the per-entry MetFrag inputs under `path': the peak list
    ## `spec' as a space-separated CSV and the MetFrag parameter file.
    ## `param' is a named list of base MetFrag settings; `stag'
    ## uniquely tags this entry's files. Returns the relative paths of
    ## the config, log and spectrum files.
    check_not_one(ion_mz,"ion_mz")
    check_not_one(adduct,"adduct")
    dir_res = subpaths$results
    dir_spec = subpaths$spec
    dir_conf = subpaths$config
    dir_log = subpaths$log
    f_spec = file.path(dir_spec,paste0(param$SampleName,"_",stag,".csv"))
    f_conf = file.path(dir_conf,paste0(param$SampleName,"_",stag,".conf"))
    f_log = file.path(dir_log,paste0(param$SampleName,"_",stag,".log"))
    f_res = paste0(param$SampleName,"_",stag)
    withr::with_dir(path,{
        param$SampleName = f_res
        ## Ion mode is inferred from the adduct's trailing sign;
        ## PrecursorIonMode comes from the package's adduct switch
        ## table.
        param = c(param,list(IonizedPrecursorMass=ion_mz,
                             IsPositiveIonMode=ifelse(grepl(r"(\+$)",adduct),"True","False"),
                             PrecursorIonMode=METFRAG_ADDUCT_SWITCHES[[adduct]],
                             ResultsPath="results",
                             PeakListPath=f_spec))
        ## Optional local database file.
        if (nchar(db_file)>0L) param = c(param,list(LocalDatabasePath = db_file))
        ## Peak list: "mz intensity" pairs, no header.
        data.table::fwrite(spec,file=f_spec,col.names=F,sep=" ")
        write_keyval_file(namedl=param,fname=f_conf)
    })
    list(f_conf=f_conf,
         f_log=f_log,
         f_spec=f_spec)
}
metfrag_run_one <- function(fn_jar, fn_conf, fn_log, mem = NA_character_, java_bin = "java") {
    ## Launch one MetFrag run as a background java process. stdout and
    ## stderr are merged into `fn_log'. Returns the processx handle
    ## without waiting for completion (callers wait themselves).
    jvm_args = c('-jar',fn_jar,fn_conf)
    message(fn_conf)
    ## If total heap memory given (in quantities like '4m', or '2g')
    ## then prepend the -Xmx switch.
    if (!is.na(mem)) jvm_args = c(paste0('-Xmx', mem),jvm_args)
    ## Start new java process.
    processx::process$new(java_bin,args=jvm_args,stdout=fn_log,stderr='2>&1')
}
metfrag_run_many <- function(fn_jar,file_tab, mem = NA_character_, java_bin = "java",nproc=1L) {
    ## Execute MetFrag for every row of `file_tab' (needs columns
    ## f_conf and f_log), launching jobs in batches and waiting for an
    ## entire batch to terminate before starting the next.
    ntasks = NROW(file_tab)
    todo = min(nproc,ntasks)
    ## NOTE(review): `k' is the batch-size unit, but since ncurr_last
    ## advances by k*lc the later batches launch more than `nproc'
    ## concurrent processes — confirm whether constant batches of
    ## `nproc' were intended.
    k = ntasks %/% todo
    ndone = 0L
    lc = 1L
    while (ndone < ntasks) {
        ncurr_last = min(ndone + k*lc,ntasks)
        procs = list()
        ## Launch this batch without waiting.
        for (i in (ndone + 1):ncurr_last) {
            fn_conf = file_tab[i,f_conf]
            fn_log = file_tab[i,f_log]
            procs[[i-ndone]] = metfrag_run_one(fn_jar,
                                               fn_conf= fn_conf,
                                               fn_log = fn_log,
                                               mem = mem,
                                               java_bin = java_bin)
        }
        ## Block until every process of the batch has exited.
        for (p in procs) {
            p$wait()
        }
        message("Completed MetFrag tasks: ", ncurr_last,"/",ntasks,".")
        ndone = ncurr_last
        lc = lc + 1L
    }
}
summarise_metfrag_results <- function(param,path,subpaths,cand_parameters,db_scores,int_scores,collect_candidates,file_tab) {
    ## Aggregate the per-entry MetFrag result files listed in
    ## `file_tab' into one summary table: basic Score statistics, the
    ## parameters of the top candidate, per-entry maxima of the
    ## scoring columns (Max_*) and ';'-collapsed candidate lists
    ## (All_*).
    ## which(max(as.numeric(mf_res$Score))==as.numeric(mf_res$Score))
    ## NOTE(review): row 1 is taken as the top candidate — assumes
    ## MetFrag writes candidates sorted by Score; confirm.
    index_maxScore = 1L
    ## First detect which reader we need (result format follows the
    ## file extension).
    ext = get_mf_res_ext(file_tab[1,f_res])
    readf = METFRAG_RESULT_READF[[ext]]
    keyz = as.character(union(key(file_tab),"stag"))
    .read_results <- function() {
        ## Read and row-bind every result file, grouped by key.
        file_tab[,{
            fn = file.path(..path,subpaths$results,f_res)
            dt = data.table::rbindlist(lapply(fn,function (ff) as.data.table(readf(ff))))
            dt
        },
        keyby = keyz]
    }
    .adapt_col_types <- function(x) {
        ## Local-database score columns may arrive as text; coerce
        ## them to numeric (modifies x in place).
        if (length(db_scores)>0) x[,(names(db_scores)):=lapply(.SD, as.numeric),.SDcol=names(db_scores)] else x
    }
    .calc_basic_scores <- function(x) {
        ## Candidate counts and Score statistics per entry.
        x[,.(num_poss_IDs=length(Score),
             max_Score=max(Score),
             n_Score_GE4=length(which(Score>=4)),
             n_Score_GE3=length(which(Score>=3)),
             n_Score_GE2=length(which(Score>=2))),
          by=keyz]
    }
    .get_candidate_param <- function(x) {
        ## Requested parameters of the top candidate (row
        ## `index_maxScore' of each group).
        res = x[,.SD[..index_maxScore],
                .SDcol=cand_parameters,
                keyby=keyz]
        ## data.table::setnames(res,old = names(res), new = paste0("top_",names(res)))
        res
    }
    .make_max_cols <- function(x) {
        ## Per-entry maxima of all scoring columns, as Max_* columns.
        res = x[,{
            cols = lapply(.SD,function(s) max(s,na.rm=T))
            names(cols) = lapply(names(.SD),function(nn) paste0("Max_",nn))
            cols}, .SDcol=c(names(int_scores),names(db_scores)),keyby=keyz]
        res
    }
    .collect_candidates <- function(x) {
        ## Collapse the selected candidate columns into ';'-separated
        ## All_* strings.
        res = x[,{cols = lapply(.SD, function(col) paste(col,collapse=";"))
            names(cols) = lapply(names(.SD),function(nn) paste0("All_",nn))
            cols},
            .SDcol=collect_candidates,
            keyby=keyz]
        res
    }
    thetab = .read_results()
    .adapt_col_types(thetab)
    btab = .calc_basic_scores(thetab)
    cctab = .collect_candidates(thetab)
    candtab = .get_candidate_param(thetab)
    mxtab = .make_max_cols(thetab)
    ## Drop the bookkeeping file-name columns (in place), then join
    ## all partial summaries back on (key, stag).
    res = file_tab[,`:=`(f_conf=NULL,f_log=NULL,f_res=NULL,f_spec=NULL)]
    data.table::setkeyv(res,c(key(res),"stag"))
    res = res[btab]
    res = res[candtab]
    res = res[mxtab]
    res = res[cctab]
    res
}