From a3a6adf74d9519bd6a7643307bc41ed39104dbd6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Todor=20Kondi=C4=87?= <kontrapunkt@uclmail.net>
Date: Thu, 9 Mar 2023 13:41:47 +0100
Subject: [PATCH] db: New stage for generation of the database.

---
 DESCRIPTION                       |  1 +
 R/api.R                           |  7 ++++
 R/data-model.R                    | 55 +++++++++++++++++++++++++++++++
 R/resources.R                     |  4 +++
 tests/testthat/test-integration.R |  8 +++--
 5 files changed, 72 insertions(+), 3 deletions(-)
 create mode 100644 R/data-model.R

diff --git a/DESCRIPTION b/DESCRIPTION
index 1c92333..065829a 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -50,6 +50,7 @@ Collate:
     'errors.R'
     'mix.R'
     'envopts.R'
+    'data-model.R'
     'state.R'
     'metfrag.R'
     'plotting.R'
diff --git a/R/api.R b/R/api.R
index f976db7..de1dfdc 100644
--- a/R/api.R
+++ b/R/api.R
@@ -32,6 +32,7 @@ run <- function(envopts,
     
     all_phases=list(setup=setup_phase,
                     comptab=mk_comp_tab,
+                    db=make_db,
                     extract=extr_data,
                     prescreen=prescreen,
                     sort=sort_spectra,
@@ -917,3 +918,9 @@ metfrag <- function(m) {
 
     m
 }
+
+make_db <- function(m) {
+    m = make_db_catalogue(m)
+    m = make_db_precursors(m)
+    m
+}
diff --git a/R/data-model.R b/R/data-model.R
new file mode 100644
index 0000000..ab73abf
--- /dev/null
+++ b/R/data-model.R
@@ -0,0 +1,55 @@
+#Copyright (C) 2023 by University of Luxembourg
+
+## Shinyscreen works of an internal relational database implemented
+## using `data.table' package. Implementation is here.
+
+
+make_db_catalogue <- function(m) {
+    ## Takes comprehensive database from state `m' and generates a
+    ## catalogue with a unique key. This catalogue is based on
+    ## inputs. Each entry in the catalogue corresponds to a single
+    ## target mass from a single experimental run.
+    res = m$out$tab$comp[,unique(.SD),.SDcols=c("set","tag","adduct","ID")]
+    res[,catid:=.I]
+    setkeyv(res,DB_CATALOGUE_KEY)
+    setindex(res,catid)
+    m$db$cat = res
+    m
+}
+
+
+make_db_precursors <- function(m) {
+    ## Generate masses and label isobars.
+
+    ## Get tolerance.
+    tmp = get_val_unit(m$conf$tolerance[['ms1 fine']])
+    ms1tol = as.numeric(tmp[['val']])
+    ms1unit = tmp[['unit']]
+    cat = m$db$cat
+    masses = m$out$tab$comp[cat,.(catid=catid,mz=mz),on=key(cat)]
+    setkey(masses,mz)
+    if (ms1unit == "ppm") {
+        masses[,`:=`(mz_min=mz-ms1tol*mz*1e-6,mz_max=mz+ms1tol*mz*1e-6)]
+    } else if (ms1unit == "Da") {
+        masses[,`:=`(mz_min=mz-ms1tol,mz_max=mz+ms1tol)]
+    } else {
+        stop('make_db_precursors: Unknown mass unit.')
+    }
+    masses$isobar = -1L
+    ## masses[,isobar := fifelse(isobar==-1L & mz_min < themz & themz < mz_max,..id,catid)]
+
+    start = 1L
+    while (start <= NROW(masses)) {
+        sel = masses[start:.N]
+        themz = sel[1L,mz]
+        id = sel[1L,catid]
+        upmz = sel[1L,mz_max]
+        x = sel[mz<(upmz)]
+        stop = start + NROW(x) - 1L
+        message('a',start,'o',stop)
+        masses[(start):(stop),`:=`(isobar=..id,deg=(1L+stop-start))]
+        start = stop + 1L
+    }
+    m$db$precursors = masses
+    m
+}
diff --git a/R/resources.R b/R/resources.R
index 746b2a6..61f5851 100644
--- a/R/resources.R
+++ b/R/resources.R
@@ -361,3 +361,7 @@ METFRAG_RESULT_READF = list(csv = function(file,...) data.table::fread(file=file
                             xml = function(file,...) readxl::read_excel(path=file,...))
 
 METFRAG_DEFAULT_PROC = 1L
+
+
+## DATA MODEL
+DB_CATALOGUE_KEY = c("set","tag","adduct","ID")
diff --git a/tests/testthat/test-integration.R b/tests/testthat/test-integration.R
index 4c0cb8f..368e072 100644
--- a/tests/testthat/test-integration.R
+++ b/tests/testthat/test-integration.R
@@ -4,9 +4,11 @@ test_that("Extraction returns what is needed.",{
     skip_if_not(shiny::isTruthy(topd) && shiny::isTruthy(projd),"Environment variables SS_INTEG_TOP_DATA_DIR and SS_INTEG_PROJ_DIR must be present for this test to work.")
 
     eo = init(top_data_dir=topd,projects=projd)
-    print(str(eo))
+    prdir = file.path(projd,"proj")
     m = run(envopts=eo,
-            project="proj",
-            phase=c("setup","comptab"))
+                project="proj",
+                phase=c("setup","comptab","db"))
+
+    cat = m$db$cat
     expect_true(1==1)
 })
-- 
GitLab