From a3a6adf74d9519bd6a7643307bc41ed39104dbd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Todor=20Kondi=C4=87?= <kontrapunkt@uclmail.net> Date: Thu, 9 Mar 2023 13:41:47 +0100 Subject: [PATCH] db: New stage for generation of the database. --- DESCRIPTION | 1 + R/api.R | 7 ++++ R/data-model.R | 55 +++++++++++++++++++++++++++++++ R/resources.R | 4 +++ tests/testthat/test-integration.R | 8 +++-- 5 files changed, 72 insertions(+), 3 deletions(-) create mode 100644 R/data-model.R diff --git a/DESCRIPTION b/DESCRIPTION index 1c92333..065829a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -50,6 +50,7 @@ Collate: 'errors.R' 'mix.R' 'envopts.R' + 'data-model.R' 'state.R' 'metfrag.R' 'plotting.R' diff --git a/R/api.R b/R/api.R index f976db7..de1dfdc 100644 --- a/R/api.R +++ b/R/api.R @@ -32,6 +32,7 @@ run <- function(envopts, all_phases=list(setup=setup_phase, comptab=mk_comp_tab, + db=make_db, extract=extr_data, prescreen=prescreen, sort=sort_spectra, @@ -917,3 +918,9 @@ metfrag <- function(m) { m } + +make_db <- function(m) { + m = make_db_catalogue(m) + m = make_db_precursors(m) + m +} diff --git a/R/data-model.R b/R/data-model.R new file mode 100644 index 0000000..ab73abf --- /dev/null +++ b/R/data-model.R @@ -0,0 +1,55 @@ +#Copyright (C) 2023 by University of Luxembourg + +## Shinyscreen works of an internal relational database implemented +## using `data.table' package. Implementation is here. + + +make_db_catalogue <- function(m) { + ## Takes comprehensive database from state `m' and generates a + ## catalogue with a unique key. This catalogue is based on + ## inputs. Each entry in the catalogue corresponds to a single + ## target mass from a single experimental run. + res = m$out$tab$comp[,unique(.SD),.SDcols=c("set","tag","adduct","ID")] + res[,catid:=.I] + setkeyv(res,DB_CATALOGUE_KEY) + setindex(res,catid) + m$db$cat = res + m +} + + +make_db_precursors <- function(m) { + ## Generate masses and label isobars. + + ## Get tolerance. + tmp = get_val_unit(m$conf$tolerance[['ms1 fine']]) + ms1tol = as.numeric(tmp[['val']]) + ms1unit = tmp[['unit']] + cat = m$db$cat + masses = m$out$tab$comp[cat,.(catid=catid,mz=mz),on=key(cat)] + setkey(masses,mz) + if (ms1unit == "ppm") { + masses[,`:=`(mz_min=mz-ms1tol*mz*1e-6,mz_max=mz+ms1tol*mz*1e-6)] + } else if (ms1unit == "Da") { + masses[,`:=`(mz_min=mz-ms1tol,mz_max=mz+ms1tol)] + } else { + stop('make_db_precursors: Unknown mass unit.') + } + masses$isobar = -1L + ## masses[,isobar := fifelse(isobar==-1L & mz_min < themz & themz < mz_max,..id,catid)] + + start = 1L + while (start <= NROW(masses)) { + sel = masses[start:.N] + themz = sel[1L,mz] + id = sel[1L,catid] + upmz = sel[1L,mz_max] + x = sel[mz<(upmz)] + stop = start + NROW(x) - 1L + message('a',start,'o',stop) + masses[(start):(stop),`:=`(isobar=..id,deg=(1L+stop-start))] + start = stop + 1L + } + m$db$precursors = masses + m +} diff --git a/R/resources.R b/R/resources.R index 746b2a6..61f5851 100644 --- a/R/resources.R +++ b/R/resources.R @@ -361,3 +361,7 @@ METFRAG_RESULT_READF = list(csv = function(file,...) data.table::fread(file=file xml = function(file,...) readxl::read_excel(path=file,...)) METFRAG_DEFAULT_PROC = 1L + + +## DATA MODEL +DB_CATALOGUE_KEY = c("set","tag","adduct","ID") diff --git a/tests/testthat/test-integration.R b/tests/testthat/test-integration.R index 4c0cb8f..368e072 100644 --- a/tests/testthat/test-integration.R +++ b/tests/testthat/test-integration.R @@ -4,9 +4,11 @@ test_that("Extraction returns what is needed.",{ skip_if_not(shiny::isTruthy(topd) && shiny::isTruthy(projd),"Environment variables SS_INTEG_TOP_DATA_DIR and SS_INTEG_PROJ_DIR must be present for this test to work.") eo = init(top_data_dir=topd,projects=projd) - print(str(eo)) + prdir = file.path(projd,"proj") m = run(envopts=eo, - project="proj", - phase=c("setup","comptab")) + project="proj", + phase=c("setup","comptab","db")) + + cat = m$db$cat expect_true(1==1) }) -- GitLab