diff --git a/DESCRIPTION b/DESCRIPTION
index 1c92333e729518892877a1de1475d0ff33b36fbf..065829a0359956f1e41747b4be5cb4a926efdf6f 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -50,6 +50,7 @@ Collate:
     'errors.R'
     'mix.R'
     'envopts.R'
+    'data-model.R'
     'state.R'
     'metfrag.R'
     'plotting.R'
diff --git a/R/api.R b/R/api.R
index f976db708877320e0ca376aa442f3a9ba0182c77..de1dfdc01b6bcada21a1db12eed8083c86cb0ad6 100644
--- a/R/api.R
+++ b/R/api.R
@@ -32,6 +32,7 @@ run <- function(envopts,
 
     all_phases=list(setup=setup_phase,
                     comptab=mk_comp_tab,
+                    db=make_db,
                     extract=extr_data,
                     prescreen=prescreen,
                     sort=sort_spectra,
@@ -917,3 +918,11 @@ metfrag <- function(m) {
 
     m
 }
+
+make_db <- function(m) {
+    ## Build the internal database: the catalogue first, then the
+    ## precursor table derived from it.
+    m = make_db_catalogue(m)
+    m = make_db_precursors(m)
+    m
+}
diff --git a/R/data-model.R b/R/data-model.R
new file mode 100644
index 0000000000000000000000000000000000000000..ab73abf3aefdc27c6a6d807cc61363d8cc160c4a
--- /dev/null
+++ b/R/data-model.R
@@ -0,0 +1,78 @@
+#Copyright (C) 2023 by University of Luxembourg
+
+## Shinyscreen works off an internal relational database implemented
+## using the `data.table' package. Implementation is here.
+
+
+make_db_catalogue <- function(m) {
+    ## Takes comprehensive database from state `m' and generates a
+    ## catalogue with a unique key. This catalogue is based on
+    ## inputs. Each entry in the catalogue corresponds to a single
+    ## target mass from a single experimental run.
+    ##
+    ## Reads `m$out$tab$comp' and stores the catalogue in `m$db$cat',
+    ## keyed by DB_CATALOGUE_KEY and indexed by the row id `catid'.
+    ## Returns the updated state `m'.
+    res = m$out$tab$comp[,unique(.SD),.SDcols=DB_CATALOGUE_KEY]
+    res[,catid:=.I]
+    setkeyv(res,DB_CATALOGUE_KEY)
+    setindex(res,catid)
+    m$db$cat = res
+    m
+}
+
+
+make_db_precursors <- function(m) {
+    ## Generate masses and label isobars.
+    ##
+    ## Joins the catalogue `m$db$cat' with `m$out$tab$comp' to get the
+    ## `mz' of each catalogue entry, adds the MS1 tolerance window
+    ## (`mz_min', `mz_max') and groups the entries into isobars.
+    ## With the table sorted by `mz', the first entry not yet in a
+    ## group opens a new one, which takes in all following entries
+    ## with `mz' below that entry's `mz_max'. Column `isobar' holds
+    ## the `catid' of the entry which opened the group and `deg' the
+    ## number of entries in the group. The table is stored in
+    ## `m$db$precursors' and the updated state `m' is returned.
+
+    ## Get tolerance.
+    tmp = get_val_unit(m$conf$tolerance[['ms1 fine']])
+    ms1tol = as.numeric(tmp[['val']])
+    ms1unit = tmp[['unit']]
+    catalogue = m$db$cat
+    masses = m$out$tab$comp[catalogue,.(catid=catid,mz=mz),on=key(catalogue)]
+    setkey(masses,mz)
+    ## `isTRUE' sends a missing or empty unit to the error branch.
+    if (isTRUE(ms1unit == "ppm")) {
+        masses[,`:=`(mz_min=mz-ms1tol*mz*1e-6,mz_max=mz+ms1tol*mz*1e-6)]
+    } else if (isTRUE(ms1unit == "Da")) {
+        masses[,`:=`(mz_min=mz-ms1tol,mz_max=mz+ms1tol)]
+    } else {
+        stop('make_db_precursors: Unknown mass unit.')
+    }
+
+    ## Label isobars in a single pass over the sorted masses.
+    n = NROW(masses)
+    mzs = masses$mz
+    upmz = masses$mz_max
+    ids = masses$catid
+    grp_id = rep(-1L,n)
+    grp_size = integer(n)
+    first = 1L
+    while (first <= n) {
+        ## Every group holds at least its opening entry, even when
+        ## `mz' is NA or the tolerance is zero; this guarantees that
+        ## the loop advances.
+        last = first
+        while (last < n && isTRUE(mzs[[last+1L]] < upmz[[first]])) {
+            last = last + 1L
+        }
+        idx = first:last
+        grp_id[idx] = ids[[first]]
+        grp_size[idx] = length(idx)
+        first = last + 1L
+    }
+    masses[,`:=`(isobar=grp_id,deg=grp_size)]
+    m$db$precursors = masses
+    m
+}
diff --git a/R/resources.R b/R/resources.R
index 746b2a64bed83a6641c37b743f1b05c0706f9711..61f5851092bf3fc954720d8a67f79dfedbc750ab 100644
--- a/R/resources.R
+++ b/R/resources.R
@@ -361,3 +361,8 @@ METFRAG_RESULT_READF = list(csv = function(file,...) data.table::fread(file=file
                             xml = function(file,...) readxl::read_excel(path=file,...))
 
 METFRAG_DEFAULT_PROC = 1L
+
+
+## DATA MODEL
+## Columns which together identify a single catalogue entry.
+DB_CATALOGUE_KEY = c("set","tag","adduct","ID")
diff --git a/tests/testthat/test-integration.R b/tests/testthat/test-integration.R
index 4c0cb8fb16d2be1511d34fcfcfa09677aad8c2b5..368e0721a082e7d8749ca65d7a8399c9768f4643 100644
--- a/tests/testthat/test-integration.R
+++ b/tests/testthat/test-integration.R
@@ -4,9 +4,12 @@ test_that("Extraction returns what is needed.",{
     skip_if_not(shiny::isTruthy(topd) && shiny::isTruthy(projd),"Environment variables SS_INTEG_TOP_DATA_DIR and SS_INTEG_PROJ_DIR must be present for this test to work.")
 
     eo = init(top_data_dir=topd,projects=projd)
-    print(str(eo))
     m = run(envopts=eo,
-              project="proj",
-              phase=c("setup","comptab"))
-    expect_true(1==1)
+            project="proj",
+            phase=c("setup","comptab","db"))
+
+    catalogue = m$db$cat
+    expect_true(data.table::is.data.table(catalogue))
+    expect_identical(data.table::key(catalogue),c("set","tag","adduct","ID"))
+    expect_false(anyDuplicated(catalogue$catid) > 0L)
 })