Refactored `gen_summ' to work off the new data model.

04da47a2 · Todor Kondić · 4262e53b · 04da47a2 · 04da47a2 · 04da47a2
Commit 04da47a2 authored 1 year ago by Todor Kondić
--- a/R/api.R
+++ b/R/api.R
@@ -386,8 +386,7 @@ prescreen <- function(m) {
    m$qa = NULL
    m$out$tab$summ = NULL
    m$qa = analyse_extracted_data(m$db,m$conf$prescreen)
-    browser()
-    m$out$tab$summ = gen_summ(m$out$tab$comp,m$qa)
+    m$out$tab$summ = gen_summ(m$db,m$qa,m$out$tab$comp)
    message("(prescreen): End.")
    m
 }

--- a/R/data-model.R
+++ b/R/data-model.R
@@ -115,17 +115,70 @@ empty_spectra_table <- function() {
 }


-## Based on the `comprehensive' and `qa' tabs, greate `summ'.
-gen_summ <- function(comp,qa) {
-    comp_cols <- intersect(SUMM_COLS,colnames(comp))
-    rdcomp <- comp[,..comp_cols]
-    data.table::setkeyv(rdcomp,BASE_KEY)
-    summ <- qa[rdcomp,nomatch=F] #We changed `nomatch' cases from NA
-                                 #to F, because NA does not work well
-                                 #with X == F condition.
-    ## flgs <- c(QA_FLAGS,"ms2_sel")
-    ## summ[is.na(qa_ms1_exists),(flgs):=F]
-    data.table::setkeyv(summ,SUMM_KEY)
-    summ[.(F),c("qlt_ms1","qlt_ms2"):=0.,on="qa_ms1_exists"]
-    summ
+summ_needs_from_cat <- function(cat) {
+    ## Catalogue columns: `cat' is returned as-is, no columns are dropped.
+    cat
+}
+
+summ_needs_from_precursors <- function(res,precursors) {
+    ## Mass columns: for each row of `res', pick matching `precursors' rows by `catid'.
+    precursors[res,on=.(catid),.(precid,
+                                 mz,
+                                 set,
+                                 adduct,
+                                 tag,
+                                 ID,
+                                 mz_l=mz_fine_min,  # mz_fine_min is exposed as mz_l
+                                 mz_r=mz_fine_max),by=.EACHI]  # mz_fine_max is exposed as mz_r
+}
+
+summ_needs_from_qa <- function(res,qa) {
+    needs = qa[,.SD,.SDcols=c("precid",  # subset of `qa' columns carried over; precid is the join column
+                              "ce",
+                              "scan",
+                              "ms1_rt",
+                              "ms1_int",
+                              "ms2_rt",
+                              "ms2_int",
+                              "ms1_mean",
+                              "ms2_sel",
+                              "qa_pass",
+                              "qa_ms1_exists",
+                              "qa_ms2_exists",
+                              "qa_ms1_good_int",
+                              "qa_ms1_above_noise",
+                              "qa_ms2_near",
+                              "qa_ms2_good_int",
+                              "qlt_ms1",
+                              "qlt_ms2")]
+
+    res = needs[res,on=.(precid),allow.cartesian=T]  # look up qa rows for each `res' row by precid
+    ## TODO: additional processing?
+    res
+}
+
+summ_needs_from_comp <- function(res,comp) {
+    needs = comp[,.(set,ID,Name,SMILES)]  # only these four columns are taken from `comp'
+    setkey(needs,set,ID)
+    res[needs,on=.(set,ID),`:=`(Name=i.Name,  # `:=' adds/overwrites Name and SMILES in `res' by reference
+                                SMILES=i.SMILES)]
+}
+
+## Build the `summ' table from db$cat, db$precursors, `qa' and `comp'.
+gen_summ <- function(db,qa,comp) {
+
+    ## Start with the basic things.
+    res = summ_needs_from_cat(db$cat)
+
+    ## Add masses and precids.
+    res = summ_needs_from_precursors(res,db$precursors)
+
+    ## Add qa columns.
+    res = summ_needs_from_qa(res,qa)
+
+    setkeyv(res,SUMM_KEY)  # SUMM_KEY is defined outside this hunk
+
+    ## Add comp columns; the value of this call is what gen_summ returns.
+    summ_needs_from_comp(res,comp)
+    
 }
--- a/R/extraction.R
+++ b/R/extraction.R
@@ -272,17 +272,19 @@ analyse_extracted_data <- function(db,prescreen_param) {
    tab_noms2[,c("qa_ms2_exists","qa_ms2_good_int","qa_ms2_near"):=.(F,F,F)]

    ## Bind MS1-only and MS1/MS2 entries together.
-    res <- rbind(tab_ms2,tab_noms2,fill=T,use.names=T)
-    ## TODO: FIXME: Every single entry which was extracted has at
-    ## least MS1? Not true, we should treat all-NA results as
-    ## qa_ms1_exists == F. We curretly don't do it.
-    res[,qa_ms1_exists:=T]
+    res = rbind(tab_ms2,tab_noms2,fill=T,use.names=T)
+
+    ## If ms1_int has been calculated as an NA (or NaN) value, this means
+    ## that no MS1 has been found for that precid.
+    res[,qa_ms1_exists:=F]
+    res[!is.na(ms1_int),qa_ms1_exists:=T]
    data.table::setkey(res,precid)
+
    
-    qflg <- QA_FLAGS[!(QA_FLAGS %in% "qa_pass")]
+    qflg = QA_FLAGS[!(QA_FLAGS %in% "qa_pass")]
    res[,qa_pass:=apply(.SD,1,all),.SDcols=qflg]
    res[.(T),del_rt:=abs(ms2_rt - ms1_rt),on="qa_pass",by='scan']
-    resby <- BASE_KEY_MS2[! (BASE_KEY_MS2 %in% 'scan')]
+    resby = BASE_KEY_MS2[! (BASE_KEY_MS2 %in% 'scan')]
    res[.(T),qa_tmp_ms1_max:= ms1_int==max(ms1_int),on="qa_pass",by=resby]
    res[,ms2_sel:=F]
    res[.(T,T),ms2_sel:= del_rt == del_rt[which.min(del_rt)],on=c("qa_pass","qa_tmp_ms1_max"),by=resby]
@@ -292,6 +294,13 @@ analyse_extracted_data <- function(db,prescreen_param) {
    res[,qlt_ms2:=apply(.SD,1,function(rw) sum(c(5L,3L,2L)*rw)),.SDcol=c("qa_ms2_exists",
                                                                 "qa_ms2_near",
                                                                 "qa_ms2_good_int")]
+    res[is.na(qlt_ms1),qlt_ms1:=0L]
+    res[is.na(qlt_ms2),qlt_ms2:=0L]
+
+    ## Set all other flags to false when qa_ms1_exists == F by decree.
+    flgs = c(QA_FLAGS,"ms2_sel")
+    res[qa_ms1_exists == F,(flgs):=F]
+
    res
 }