From 035dc03315b043fde24272723846556b4bcb6e58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Todor=20Kondi=C4=87?= <todor.kondic@uni.lu> Date: Wed, 31 Aug 2022 12:35:12 +0200 Subject: [PATCH] api,mix: prescreen,gen_summ: Update generation of summ table. --- R/api.R | 8 ++--- R/mix.R | 99 +++++++++++++++++++++++++++++++++++++-------------------- 2 files changed, 68 insertions(+), 39 deletions(-) diff --git a/R/api.R b/R/api.R index f9c1059..9c78852 100644 --- a/R/api.R +++ b/R/api.R @@ -536,12 +536,10 @@ conf_trans <- function(conf) { prescreen <- function(m) { ## Top-level auto prescreening function. message("(prescreen): Start.") - confpres <- conf_trans_pres(m$conf$prescreen) + ## confpres <- conf_trans_pres(m$conf$prescreen) - m$qa <- create_qa_table(m$extr,confpres) - m1 <- assess_ms1(m) - m <- assess_ms2(m1) - m$out$tab$summ <- gen_summ(m$out$tab$comp,m$qa$ms1,m$qa$ms2) + m$qa <- analyse_extracted_data(m$extr,m$conf$prescreen) + m$out$tab$summ <- gen_summ(m$out$tab$comp,m$qa) message("(prescreen): End.") m } diff --git a/R/mix.R b/R/mix.R index 72a3b07..52aa6ac 100644 --- a/R/mix.R +++ b/R/mix.R @@ -177,40 +177,40 @@ gen_empty_summ <- function() { EMPTY_SUMM } -gen_summ <- function(comp,qa_ms1,qa_ms2) { - comp_cols <- intersect(SUMM_COLS,colnames(comp)) - summ <- comp[,..comp_cols] - data.table::setkeyv(summ,BASE_KEY) - ms1_cols <- intersect(SUMM_COLS,colnames(qa_ms1)) - ms1_cols <- setdiff(ms1_cols,colnames(summ)) - summ <- qa_ms1[summ,c(..comp_cols,..ms1_cols),on=BASE_KEY] - ms2_cols <- intersect(colnames(qa_ms2),SUMM_COLS) - ms2_cols <- setdiff(ms2_cols,colnames(summ)) - summ <- qa_ms2[summ,c(..comp_cols,..ms1_cols,..ms2_cols),on=BASE_KEY] - data.table::setkeyv(summ,c(BASE_KEY_MS2,"an")) - summ[,qa_ms1_exists:=the_ifelse(!is.na(qa_ms1_good_int),T,F)] - summ[,qa_ms2_exists:=the_ifelse(!is.na(CE),T,F)] - summ[,qa_pass:=apply(.SD,1,all),.SDcols=QA_FLAGS[!(QA_FLAGS %in% "qa_pass")]] - summ$Comments<-"" - data.table::setkeyv(summ,DEF_KEY_SUMM) - data.table::setcolorder(summ,SUMM_COLS) - - ## Quality scores for ms1 and ms2. - summ[,qlt_ms1 := as.integer(Map(function(m1,m2,m3) { m1*5L + m2*3L + m3*2L}, - as.integer(qa_ms1_exists), - as.integer(qa_ms1_above_noise), - as.integer(qa_ms1_good_int)))] - summ[,qlt_ms2 := as.integer(Map(function(m1,m2,m3) { m1*5L + m2*3L + m3*2L}, - as.integer(qa_ms2_exists), - as.integer(qa_ms2_near), - as.integer(qa_ms2_good_int)))] - - summ[is.na(qlt_ms1),qlt_ms1:=0L] - summ[is.na(qlt_ms2),qlt_ms2:=0L] - - summ +## gen_summ <- function(comp,qa_ms1,qa_ms2) { +## comp_cols <- intersect(SUMM_COLS,colnames(comp)) +## summ <- comp[,..comp_cols] +## data.table::setkeyv(summ,BASE_KEY) +## ms1_cols <- intersect(SUMM_COLS,colnames(qa_ms1)) +## ms1_cols <- setdiff(ms1_cols,colnames(summ)) +## summ <- qa_ms1[summ,c(..comp_cols,..ms1_cols),on=BASE_KEY] +## ms2_cols <- intersect(colnames(qa_ms2),SUMM_COLS) +## ms2_cols <- setdiff(ms2_cols,colnames(summ)) +## summ <- qa_ms2[summ,c(..comp_cols,..ms1_cols,..ms2_cols),on=BASE_KEY] +## data.table::setkeyv(summ,c(BASE_KEY_MS2,"an")) +## summ[,qa_ms1_exists:=the_ifelse(!is.na(qa_ms1_good_int),T,F)] +## summ[,qa_ms2_exists:=the_ifelse(!is.na(CE),T,F)] +## summ[,qa_pass:=apply(.SD,1,all),.SDcols=QA_FLAGS[!(QA_FLAGS %in% "qa_pass")]] +## summ$Comments<-"" +## data.table::setkeyv(summ,DEF_KEY_SUMM) +## data.table::setcolorder(summ,SUMM_COLS) + +## ## Quality scores for ms1 and ms2. +## summ[,qlt_ms1 := as.integer(Map(function(m1,m2,m3) { m1*5L + m2*3L + m3*2L}, +## as.integer(qa_ms1_exists), +## as.integer(qa_ms1_above_noise), +## as.integer(qa_ms1_good_int)))] +## summ[,qlt_ms2 := as.integer(Map(function(m1,m2,m3) { m1*5L + m2*3L + m3*2L}, +## as.integer(qa_ms2_exists), +## as.integer(qa_ms2_near), +## as.integer(qa_ms2_good_int)))] + +## summ[is.na(qlt_ms1),qlt_ms1:=0L] +## summ[is.na(qlt_ms2),qlt_ms2:=0L] + +## summ -} +## } pp_touch_q<-function(summ) { ## Returns indices that are ok to be auto processed. which(summ$checked==SUMM_CHK_NONE | summ$checked==SUMM_CHK_AUTO) @@ -768,6 +768,7 @@ analyse_extracted_data <- function(extr,prescreen_param) { ## times was that it was helpful, at least sometimes. tab_ms2[qa_ms1_good_int==T,qa_ms1_above_noise:=fifelse(ms1_int>ms1_mean/3.,T,F)] tab_ms2[qa_ms1_good_int==T & qa_ms1_above_noise==T & qa_ms2_good_int==T,qa_ms2_near:=T] + tab_ms2$qa_ms2_exists=T ## Find MS1 with no corresponding MS2. @@ -784,13 +785,43 @@ analyse_extracted_data <- function(extr,prescreen_param) { ## QA for the above (lazy qa ... take only the max peak into account). tab_noms2[,c("qa_ms1_good_int","qa_ms1_above_noise"):=.(ms1_int>ms1_int_thresh,ms1_int>ms1_mean/3.)] + ## MS2 QA criteria all fail. + tab_noms2[,c("qa_ms2_exists","qa_ms2_good_int","qa_ms2_near"):=.(F,F,F)] + ## Bind MS1-only and MS1/MS2 entries together. - res <- rbind(tab_ms2,tab_noms2,fill=T) + res <- rbind(tab_ms2,tab_noms2,fill=T,use.names=T) + + ## Every single entry which was extracted has at least MS1. + res[,qa_ms1_exists:=T] data.table::setkeyv(res,BASE_KEY) + + qflg <- QA_FLAGS[!(QA_FLAGS %in% "qa_pass")] + res[,qa_pass:=apply(.SD,1,all),.SDcols=qflg] + res[.(T),del_rt:=abs(ms2_rt - ms1_rt),on="qa_pass",by='an'] + res[.(T),ms2_sel:=ms2_rt[which.min(del_rt)]==ms2_rt,on="qa_pass",by=BASE_KEY_MS2] + res[,qlt_ms1:=apply(.SD,1,function(rw) sum(c(5L,3L,2L)*rw)),.SDcol=c("qa_ms1_exists", + "qa_ms1_above_noise", + "qa_ms1_good_int")] + res[,qlt_ms2:=apply(.SD,1,function(rw) sum(c(5L,3L,2L)*rw)),.SDcol=c("qa_ms2_exists", + "qa_ms2_near", + "qa_ms2_good_int")] res } +## Based on the `comprehensive' and `qa' tabs, greate `summ'. +gen_summ <- function(comp,qa) { + comp_cols <- intersect(SUMM_COLS,colnames(comp)) + rdcomp <- comp[,..comp_cols] + data.table::setkeyv(rdcomp,BASE_KEY) + summ <- qa[rdcomp,nomatch=NA] + flgs <- c(QA_FLAGS,"ms2_sel") + summ[is.na(qa_ms1_exists),(flgs):=F] + data.table::setkeyv(summ,c(BASE_KEY_MS2,'an')) + summ[.(F),c("qlt_ms1","qlt_ms2"):=0.,on="qa_ms1_exists"] + summ +} + -- GitLab