Skip to content
Snippets Groups Projects
Commit 2386b504 authored by Todor Kondić's avatar Todor Kondić
Browse files

extr$ms? -> db$extr$cgm$ms?

parent c69c6a52
No related branches found
No related tags found
No related merge requests found
......@@ -464,8 +464,8 @@ create_plots <- function(m) {
## Select the data nedeed for plotting.
flt_summ <- m$out$tab$flt_summ
ms2 <- m$extr$ms2
ms1 <- m$extr$ms1
ms2 <- m$db$extr$cgm$ms2
ms1 <- m$db$extr$cgm$ms1
rt_min <- rt_in_min(m$conf$figures$rt_min)
rt_max <- rt_in_min(m$conf$figures$rt_max)
keytab <- flt_summ[,unique(.SD),.SDcol=c("adduct","ID")]
......@@ -606,8 +606,8 @@ report <- function(m) {
dir.create(REP_TOPDIR,recursive = T,showWarnings = F)
header <- knitr::knit_expand(fn_header)
flt_summ <- m$out$tab$reptab
ms2 <- m$extr$ms2
ms1 <- m$extr$ms1
ms2 <- m$db$extr$cgm$ms2
ms1 <- m$db$extr$cgm$ms1
rt_min <- rt_in_min(m$conf$figures$rt_min)
rt_max <- rt_in_min(m$conf$figures$rt_max)
keytab <- flt_summ[,unique(.SD),.SDcol=c("adduct","ID")]
......@@ -781,7 +781,7 @@ metfrag <- function(m) {
path = m$run$metfrag$path,
subpaths = m$run$metfrag$subpaths,
db_file = m$run$metfrag$db_file,
stag_tab = stagtab, ms2 = m$extr$ms2,
stag_tab = stagtab, ms2 = m$db$extr$cgm$ms2,
runtime=m$run$metfrag$runtime,
java_bin=m$run$metfrag$java_bin,
nproc = m$conf$metfrag$nproc)
......
......@@ -547,260 +547,6 @@ conf_trans_pres <- function(pres_list) {
pres_list
}
## PRESCREENING
create_qa_table <- function(extr,conf_presc) {
## The first input argument is the extracted `ms`, table
## containing MS1 and MS2 spectra. The argument `conf_presc` is
## m$conf$prescreen, the prescreening parameters given in the conf
## file.
## The qa table is just a copy of ms with added quality control
## columns QA_COLS.
## The QA_FLAGS columns are flags specifying which properties of
## compounds are known well, or not.
## For each compound (mass) we ask the following questions:
## qa_ms1_exists -- does the MS1 spectrum exist at all?
## qa_ms2_exists -- do we have any MS2 spectra at all?
## qa_ms1_above_noise -- is MS1 above the noise treshold?
## qa_ms2_near -- is there any MS2 spectrum inside the tolerated
## retention time window around the MS1 peak? That is, are we
## non-RT-shifted?
## qa_ms2_good_int -- Is there any MS2 spectral intensity greater
## than the MS2 threshold and less than the MS1 peak?
## qa_pass -- did the spectrum pass all the checks?
## The columns in QA_NUM_REAL are:
##
## ms1_int -- the maximum intensity of MS1 spectrum over the
## entire run;
##
## ms1_rt -- the retention time of the peak MS1.
## The columns in QA_NUM_INT are:
##
## ms2_sel -- index of the selected MS2 spectrum; if not NA, the
## associated spectrum passed all the checks (qa_pass == T); the
## spectrum itself is in one of the member sublists of the `spec'
## column. The integer `ms2_sel' is then the index of the spectrum
## in that sublist.
##
## ms1_rt_ind -- TODO (but not important to end users).
qa <- list(prescreen=conf_presc)
## checks <- extr$ms2[,{
## },keyby=BASE_KEY_MS2]
checks <- extr$ms2
checks[,(QA_FLAGS):=F]
## message("checks:")
## print(checks)
## message("done checks")
checks[,(QA_NUM_INT):=NA_integer_]
checks[,(QA_NUM_REAL):=NA_real_]
setkeyv(checks,BASE_KEY_MS2)
qa$checks <- checks
qa
}
assess_ms1 <- function(m) {
qa <- m$qa
ms1 <- m$extr$ms1
## Calculate auxiliary variables and indices.
qa_ms1 <- ms1[,.(ms1_rt_ind=which.max(intensity)),keyby=BASE_KEY]
qa_ms1 <- ms1[qa_ms1,.(ms1_rt_ind=ms1_rt_ind,
ms1_int=intensity[[ms1_rt_ind]],
ms1_rt=rt[[ms1_rt_ind]],
ms1_mean=mean(intensity)),on=BASE_KEY,by=.EACHI]
qa_ms1[,qa_ms1_good_int := ms1_int > qa$prescreen$ms1_int_thresh]
qa_ms1[,qa_ms1_above_noise := F]
qa_ms1[qa_ms1_good_int==T,qa_ms1_above_noise := .(ms1_int > qa$prescreen$s2n*ms1_mean)]
## checks[(!qa_ms1_above_noise),c("qa_ms2_good_int","qa_ms2_near","qa_ms2_exists","qa_pass"):=F]
## qa_ms1 <- check_ms1_noise(check_ms1(qa_ms1))
m$qa$ms1 <- qa_ms1
m
}
assess_ms2 <- function(m) {
presconf <- conf_trans_pres(m$conf$prescreen)
ms1 <- m$extr$ms1
ms2 <- m$extr$ms2
qa_ms1 <- m$qa$ms1
qa_ms2 <- ms2[qa_ms1[qa_ms1_above_noise==T],.(CE=unique(CE),
pc_rt=i.ms1_rt,
pc_int=i.ms1_int,
scan=unique(scan)),on=BASE_KEY,by=.EACHI,nomatch=NULL]
rt_win2 <- presconf$ret_time_shift_tol
qa_ms2 <- ms2[qa_ms2,.(pc_rt=pc_rt,
pc_int=pc_int,
ms2_int=max(intensity),
ms2_rt=unique(rt),
qa_ms2_near=head(rt,1) < pc_rt + rt_win2 & head(rt,1) > pc_rt - rt_win2),
by=.EACHI,on=c(BASE_KEY_MS2,"scan")]
qa_ms2$qa_ms2_good_int <-F
qa_ms2[qa_ms2_near==T,
qa_ms2_good_int := ms2_int > presconf$ms2_int_thresh & ms2_int < pc_int,
by=c(BASE_KEY_MS2,"scan")]
## qa_ms2$qa_pass <- F
## qa_ms2[qa_ms2_good_int==T,qa_pass:=T]
qa_ms2$ms2_sel <- F
qa_ms2[qa_ms2_good_int==T,ms2_sel:={
ind<-which.min(abs(ms2_rt-pc_rt))
z<-ms2_sel
z[[ind]]<-T
z
},by=BASE_KEY_MS2]
setkeyv(qa_ms2,BASE_KEY_MS2)
m$qa$ms2 <- qa_ms2
m
}
## Analyze extracted data.
analyse_extracted_data_old <- function(extr,prescreen_param) {
ms1 <- extr$ms1
ms2 <- extr$ms2
## Parameters.
presconf <- conf_trans_pres(prescreen_param)
rt_shift <- presconf$ret_time_shift_tol
det_ms2_noise <- presconf$det_ms2_noise
ms2_int_thresh <- presconf$ms2_int_thresh
ms1_int_thresh <- presconf$ms1_int_thresh
## Detect MS2 noise.
ms2_clc_ns <- if (det_ms2_noise) {
if (ms2_int_thresh>0) {
warning("Ignore user specified ms2_int_thresh value and autodetect noise instead.")
}
ms2[,ms2_thr:=0.33333333*mean(intensity),by="tag"]
} else {
ms2[,ms2_thr:=ms2_int_thresh]
}
## message('ms2_clc_ns:',key(ms2_clc_ns))
## ms2_clc_ns <- ms2_clc_ns[intensity>ms2_thr]
## We drop mz info.
tab_ms2 <- ms2_clc_ns[,.(ms2_rt=first(rt),ms2_int=max(intensity),ms2_thr=first(ms2_thr)),by=c(BASE_KEY_MS2,'an')]
tab_ms2[,qa_ms2_good_int:=ms2_int>ms2_thr,by="scan"]
data.table::setkeyv(tab_ms2,BASE_KEY_MS2)
tab_ms2[,`:=`(rt_left = ms2_rt - rt_shift,rt_right = ms2_rt + rt_shift)]
## Get mean ms1 value.
tab_ms1 <- extr$ms1
tab_ms1_mean <- tab_ms1[,.(ms1_mean=mean(intensity)),keyby=BASE_KEY]
## This function extracts intensity maxima on intervals given by
## RT vectors rt_1 and rt_2.
find_ms1_max <- function(rt,intensity,rt_1,rt_2)
{
mapply(function (rt_1,rt_2) {
rt_ival <- c(rt_1,rt_2)
intv <- findInterval(rt,rt_ival)
lintv = length(intv)
if (intv[1]==0L && intv[lintv] == 2L) {
pos = match(c(1L,2L),intv)
} else if (intv[1]==1L && intv[lintv]!=1L) {
pos = c(1L,match(2L,intv))
} else if (intv[1]==0L && intv[lintv]!=0L) {
pos = c(match(1L,intv),lintv)
} else {
pos = c(1L,lintv)
}
pmax = pos[[1]] + which.max(intensity[pos[[1]]:pos[[2]]]) - 1L
c(rt[pmax],intensity[pmax])
}, rt_1, rt_2, USE.NAMES=F)
}
## Perform MS1 maxima calculation in the neighbourhood of each
## MS2 result.
tmp = tab_ms1[tab_ms2,{
xx = find_ms1_max(rt,intensity,i.rt_left,i.rt_right)
.(scan=i.scan,
ms1_rt = xx[1,],
ms1_int = xx[2,])
},by=.EACHI, nomatch=NULL]
## Calculate QA values.
tab_ms2[tmp,c("ms1_rt","ms1_int"):=.(i.ms1_rt,i.ms1_int),on=c(BASE_KEY,'an')]
tab_ms2[,c("rt_left","rt_right"):=c(NULL,NULL)]
tab_ms2[tab_ms1_mean,ms1_mean:=i.ms1_mean]
tab_ms2[,`:=`(qa_ms1_good_int=fifelse(ms1_int>ms1_int_thresh,T,F),
qa_ms1_above_noise=F,
qa_ms2_near=F)]
## TODO: I wonder if so stupidly auto-calculated ms1 noise should
## be taken into account at all? My recollection from earlier
## times was that it was helpful, at least sometimes.
tab_ms2[qa_ms1_good_int==T,qa_ms1_above_noise:=fifelse(ms1_int>ms1_mean/3.,T,F)]
tab_ms2[qa_ms1_good_int==T & qa_ms1_above_noise==T & qa_ms2_good_int==T,qa_ms2_near:=T]
tab_ms2$qa_ms2_exists=T
## Find MS1 with no corresponding MS2.
ms1key <- tab_ms1[,unique(.SD),.SDcol=BASE_KEY]
ms2key <- tab_ms2[,unique(.SD),.SDcol=BASE_KEY]
ms2key$mch=T
tabmatches <- ms2key[ms1key]
ms1woms2 <- tabmatches[is.na(mch)][,mch:=NULL]
## calculate the most intense peak, its location and the mean for
## childless MS1.
tab_noms2 <- tab_ms1[ms1woms2,.(ms1_mean=mean(intensity),ms1_rt=rt[which.max(intensity)],ms1_int=max(intensity)),by=.EACHI,nomatch=NULL]
## QA for the above (lazy qa ... take only the max peak into account).
tab_noms2[,c("qa_ms1_good_int","qa_ms1_above_noise"):=.(ms1_int>ms1_int_thresh,ms1_int>ms1_mean/3.)]
## MS2 QA criteria all fail.
tab_noms2[,c("qa_ms2_exists","qa_ms2_good_int","qa_ms2_near"):=.(F,F,F)]
## Bind MS1-only and MS1/MS2 entries together.
res <- rbind(tab_ms2,tab_noms2,fill=T,use.names=T)
## Every single entry which was extracted has at least MS1.
res[,qa_ms1_exists:=T]
data.table::setkeyv(res,BASE_KEY)
qflg <- QA_FLAGS[!(QA_FLAGS %in% "qa_pass")]
res[,qa_pass:=apply(.SD,1,all),.SDcols=qflg]
res[.(T),del_rt:=abs(ms2_rt - ms1_rt),on="qa_pass",by='an']
resby <- BASE_KEY_MS2[! (BASE_KEY_MS2 %in% 'an')]
res[.(T),qa_tmp_ms1_max:= ms1_int==max(ms1_int),on="qa_pass",by=resby]
res[,ms2_sel:=F]
res[.(T,T),ms2_sel:= del_rt == del_rt[which.min(del_rt)],on=c("qa_pass","qa_tmp_ms1_max"),by=resby]
res[,qlt_ms1:=apply(.SD,1,function(rw) sum(c(5L,3L,2L)*rw)),.SDcol=c("qa_ms1_exists",
"qa_ms1_above_noise",
"qa_ms1_good_int")]
res[,qlt_ms2:=apply(.SD,1,function(rw) sum(c(5L,3L,2L)*rw)),.SDcol=c("qa_ms2_exists",
"qa_ms2_near",
"qa_ms2_good_int")]
res
}
gen_mz_err_f <- function(entry,msg) {
eppm <- grab_unit(entry,"ppm")
eda <- grab_unit(entry,"Da")
......
......@@ -28,7 +28,7 @@ make_dummy_mf_project <- function() {
kkk = STATE_DATA$out$tab$summ[,.(ii=.GRP),by=key(STATE_DATA$out$tab$summ)][,ii:=NULL][1:4]
m$out$tab$summ = STATE_DATA$out$tab$summ[kkk,on=key(STATE_DATA$out$tab$summ),nomatch=NULL]
m$extr = STATE_DATA$extr
m$db$extr = STATE_DATA$db$extr
m
}
......
......@@ -68,7 +68,7 @@ ok_return_val("metfrag_run",{
path = m$run$metfrag$path,
subpaths = m$run$metfrag$subpaths,
db_file = m$run$metfrag$db_file,
stag_tab = stagtab, ms2 = m$extr$ms2,
stag_tab = stagtab, ms2 = m$db$extr$cgm$ms2,
runtime=m$run$metfrag$runtime,
java_bin=m$run$metfrag$java_bin,
nproc = 2)
......
......@@ -12,7 +12,7 @@ test_that("Test empty project creation.",{
test_that("pack_ms2_w_summ",{
summ = STATE_DATA$out$tab$summ
ms2 = STATE_DATA$extr$ms2
ms2 = STATE_DATA$db$extr$cgm$ms2
res = pack_ms2_w_summ(summ,ms2)
expect_snapshot(res[1])
expect_snapshot(res[2])
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment