Skip to content
Snippets Groups Projects
Commit 17cbb8f0 authored by Todor Kondić's avatar Todor Kondić
Browse files

analyse_extracted_data: Adapted to db inputs. Also fixed sec/mins conversions...

analyse_extracted_data: Adapted to db inputs. Also fixed sec/mins conversions in a couple of places.
parent 05140fe1
No related branches found
No related tags found
No related merge requests found
......@@ -386,6 +386,7 @@ prescreen <- function(m) {
m$qa = NULL
m$out$tab$summ = NULL
m$qa = analyse_extracted_data(m$db,m$conf$prescreen)
browser()
m$out$tab$summ = gen_summ(m$out$tab$comp,m$qa)
message("(prescreen): End.")
m
......
......@@ -75,7 +75,8 @@ extr_cgrams_ms1 <- function(ms,tab,fdata) {
new_restab(tnort,MSnbase::chromatogram(ms,mz = mzrng))
} else data.table()
rbind(resnort,resrt,fill=T)
res = rbind(resnort,resrt,fill=T)
res[,rt:=rt/60]
}
......@@ -214,16 +215,18 @@ analyse_extracted_data <- function(db,prescreen_param) {
tab_ms2[,`:=`(rt_left = ms2_rt - rt_shift,rt_right = ms2_rt + rt_shift)]
## Get mean ms1 value.
tab_ms1 <- copy(ms1)
tab_ms1_mean <- tab_ms1[,.(ms1_mean=mean(intensity,na.rm=T)),keyby="precid"]
tab_ms1 = copy(ms1)
## To (artificially) differentiate beween ms1 and ms2 (because,
## they get stapled together later on, set scan to NA_character_.
tab_ms1[,scan:=NA_character_]
tab_ms1_mean = tab_ms1[,.(ms1_mean=mean(intensity,na.rm=T)),keyby="precid"]
browser()
## Perform MS1 maxima calculation in the neighbourhood of each
## MS2 result.
tmp = tab_ms1[tab_ms2,
{
xx = find_ms1_max(rt,intensity,i.rt_left,i.rt_right)
......@@ -234,13 +237,14 @@ analyse_extracted_data <- function(db,prescreen_param) {
by=.EACHI,
nomatch=NULL]
## Calculate QA values.
tab_ms2[tmp,c("ms1_rt","ms1_int"):=.(i.ms1_rt,i.ms1_int),on=c(BASE_KEY,'an')]
tab_ms2[tmp,on=.(precid,scan),c("ms1_rt","ms1_int"):=.(i.ms1_rt,i.ms1_int)]
tab_ms2[,c("rt_left","rt_right"):=c(NULL,NULL)]
tab_ms2[tab_ms1_mean,ms1_mean:=i.ms1_mean]
tab_ms2[,`:=`(qa_ms1_good_int=fifelse(ms1_int>ms1_int_thresh,T,F),
qa_ms1_above_noise=F,
qa_ms2_near=F)]
## TODO: I wonder if so stupidly auto-calculated ms1 noise should
## be taken into account at all? My recollection from earlier
## times was that it was helpful, at least sometimes.
......@@ -248,18 +252,19 @@ analyse_extracted_data <- function(db,prescreen_param) {
tab_ms2[qa_ms1_good_int==T & qa_ms1_above_noise==T & qa_ms2_good_int==T,qa_ms2_near:=T]
tab_ms2$qa_ms2_exists=T
## Reduce tab_ms1 to only to MS1 with no children.
precs_ms1 = tab_ms1[,unique(precid)]
precs_ms2 = tab_ms2[,unique(precid)]
precs_noms = data.table(precid=precs_ms1[!(precs_ms1 %in% precs_ms2)])
tab_noms2 = tab_ms1[precs_noms,
on=.(precid),
.SD,nomatch=NULL]
## Find MS1 with no corresponding MS2.
ms1key <- tab_ms1[,unique(.SD),.SDcol=BASE_KEY]
ms2key <- tab_ms2[,unique(.SD),.SDcol=BASE_KEY]
ms2key$mch=T
tabmatches <- ms2key[ms1key]
ms1woms2 <- tabmatches[is.na(mch)][,mch:=NULL]
## calculate the most intense peak, its location and the mean for
## childless MS1.
tab_noms2 <- tab_ms1[ms1woms2,.(ms1_mean=mean(intensity),ms1_rt=rt[which.max(intensity)],ms1_int=max(intensity)),by=.EACHI,nomatch=NULL]
tab_noms2 = tab_noms2[,.(ms1_mean=mean(intensity,na.rm=T),
ms1_rt=rt[which.max(intensity)],
ms1_int=max(intensity, na.rm=T)),
keyby="precid"]
## QA for the above (lazy qa ... take only the max peak into account).
tab_noms2[,c("qa_ms1_good_int","qa_ms1_above_noise"):=.(ms1_int>ms1_int_thresh,ms1_int>ms1_mean/3.)]
......@@ -268,15 +273,16 @@ analyse_extracted_data <- function(db,prescreen_param) {
## Bind MS1-only and MS1/MS2 entries together.
res <- rbind(tab_ms2,tab_noms2,fill=T,use.names=T)
## Every single entry which was extracted has at least MS1.
## TODO: FIXME: Every single entry which was extracted has at
## least MS1? Not true, we should treat all-NA results as
## qa_ms1_exists == F. We curretly don't do it.
res[,qa_ms1_exists:=T]
data.table::setkeyv(res,BASE_KEY)
data.table::setkey(res,precid)
qflg <- QA_FLAGS[!(QA_FLAGS %in% "qa_pass")]
res[,qa_pass:=apply(.SD,1,all),.SDcols=qflg]
res[.(T),del_rt:=abs(ms2_rt - ms1_rt),on="qa_pass",by='an']
resby <- BASE_KEY_MS2[! (BASE_KEY_MS2 %in% 'an')]
res[.(T),del_rt:=abs(ms2_rt - ms1_rt),on="qa_pass",by='scan']
resby <- BASE_KEY_MS2[! (BASE_KEY_MS2 %in% 'scan')]
res[.(T),qa_tmp_ms1_max:= ms1_int==max(ms1_int),on="qa_pass",by=resby]
res[,ms2_sel:=F]
res[.(T,T),ms2_sel:= del_rt == del_rt[which.min(del_rt)],on=c("qa_pass","qa_tmp_ms1_max"),by=resby]
......
......@@ -205,8 +205,8 @@ REPORT_TITLE = "Plots of EICs and MS2 Spectra"
## Select the most fundamental group of entries. Within this group,
## each ID is unique.
BASE_KEY = c("adduct","tag","ID")
BASE_KEY_MS2 = c(BASE_KEY,"CE","an")
BASE_KEY = "precid"#c("adduct","tag","ID")
BASE_KEY_MS2 = c("precid","ce","scan")#c(BASE_KEY,"CE","an")
FIG_DEF_CONF =list(grouping=list(group="adduct",
plot="ID",
......@@ -217,7 +217,7 @@ FIG_DEF_CONF =list(grouping=list(group="adduct",
SUMM_COLS=c("set",BASE_KEY_MS2,"mz","ms1_rt", "ms1_int", "ms2_rt", "ms2_int",
"ms1_mean","ms2_sel",QA_FLAGS,"Name", "SMILES", "Formula", "known","Comments","file")
SUMM_KEY = c("set","ID","adduct","tag","an")
SUMM_KEY = c("set","ID","adduct","tag","scan")
PLOT_FEATURES = c("adduct",
"tag",
......@@ -225,34 +225,34 @@ PLOT_FEATURES = c("adduct",
## Empty summary table.
EMPTY_SUMM = data.table::data.table(set=character(0),
adduct=character(0),
tag=character(0),
ID=character(0),
CE=character(0),
an=integer(0),
mz=numeric(0),
ms1_rt=numeric(0),
ms1_int=numeric(0),
ms2_rt=numeric(0),
ms2_int=numeric(0),
ms1_mean=numeric(0),
ms2_sel=logical(0),
qa_pass=logical(0),
qa_ms1_exists=logical(0),
qa_ms2_exists=logical(0),
qa_ms1_good_int=logical(0),
qa_ms1_above_noise=logical(0),
qa_ms2_near=logical(0),
qa_ms2_good_int=logical(0),
Name=character(0),
SMILES=character(0),
Formula=character(0),
known=character(0),
Comments=character(0),
file=character(0))
adduct=character(0),
tag=character(0),
ID=character(0),
CE=character(0),
scan=character(0),
mz=numeric(0),
ms1_rt=numeric(0),
ms1_int=numeric(0),
ms2_rt=numeric(0),
ms2_int=numeric(0),
ms1_mean=numeric(0),
ms2_sel=logical(0),
qa_pass=logical(0),
qa_ms1_exists=logical(0),
qa_ms2_exists=logical(0),
qa_ms1_good_int=logical(0),
qa_ms1_above_noise=logical(0),
qa_ms2_near=logical(0),
qa_ms2_good_int=logical(0),
Name=character(0),
SMILES=character(0),
Formula=character(0),
known=character(0),
Comments=character(0),
file=character(0))
## Default sorting keys of spectra in the summary table
DEF_KEY_SUMM = c(BASE_KEY_MS2,"an")
DEF_KEY_SUMM = c(BASE_KEY_MS2,"scan")
SUBSET_VALS = c(IGNORE="ignore",
......
......@@ -13,5 +13,15 @@ test_that("Test find_ms1_max behaviour.",{
expect_equal(x[2,1],0.3)
expect_equal(x[2,2],0.4)
x = find_ms1_max(rt,intensity,1.,1.7)
expect_true(is.na(x[[2]]))
## expect_true(is.na(x[[2]]))
expect_equal(length(x),0L)
## dbg = readRDS("~/scratch/dbg.rds")
## rt = dbg$rt
## intensity= dbg$intensity
## rt_1 = dbg$rt_1
## rt_2 = dbg$rt_2
## x = find_ms1_max(rt,intensity,rt_1,rt_2)
## expect_true(1==1)
})
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment