From 351f363283586bf7df2491c22e4f730aa16df7e7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Todor=20Kondi=C4=87?= <kontrapunkt@uclmail.net>
Date: Mon, 16 Jan 2023 14:51:59 +0100
Subject: [PATCH] metfrag: summarise_metfrag_results: Update. errors:
 check_key_absent: New error function.

---
 DESCRIPTION                   |  2 +-
 R/errors.R                    |  7 ++++
 R/metfrag.R                   | 61 +++++++++++++++++++++++++++++++----
 R/resources.R                 | 19 +++++++----
 tests/testthat/test-metfrag.R | 53 +++---------------------------
 5 files changed, 79 insertions(+), 63 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index faf7087..2059e21 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: shinyscreen
 Title: Pre-screening of Mass Spectrometry Data 
-Version: 1.2.9004
+Version: 1.2.9005
 Author: Todor Kondić
 Maintainer: Todor Kondić <todor.kondic@uni.lu>
 Authors@R: 
diff --git a/R/errors.R b/R/errors.R
index 53f048e..2859b70 100644
--- a/R/errors.R
+++ b/R/errors.R
@@ -37,3 +37,10 @@ check_extension <- function(extfileval,what) {
     if (extfileval[[1]]==extfileval[[2]]) stop(errorCondition(paste0("We could not find the extension for ",what, ". The returned value was: ", extfileval[[2]]),
                                                               class = paste0(what,'-no-ext-found')))
 }
+
+
+check_key_absent <- function(key,l,what) {
+    ## Stop with an error of class `<what>-absent` unless `key` is one of names(l).
+    if (! (key %in% names(l))) stop(errorCondition(paste0("Key ",key, " has not been found for ", what),
+                                                   class = paste0(what,'-absent')))
+}
diff --git a/R/metfrag.R b/R/metfrag.R
index 8d8ae9b..747ae2c 100644
--- a/R/metfrag.R
+++ b/R/metfrag.R
@@ -41,6 +41,12 @@ metfrag_get_ms2_spec <- function(ms2,stag_entry) {
     x
 }
 
+get_mf_res_ext <- function(fn) {
+    res_ext = sub(r"(^.*\.([[:alnum:]]+)$)", r"(\1)", fn) # Text after the last dot; `fn` itself if there is none.
+    check_extension(c(ext = res_ext, file = fn), what = "mf-res-file")
+    res_ext
+}
+
 metfrag_run <- function(param,path,subpaths,db_path,stag_tab,ms2,runtime,java_bin,nproc = 1L) {
     keys = intersect(colnames(stag_tab),colnames(ms2))
     message("Generating MetFrag configs.")
@@ -69,8 +75,7 @@ metfrag_run <- function(param,path,subpaths,db_path,stag_tab,ms2,runtime,java_bi
     pth = file.path(path,subpaths[["results"]])
     a_res_f = list.files(path = pth,
                          pattern = param$SampleName)[[1]]
-    ext = sub(pattern = r"(^.*\.([[:alnum:]]+)$)",r"(\1)", a_res_f)
-    check_extension(c(ext=ext,file=a_res_f),what="mf-res-file")
+    ext = get_mf_res_ext(a_res_f)
     file_tab[,f_res:=paste0(param$SampleName,"_",stag,".",(ext))]
 
     
@@ -93,10 +98,6 @@ get_metfrag_targets <- function(stag_tab,ms2) {
     
 }
 
-## get_metfrag_run_data(stag_tab,ms2,n=1) {
-## }
-
-
 metfrag_on_state <- function(mconf,mrun,summ) {
     
     
@@ -212,5 +213,51 @@ metfrag_run_many <- function(fn_jar,file_tab, mem = NA_character_, java_bin = "j
 }
 
 
-summarise_metfrag_results <- function(m) {
+## Read every MetFrag result file listed in `file_tab` and derive
+## per-`stag` candidate statistics from the `Score` column.
+## param:    not used in this function body.
+## path:     directory that contains the results subdirectory.
+## subpaths: list; its "results" entry names that subdirectory.
+## file_tab: data.table with a result file name column `f_res`
+##           and a `stag` column.
+## Returns a data.table keyed by `stag` with the columns num_poss_IDs,
+## max_Score, n_Score_GE4, n_Score_GE3 and n_Score_GE2.
+summarise_metfrag_results <- function(param,path,subpaths,file_tab) {
+    ## Columns that are converted to numeric after reading.
+    numcols = c("Score",
+                "NoExplPeaks",
+                "NumberPeaksUsed",
+                "FragmenterScore",
+                "OfflineIndividualMoNAScore")
+
+    ## The extension of the first result file selects the reader; fail
+    ## with a classed error when no reader is registered for it.
+    ext = get_mf_res_ext(file_tab[1,f_res])
+    check_key_absent(ext,METFRAG_RESULT_READF,what="mf-res-reader")
+    readf = METFRAG_RESULT_READF[[ext]]
+    keyz = as.character(union(key(file_tab),"stag"))
+
+    .read_results <- function() {
+        file_tab[,{
+            fn = file.path(..path,subpaths[["results"]],f_res)
+            data.table::rbindlist(lapply(fn,function (ff) as.data.table(readf(ff))))
+        },
+        by = keyz]
+    }
+
+    .calc_basic_scores <- function(x) {
+        x[,.(num_poss_IDs=length(Score),
+             max_Score=max(Score),
+             n_Score_GE4=length(which(Score>=4)),
+             n_Score_GE3=length(which(Score>=3)),
+             n_Score_GE2=length(which(Score>=2))),
+          keyby="stag"]
+    }
+
+    thetab = .read_results()
+    thetab[,(numcols):=lapply(.SD, as.numeric),.SDcols=numcols]
+    ## NOTE(review): monatab is computed but not yet part of the result.
+    monatab = thetab[,.(max_MoNAIndiv=max(OfflineIndividualMoNAScore))]
+    .calc_basic_scores(thetab)
 }
+
diff --git a/R/resources.R b/R/resources.R
index 290ca67..b00f15b 100644
--- a/R/resources.R
+++ b/R/resources.R
@@ -343,16 +343,21 @@ METFRAG_PREPFLT_CHOICES = c("UnconnectedCompoundFilter","IsotopeFilter")
 METFRAG_PREPFLT_DEFAULT = c("UnconnectedCompoundFilter","IsotopeFilter")
 METFRAG_POSTPFLT_CHOICES = c("InChIKeyFilter")
 METFRAG_POSTPFLT_DEFAULT = c("InChIKeyFilter")
-METFRAG_DEFAULT_SCORES = "FragmenterScore,OfflineIndividualMoNAScore" # AutomatedPeakFingerprintAnnotationScore,
-                                                                      # AutomatedLossFingerprintAnnotationScore
-                                                                      # supposedly
-                                                                      # do
-                                                                      # not
-                                                                      # work
-                                                                      # well.
+METFRAG_DEFAULT_SCORES = c("FragmenterScore","OfflineIndividualMoNAScore") # Each score name doubles as its own key.
+names(METFRAG_DEFAULT_SCORES) = METFRAG_DEFAULT_SCORES
+                                        # AutomatedPeakFingerprintAnnotationScore,
+                                        # AutomatedLossFingerprintAnnotationScore
+                                        # supposedly
+                                        # do
+                                        # not
+                                        # work
+                                        # well.
 METFRAG_DEFAULT_WEIGHTS = "1,1"
 METFRAG_DEFAULT_MAX_TREE_DEPTH = 2
 METFRAG_SAMPLE_NAME = "sample"
 METFRAG_DEFAULT_ABSMASSDEV = 1.E-3
 METFRAG_DEFAULT_RELMASSDEV = 5
 METFRAG_DB_SEARCH_RELDEV = 5
+
+METFRAG_RESULT_READF = list(csv = function(file,...) data.table::fread(file=file,...), # Readers looked up by result file extension.
+                            xls = function(file,...) readxl::read_excel(path=file,...)) # read_excel parses Excel files, not XML.
diff --git a/tests/testthat/test-metfrag.R b/tests/testthat/test-metfrag.R
index d05ef02..a2baace 100644
--- a/tests/testthat/test-metfrag.R
+++ b/tests/testthat/test-metfrag.R
@@ -71,54 +71,11 @@ ok_return_val("metfrag_run",{
                                                   f)))
             }
 
+            x = summarise_metfrag_results(param = m$conf$metfrag$param,
+                                          path = m$run$metfrag$path,
+                                          subpaths = m$run$metfrag$subpaths,
+                                          file_tab = ftab)
+
                                  
     })
 })
-
-## ok_return_val("metfrag_on_state",{
-##     ## skip_if_not(file.exists(Sys.getenv("METFRAG_JAR")),"Environment variable METFRAG_JAR does not contain a path to MetFrag jar package.")
-##     ## m = make_dummy_mf_project()
-##     ## res = metfrag_on_state(m)
-##     ## withr::with_dir(m$run$metfrag$path,{
-##     ##     fc_i = readChar(res$targets[1,files],nchars=file.size(res$targets[1,files]))
-##     ##     fc_i = gsub(paste0("LocalDatabasePath = ",m$run$metfrag$db_path),"",fc_i)
-##     ##     expect_snapshot(fc_i)
-    
-##     ##     fs_i = readChar(res$targets[3,files],nchars=file.size(res$targets[3,files]))
-##     ##     fs_i = gsub(paste0("LocalDatabasePath = ",m$run$metfrag$db_path),"",fs_i)
-##     ##     expect_snapshot(fs_i)
-    
-##     ##     fc_l = readChar(res$targets[.N-2,files],nchars=file.size(res$targets[.N-2,files]))
-##     ##     fc_l = gsub(paste0("LocalDatabasePath = ",m$run$metfrag$db_path),"",fc_l)
-##     ##     expect_snapshot(fc_l)
-
-##     ##     fs_l = readChar(res$targets[.N,files],nchars=file.size(res$targets[.N,files]))
-##     ##     fs_l = gsub(paste0("LocalDatabasePath = ",m$run$metfrag$db_path),"",fs_l)
-##     ##     expect_snapshot(fs_l)
-##     ## })
-## })
-
-
-
-
-## test_that("MetFrag example works.",{
-##     skip_if_not(file.exists(Sys.getenv("METFRAG_JAR")),"Environment variable METFRAG_JAR does not contain a path to MetFrag jar package.")
-##     skip_if_offline()
-##     withr::with_tempdir({
-##         runtime = path.expand(Sys.getenv("METFRAG_JAR"))
-##         fn_conf = system.file("testdata/example_parameter_file.txt",package = "shinyscreen")
-##         fn_peaks = system.file("testdata/example_data.txt",package = "shinyscreen")
-##         fn_log = "metfrag.log"
-##         file.copy(fn_conf,basename(fn_conf))
-##         file.copy(fn_peaks,basename(fn_peaks))
-
-##         metfrag_run(fn_jar = runtime,
-##                     fn_conf = basename(fn_conf),
-##                     fn_log = fn_log)
-
-##         content = readChar(fn_log,nchars=file.size(fn_log))
-##         expect_true(grepl(r"(0 candidate\(s\) discarded during processing due to errors)",content))
-##     })
-## })
-
-
-- 
GitLab