diff --git a/R/errors.R b/R/errors.R index 2859b708ec350770b476c54c324e6f5751c184e5..d6175be288e8afcbae71304653ba8eff6560eebb 100644 --- a/R/errors.R +++ b/R/errors.R @@ -39,8 +39,11 @@ check_extension <- function(extfileval,what) { } -check_key_absent <- function(key,l,what) { +check_key_absent <- function(keys,l,what) { nms = names(l) - if (! (key %in% nms)) stop(errorCondition(paste0("Key",key, " has not been found for ", what), - class = paste0(what,'-absent'))) + keys_in = keys %in% l + keys_absent = keys[!keys_in] + hv = if (length(keys_absent)>1L) "have" else "has" + if (length(keys_absent)>0L) stop(errorCondition(paste0("Keys [",paste0(keys_absent,collapse=', '), "] ",hv," not been found for ", what), + class = paste0(what,'-absent'))) } diff --git a/R/metfrag.R b/R/metfrag.R index f1f7862e8b93d0515232498cf3af1d20c3d9759c..949fd0190e7e74e0c7d4c0f34efc6589cba928fb 100644 --- a/R/metfrag.R +++ b/R/metfrag.R @@ -213,7 +213,7 @@ metfrag_run_many <- function(fn_jar,file_tab, mem = NA_character_, java_bin = "j } -summarise_metfrag_results <- function(param,path,subpaths,cand_parameters,scores,collect_candidates,file_tab) { +summarise_metfrag_results <- function(param,path,subpaths,cand_parameters,db_scores,int_scores,collect_candidates,file_tab) { ## which(max(as.numeric(mf_res$Score))==as.numeric(mf_res$Score)) index_maxScore = 1L @@ -228,11 +228,13 @@ summarise_metfrag_results <- function(param,path,subpaths,cand_parameters,scores dt = data.table::rbindlist(lapply(fn,function (ff) as.data.table(readf(ff)))) dt }, - by = keyz] + keyby = keyz] + + } .adapt_col_types <- function(x) { - x[,(names(scores)):=lapply(.SD, as.numeric),.SDcol=names(scores)] + x[,(names(db_scores)):=lapply(.SD, as.numeric),.SDcol=names(db_scores)] } .calc_basic_scores <- function(x) { @@ -255,33 +257,38 @@ summarise_metfrag_results <- function(param,path,subpaths,cand_parameters,scores .make_max_cols <- function(x) { - res = x[,lapply(.SD,function(s) max(s,na.rm=T)),.SDcol=names(scores),keyby=keyz] - data.table::setnames(res,old = names(res),new = paste0("max_",names(res))) + res = x[,{ + cols = lapply(.SD,function(s) max(s,na.rm=T)) + names(cols) = lapply(names(.SD),function(nn) paste0("Max_",nn)) + cols}, .SDcol=c(names(int_scores),names(db_scores)),keyby=keyz] res } .collect_candidates <- function(x) { - x[,lapply(.SD, function(col) paste(col,collapse=";")), - .SDcol=collect_candidates, - keyby=keyz] + res = x[,{cols = lapply(.SD, function(col) paste(col,collapse=";")) + names(cols) = lapply(names(.SD),function(nn) paste0("All_",nn)) + cols}, + .SDcol=collect_candidates, + keyby=keyz] + res } - + thetab = .read_results() .adapt_col_types(thetab) - - btab = .calc_basic_scores(thetab) - + btab = .calc_basic_scores(thetab) cctab = .collect_candidates(thetab) candtab = .get_candidate_param(thetab) mxtab = .make_max_cols(thetab) - res = file_tab + + res = file_tab[,`:=`(f_conf=NULL,f_log=NULL,f_res=NULL,f_spec=NULL)] + data.table::setkeyv(res,c(key(res),"stag")) res = res[btab] - res = res[cctab] res = res[candtab] res = res[mxtab] + res = res[cctab] res diff --git a/R/state.R b/R/state.R index fa9f2d854cd126781de5378b8f9a0a76513c7c7d..ace74b402b5a11a72866fac62a8b164292e53095 100644 --- a/R/state.R +++ b/R/state.R @@ -121,13 +121,6 @@ new_runtime_state <- function(project,envopts,conf=NULL) { for (x in subpaths) dir.create(file.path(mfdir,x),showWarnings=F) metfrag$subpaths = subpaths - ## Create score and weight entries. - param = conf$metfrag$param - param$MetFragScoreTypes = paste0(names(conf$metfrag$scores),collapse = ",") - param$MetFragScoreWeights = paste0(conf$metfrag$scores,collapse = ",") - - ## Fully expanded params end up in run object. - metfrag$param = param @@ -142,7 +135,27 @@ new_runtime_state <- function(project,envopts,conf=NULL) { check_file_absent(fpath,what="metfrag-db-file") metfrag$cando_local = T metfrag$db_path = fpath + + # Check if names exist in the database (if local). + dbnms = colnames(fread(fpath,nrows=1L)) + check_key_absent(c(names(conf$metfrag$database_scores), + conf$metfrag$cand_parameters, + conf$metfrag$collect_candidates), + dbnms,what="local-metfrag-database") + + + } + + ## Create score and weight entries. + param = conf$metfrag$param + + scores = c(conf$metfrag$intrinsic_scores,conf$metfrag$database_scores) + param$MetFragScoreTypes = paste0(names(scores),collapse = ",") + param$MetFragScoreWeights = paste0(scores,collapse = ",") + ## Fully expanded params end up in run object. + metfrag$param = param + } @@ -350,7 +363,8 @@ metfrag_conf <- function(m) { metfrag$param = param - metfrag$scores = METFRAG_DEFAULT_SCORES + metfrag$intrinsic_scores = METFRAG_DEFAULT_SCORES + metfrag$database_scores = list() m$conf$metfrag = metfrag m diff --git a/tests/testthat/_snaps/metfrag.md b/tests/testthat/_snaps/metfrag.md index 6fc1013d39d73eb55f0955f533c868f206a64e2b..b6d392e057bd0dd39ff85af8335d474887171b8d 100644 --- a/tests/testthat/_snaps/metfrag.md +++ b/tests/testthat/_snaps/metfrag.md @@ -42,3 +42,27 @@ 1: testsample_setNTs_ID100_adduct[M+H]+_tagAA_an5413.csv 2: testsample_setNTs_ID100_adduct[M+H]+_tagKO_an5434.csv +--- + + Code + x + Output + ID adduct tag an CE stag num_poss_IDs + 1: 100 [M+H]+ AA 5413 10 setNTs_ID100_adduct[M+H]+_tagAA_an5413 86 + 2: 100 [M+H]+ KO 5434 10 setNTs_ID100_adduct[M+H]+_tagKO_an5434 86 + max_Score n_Score_GE4 n_Score_GE3 n_Score_GE2 Identifier + 1: 4.938569 1 1 1 6057 + 2: 4.753741 1 1 1 6057 + CompoundName Max_FragmenterScore + 1: (2S)-2-amino-3-(4-hydroxyphenyl)propanoic acid 485.2443 + 2: (2S)-2-amino-3-(4-hydroxyphenyl)propanoic acid 679.6480 + Max_OfflineIndividualMoNAScore Max_PubMed_Count Max_Patent_Count + 1: 0.99441 107881 76367 + 2: 0.83380 107881 76367 + Max_AnnoTypeCount + 1: 9 + 2: 9 + All_Identifier + 1: 6057;6950578;73562;91482;562171;440311;3135546;25418841;17607993;22397448;14451762;72214162;47003404;9442556;447184;602411;45167;20826205;4123006;168705;438;2734814;89843;6422115;12919999;13052275;10678961;7436;201474;223277;73705;220089;72878;113330;7186;175593;18605013;824655;23548;6861469;40148016;2798463;117636;71327507;90407;15480193;89978;67455263;49866537;19756142;81446487;89905;65711231;231242;6871292;10726079;12315638;4105139;3080594;7275229;59323545;18625738;62234616;44600733;21885916;232526;71756776;55266573;139835;122651682;251544;24820110;16218074;59176;21191822;528966;602319;13384341;836717;519003;153707036;602288;81657;597077;197180;13672729 + 2: 6057;6950578;73562;91482;562171;440311;438;14451762;72214162;22397448;45167;602411;3135546;47003404;17607993;9442556;25418841;7436;10678961;6422115;7186;201474;168705;89843;4123006;447184;13052275;20826205;2734814;220089;12919999;223277;73705;113330;72878;175593;824655;23548;6861469;67455263;18605013;40148016;19756142;89905;81446487;59176;2798463;90407;13384341;15480193;6871292;49866537;71327507;231242;12315638;65711231;117636;71756776;4105139;89978;3080594;59323545;18625738;13672729;251544;55266573;24820110;10726079;232526;16218074;21885916;44600733;7275229;62234616;122651682;139835;528966;836717;602319;153707036;602288;519003;21191822;597077;197180;81657 + diff --git a/tests/testthat/helper-state.R b/tests/testthat/helper-state.R index 0e69544cf8350cbc87f8dcf0a20f5b1ac912adf2..d9f526f25a57511c98c4183a8887f119ed0d0960 100644 --- a/tests/testthat/helper-state.R +++ b/tests/testthat/helper-state.R @@ -7,7 +7,7 @@ make_dummy_mf_project <- function() { withr::with_options(list(fn_jar=Sys.getenv("METFRAG_JAR"), fn_db_dir=Sys.getenv("METFRAG_DB_DIR"), fn_db = Sys.getenv("METFRAG_DB"), - p_dir = tempfile(pattern="shinyscreen_dummy_mf_proj")), + p_dir = tempfile(pattern="shinyscreen_dummy_mf_proj")), { fn_jar = norm_path(getOption("fn_jar")) if (!file.exists(fn_jar)) fn_jar = "" @@ -24,11 +24,11 @@ make_dummy_mf_project <- function() { x$conf$metfrag$param$SampleName = "testsample" x$conf$metfrag$param$MetFragDatabaseType = "LocalCSV" x$conf$metfrag$db_file = basename(getOption("fn_db")) - x$conf$metfrag$scores = list(FragmenterScore=1.0, - OfflineIndividualMoNAScore=1.0, - PubMedCount=1.0, - PatentCount=1.0, - AnnoTypeCount=1.0) + x$conf$metfrag$intrinsic_scores = list(FragmenterScore=1.0, + OfflineIndividualMoNAScore=1.0) + x$conf$metfrag$database_scores = list(PubMed_Count=1.0, + Patent_Count=1.0, + AnnoTypeCount=1.0) x$conf$metfrag$cand_parameters = c("Identifier","CompoundName") x$conf$metfrag$collect_candidates = c("Identifier") diff --git a/tests/testthat/test-metfrag.R b/tests/testthat/test-metfrag.R index 7f3990b2616c8fd3aa39ff91cbcf3f816469d624..40e277bf2290a51f7e8d37a7b3b1ae4beb041572 100644 --- a/tests/testthat/test-metfrag.R +++ b/tests/testthat/test-metfrag.R @@ -1,13 +1,14 @@ test_that("Do adducts affect MetFrag config generation correctly?",{ - + skip_if_not(file.exists(Sys.getenv("METFRAG_JAR")),"Environment variable METFRAG_JAR does not contain a path to MetFrag jar package.") withr::with_tempdir({ + opts = list(fn_jar=Sys.getenv("METFRAG_JAR"), + fn_db_dir=Sys.getenv("METFRAG_DB_DIR"), + fn_db = Sys.getenv("METFRAG_DB")) + o = new_conf() - dir.create("db_dir") - saveRDS("","db_dir/dbfile.csv") - saveRDS("","mf.jar") - o$conf$metfrag$db_file = "dbfile.csv" - eo = envopts(metfrag_db_dir="db_dir", - metfrag_jar="mf.jar") + eo = envopts(metfrag_db_dir=opts$fn_db_dir, + metfrag_jar=opts$fn_jar) + o$conf$metfrag$db_file = basename(opts$fn_db) yaml::write_yaml(o$conf,file='conf-state.yaml') pproj = getwd() m = new_project(pproj,envopts = eo) @@ -75,9 +76,13 @@ ok_return_val("metfrag_run",{ path = m$run$metfrag$path, subpaths = m$run$metfrag$subpaths, cand_parameters = m$conf$metfrag$cand_parameters, - scores = m$conf$metfrag$scores, + db_scores = m$conf$metfrag$database_scores, + int_scores = m$conf$metfrag$intrinsic_scores, collect_candidates= m$conf$metfrag$collect_candidates, file_tab = ftab) + + expect_snapshot(x) + })