From 2eb43aed505e426fc11e23e890b26fa658fcf5ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Todor=20Kondi=C4=87?= <kontrapunkt@uclmail.net> Date: Sun, 5 Mar 2023 10:01:52 +0100 Subject: [PATCH] Sets are read from compound lists, or created based on the file name. --- R/api.R | 4 +++- R/base.R | 11 +++++++++++ tests/testthat/_snaps/base.md | 24 ++++++++++++++++++++++++ tests/testthat/test-base.R | 14 ++++++++++++++ 4 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 tests/testthat/_snaps/base.md diff --git a/R/api.R b/R/api.R index e1706cb..d14964c 100644 --- a/R/api.R +++ b/R/api.R @@ -127,10 +127,12 @@ load_compound_input <- function(m) { # nonexist <- setdiff(fnfields,fields) coll[[l]] <- dt #if (length(nonexist)==0) dt else dt[,(nonexist) := NULL] coll[[l]]$ORIG <- fn - } cmpds <- if (length(fns)>0) rbindlist(l=c(list(EMPTY_CMPD_LIST), coll), use.names = T, fill = T) else EMPTY_CMPD_LIST + ## Process sets. + cmpds = process_cmpd_sets(cmpds) + dups <- duplicated(cmpds$ID) dups <- dups | duplicated(cmpds$ID,fromLast = T) dupIDs <- cmpds$ID[dups] diff --git a/R/base.R b/R/base.R index 320d6b6..d64b4ef 100644 --- a/R/base.R +++ b/R/base.R @@ -143,3 +143,14 @@ uniqy_slugs <- function(slugs) { dt = data.table::data.table(slug=slugs) dt[,slug:=fifelse(rep(.N==1L,.N),slug,paste0(slug,"_",seq(1L,.N))),by="slug"]$slug } + +process_cmpd_sets <- function(cmpdlist) { + ## Process sets. + if (! ("set" %in% colnames(cmpdlist))) cmpdlist$set=NA_character_ else cmpdlist[,set:=as.character(set)] + ## Extract set names and fill out the empty ones. + slugs = cmpdlist[,.(slug=fifelse(is.na(set),gen_fname_slug(ORIG),set)),by=ORIG] + slugs[,slug:=uniqy_slugs(slug)] + + cmpdlist[slugs,set:=i.slug,on="ORIG"] + cmpdlist +} diff --git a/tests/testthat/_snaps/base.md b/tests/testthat/_snaps/base.md new file mode 100644 index 0000000..3383291 --- /dev/null +++ b/tests/testthat/_snaps/base.md @@ -0,0 +1,24 @@ +# process_cmpd_sets + + Code + x + Output + ID ORIG set + 1: 1 f1.csv f1_2 + 2: 2 f1.csv f1_2 + 3: 3 f2.csv aks + 4: 4 f3.csv se32 + 5: 5 f3.csv se32 + +--- + + Code + x + Output + ID ORIG set + 1: 1 b/f2.csv f2 + 2: 2 a/f1.csv f1 + 3: 3 q/f/g/f2.csv aks + 4: 4 d/e/f/f3.csv se31 + 5: 5 m/n/q/f3.csv se32 + diff --git a/tests/testthat/test-base.R b/tests/testthat/test-base.R index ca5eb16..60792b9 100644 --- a/tests/testthat/test-base.R +++ b/tests/testthat/test-base.R @@ -16,3 +16,17 @@ test_that("uniqy_slugs",{ message(paste(out,coll=',')) expect_true(1==1) }) + +test_that("process_cmpd_sets",{ + + ## Test case when no base filename is the same. + cmpdl = data.table(ID=1:5,ORIG=c("f1.csv","f1.csv","f2.csv","f3.csv","f3.csv"),set=c(NA_character_,NA_character_,"aks","se31","se32")) + x = process_cmpd_sets(cmpdl) + expect_snapshot(x) + + ## Test case with similar base filenames. + cmpdl = data.table(ID=1:5,ORIG=c("b/f2.csv","a/f1.csv","q/f/g/f2.csv","d/e/f/f3.csv","m/n/q/f3.csv"),set=c(NA_character_,NA_character_,"aks","se31","se32")) + x = process_cmpd_sets(cmpdl) + expect_snapshot(x) + +}) -- GitLab