diff --git a/R/api.R b/R/api.R index e1706cb743d87a16641aeb3dedd5c28c0470201d..d14964c7d4c7f3c42f6543747cbbd6e7931b09d7 100644 --- a/R/api.R +++ b/R/api.R @@ -127,10 +127,12 @@ load_compound_input <- function(m) { # nonexist <- setdiff(fnfields,fields) coll[[l]] <- dt #if (length(nonexist)==0) dt else dt[,(nonexist) := NULL] coll[[l]]$ORIG <- fn - } cmpds <- if (length(fns)>0) rbindlist(l=c(list(EMPTY_CMPD_LIST), coll), use.names = T, fill = T) else EMPTY_CMPD_LIST + ## Process sets. + cmpds = process_cmpd_sets(cmpds) + dups <- duplicated(cmpds$ID) dups <- dups | duplicated(cmpds$ID,fromLast = T) dupIDs <- cmpds$ID[dups] diff --git a/R/base.R b/R/base.R index 320d6b67e24aaf666374319b74f24f8ae7a87752..d64b4efa704835fbdcef04f8e622a081aa863c3d 100644 --- a/R/base.R +++ b/R/base.R @@ -143,3 +143,14 @@ uniqy_slugs <- function(slugs) { dt = data.table::data.table(slug=slugs) dt[,slug:=fifelse(rep(.N==1L,.N),slug,paste0(slug,"_",seq(1L,.N))),by="slug"]$slug } + +process_cmpd_sets <- function(cmpdlist) { + ## Process sets. + if (! ("set" %in% colnames(cmpdlist))) cmpdlist$set=NA_character_ else cmpdlist[,set:=as.character(set)] + ## Extract set names and fill out the empty ones. + slugs = cmpdlist[,.(slug=fifelse(is.na(set),gen_fname_slug(ORIG),set)),by=ORIG] + slugs[,slug:=uniqy_slugs(slug)] + + cmpdlist[slugs,set:=i.slug,on="ORIG"] + cmpdlist +} diff --git a/tests/testthat/_snaps/base.md b/tests/testthat/_snaps/base.md new file mode 100644 index 0000000000000000000000000000000000000000..3383291e685e5f54b5dd046143a9902abc811cba --- /dev/null +++ b/tests/testthat/_snaps/base.md @@ -0,0 +1,24 @@ +# process_cmpd_sets + + Code + x + Output + ID ORIG set + 1: 1 f1.csv f1_2 + 2: 2 f1.csv f1_2 + 3: 3 f2.csv aks + 4: 4 f3.csv se32 + 5: 5 f3.csv se32 + +--- + + Code + x + Output + ID ORIG set + 1: 1 b/f2.csv f2 + 2: 2 a/f1.csv f1 + 3: 3 q/f/g/f2.csv aks + 4: 4 d/e/f/f3.csv se31 + 5: 5 m/n/q/f3.csv se32 + diff --git a/tests/testthat/test-base.R b/tests/testthat/test-base.R index ca5eb162da59f4f8a880246f767bd10527ca5594..60792b9d9c7972bcda0024ef3c771d9bbf649975 100644 --- a/tests/testthat/test-base.R +++ b/tests/testthat/test-base.R @@ -16,3 +16,17 @@ test_that("uniqy_slugs",{ message(paste(out,coll=',')) expect_true(1==1) }) + +test_that("process_cmpd_sets",{ + + ## Test case when no base filename is the same. + cmpdl = data.table(ID=1:5,ORIG=c("f1.csv","f1.csv","f2.csv","f3.csv","f3.csv"),set=c(NA_character_,NA_character_,"aks","se31","se32")) + x = process_cmpd_sets(cmpdl) + expect_snapshot(x) + + ## Test case with similar base filenames. + cmpdl = data.table(ID=1:5,ORIG=c("b/f2.csv","a/f1.csv","q/f/g/f2.csv","d/e/f/f3.csv","m/n/q/f3.csv"),set=c(NA_character_,NA_character_,"aks","se31","se32")) + x = process_cmpd_sets(cmpdl) + expect_snapshot(x) + +})