Skip to content
Snippets Groups Projects
Commit c0ec5dc9 authored by Todor Kondić's avatar Todor Kondić
Browse files

Fun process_cmpd_sets correctly "slugifies" filenames.

parent 2eb43aed
No related branches found
No related tags found
No related merge requests found
......@@ -98,38 +98,11 @@ run_in_dir <- function(m) {
}
##' @export
load_compound_input <- function(m) {
coll <- list()
fields <- colnames(EMPTY_CMPD_LIST)
fns <- file.path(m$run$paths$project,m$conf$compounds$lists)
message("fns:",paste0(fns,collapse=","))
coltypes <- c(ID="character",
SMILES="character",
Formula="character",
Name="character",
RT="numeric",
mz="numeric")
for (l in 1:length(fns)) {
fn <- fns[[l]]
## Figure out column headers.
nms <- colnames(file2tab(fn,nrows=0))
## Read the table. Knowing column headers prevents unnecessary
## warnings.
dt <- file2tab(fn, colClasses=coltypes[nms])
verify_cmpd_l(dt=dt,fn=fn)
# nonexist <- setdiff(fnfields,fields)
coll[[l]] <- dt #if (length(nonexist)==0) dt else dt[,(nonexist) := NULL]
coll[[l]]$ORIG <- fn
}
cmpds <- if (length(fns)>0) rbindlist(l=c(list(EMPTY_CMPD_LIST), coll), use.names = T, fill = T) else EMPTY_CMPD_LIST
fns <- file.path(m$run$paths$project,m$conf$compounds$lists)
cmpds = join_compound_lists(fns)
## Process sets.
cmpds = process_cmpd_sets(cmpds)
......
......@@ -143,14 +143,3 @@ uniqy_slugs <- function(slugs) {
dt = data.table::data.table(slug=slugs)
dt[,slug:=fifelse(rep(.N==1L,.N),slug,paste0(slug,"_",seq(1L,.N))),by="slug"]$slug
}
process_cmpd_sets <- function(cmpdlist) {
## Process sets.
if (! ("set" %in% colnames(cmpdlist))) cmpdlist$set=NA_character_ else cmpdlist[,set:=as.character(set)]
## Extract set names and fill out the empty ones.
slugs = cmpdlist[,.(slug=fifelse(is.na(set),gen_fname_slug(ORIG),set)),by=ORIG]
slugs[,slug:=uniqy_slugs(slug)]
cmpdlist[slugs,set:=i.slug,on="ORIG"]
cmpdlist
}
......@@ -124,13 +124,14 @@ NUM_INP_HEIGHT="5%"
## Possible compound list fields
EMPTY_CMPD_LIST = dtable(ID=character(),
SMILES=character(),
Name=character(),
Formula=character(),
RT=numeric(),
mz=numeric(),
known=character(),
ORIG=character())
SMILES=character(),
Name=character(),
Formula=character(),
RT=numeric(),
mz=numeric(),
known=character(),
set=character(),
ORIG=character())
COMP_LIST_COLS = c("ID","Name","SMILES","Formula","RT","mz")
## Comprehensive table properties
COMP_NAME_MAP = list(RT="rt")
......
......@@ -453,14 +453,10 @@ app_state2state <- function(input,gui,envopts,m=NULL) {
}
get_sets <- function(gui) {
## TODO FIXME
## Think about this
## fn_lists <- file.path(gui$paths$project,gui$compounds$lists)
## df <- fread(file=fn_lists)
## if (!
## res = df[,unique(set)]
if (length(res)==0L) res = "ALL"
fn_lists <- file.path(gui$paths$project,gui$compounds$lists)
cmpds = join_compound_lists(fn_lists)
cmpds = process_cmpd_sets(cmpds)
cmpds[,unique(set)]
}
......
......@@ -431,3 +431,40 @@ pack_project <- function(m,fn_arch) {
})
fn_arch
}
join_compound_lists <- function(fname) {
coll <- list()
fields <- colnames(EMPTY_CMPD_LIST)
coltypes <- c(ID="character",
SMILES="character",
Formula="character",
Name="character",
RT="numeric",
mz="numeric")
l=0L
for (fn in fname) {
l = l + 1L
## Figure out column headers.
nms <- colnames(file2tab(fn,nrows=0))
## Read the table. Knowing column headers prevents unnecessary
## warnings.
dt <- file2tab(fn, colClasses=coltypes[nms])
verify_cmpd_l(dt=dt,fn=fn)
# nonexist <- setdiff(fnfields,fields)
coll[[l]] <- dt #if (length(nonexist)==0) dt else dt[,(nonexist) := NULL]
coll[[l]]$ORIG <- fn
}
if (length(fname)>0) rbindlist(l=c(list(EMPTY_CMPD_LIST), coll), use.names = T, fill = T) else EMPTY_CMPD_LIST
}
process_cmpd_sets <- function(cmpdlist) {
if (nrow(cmpdlist)==0L) return(EMPTY_CMPD_LIST)
## Process sets.
if (! ("set" %in% colnames(cmpdlist))) cmpdlist$set=NA_character_ else cmpdlist[,set:=as.character(set)]
slugs = cmpdlist[,.(slug=gen_fname_slug(ORIG)),by="ORIG"]
slugs[,slug:=uniqy_slugs(slug)]
cmpdlist[slugs,set:=fifelse(is.na(set),i.slug,set),on="ORIG"]
cmpdlist
}
# process_cmpd_sets
# uniqy_slugs
Code
x
out
Output
ID ORIG set
1: 1 f1.csv f1_2
2: 2 f1.csv f1_2
3: 3 f2.csv aks
4: 4 f3.csv se32
5: 5 f3.csv se32
---
Code
x
Output
ID ORIG set
1: 1 b/f2.csv f2
2: 2 a/f1.csv f1
3: 3 q/f/g/f2.csv aks
4: 4 d/e/f/f3.csv se31
5: 5 m/n/q/f3.csv se32
[1] "f1" "f2_1" "f2_2" "f2_3" "f3_1" "f4_1" "f4_2" "f5" "f3_2" "f6"
......@@ -135,7 +135,7 @@
list()
$input$tab$L0
Empty data.table (0 rows and 8 cols): ID,SMILES,Name,Formula,RT,mz...
Empty data.table (0 rows and 9 cols): ID,SMILES,Name,Formula,RT,mz...
......@@ -219,3 +219,50 @@
[1] "test_project/file1.csv" "test_project/file2.csv"
[3] "test_project/subdir/filesubdir.csv"
# process_cmpd_sets
Code
x
Output
ID ORIG set
1: 1 f1.csv f1
2: 2 f1.csv f1
3: 3 f2.csv aks
4: 4 f3.csv se31
5: 5 f3.csv se32
---
Code
x
Output
ID ORIG set
1: 1 b/f2.csv f2_1
2: 2 a/f1.csv f1
3: 3 q/f/g/f2.csv aks
4: 4 d/e/f/f3.csv se31
5: 5 m/n/q/f3.csv se32
---
Code
x
Output
ID ORIG set
1: 1 a/fn.csv fn_1
2: 2 a/fn.csv fn_1
3: 3 a/fn.csv set
4: 4 a/fn.csv set2
5: 5 a/fn.csv fn_1
6: 6 b/fn.csv fn_2
7: 7 b/fn.csv set2
8: 8 b/fn.csv set2
9: 9 b/fn.csv fn_2
---
Code
x
Output
Empty data.table (0 rows and 9 cols): ID,SMILES,Name,Formula,RT,mz...
......@@ -13,20 +13,5 @@ test_that("gen_fname_slug",{
test_that("uniqy_slugs",{
inp = c('f1','f2','f2','f2','f3','f4','f4','f5','f3','f6')
out = uniqy_slugs(inp)
message(paste(out,coll=','))
expect_true(1==1)
})
test_that("process_cmpd_sets",{
## Test case when no base filename is the same.
cmpdl = data.table(ID=1:5,ORIG=c("f1.csv","f1.csv","f2.csv","f3.csv","f3.csv"),set=c(NA_character_,NA_character_,"aks","se31","se32"))
x = process_cmpd_sets(cmpdl)
expect_snapshot(x)
## Test case with similar base filenames.
cmpdl = data.table(ID=1:5,ORIG=c("b/f2.csv","a/f1.csv","q/f/g/f2.csv","d/e/f/f3.csv","m/n/q/f3.csv"),set=c(NA_character_,NA_character_,"aks","se31","se32"))
x = process_cmpd_sets(cmpdl)
expect_snapshot(x)
expect_snapshot(out)
})
......@@ -33,3 +33,35 @@ test_that("pack_project",{
})
expect_snapshot(fls)
})
test_that("join_compound_lists with empty input returns empty output",{
x = join_compound_lists(character(0))
expect_true(nrow(x)==0L)
})
test_that("process_cmpd_sets",{
## Test case when no base filename is the same.
cmpdl = data.table(ID=1:5,ORIG=c("f1.csv","f1.csv","f2.csv","f3.csv","f3.csv"),set=c(NA_character_,NA_character_,"aks","se31","se32"))
x = process_cmpd_sets(cmpdl)
expect_snapshot(x)
## Test case with similar base filenames.
cmpdl = data.table(ID=1:5,ORIG=c("b/f2.csv","a/f1.csv","q/f/g/f2.csv","d/e/f/f3.csv","m/n/q/f3.csv"),set=c(NA_character_,NA_character_,"aks","se31","se32"))
x = process_cmpd_sets(cmpdl)
expect_snapshot(x)
## Test semi-filled cmpd.
cmpdl1 = data.table(ID=1:5,ORIG=rep("a/fn.csv",5),set=c(NA_character_,NA_character_,"set","set2",NA_character_))
cmpdl2 = data.table(ID=6:9,ORIG=rep("b/fn.csv",4),set=c(NA_character_,"set2","set2",NA_character_))
cmpdl = rbindlist(list(cmpdl1,cmpdl2))
x = process_cmpd_sets(cmpdl)
expect_snapshot(x)
## Empty input.
cmpdl = EMPTY_CMPD_LIST
x = process_cmpd_sets(cmpdl)
expect_snapshot(x)
})
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment