Newer
Older
## Copyright (C) 2020,2021,2023 by University of Luxembourg
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at
## http://www.apache.org/licenses/LICENSE-2.0
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
init_state <- function(m) {
m$out$tab = list()
m$input$datafiles = NULL
m$input$tab$mzml = EMPTY_MZML
lab = gen_uniq_lab(list(),pref="L")
m$input$tab$lists = list()
m$input$tab[[lab[[1]]]] = EMPTY_CMPD_LIST
m$out$tab$comp = EMPTY_COMP_TAB
m
}
#' @title Create Initial State
#' @details Creates an initial, bare state.
#' @return A state object.
#' @export
#' @author Todor Kondić
new_state <- function() {
m <- new_conf()
init_state(m)
}
lst_fn_cmpl <- lapply(lst_cmpl,function (lst) {
bfn_cmpl <- lst
fn <- file.path(run$paths$project,bfn_cmpl)
if (!file.exists(fn)) stop("File ", fn, " does not exist in ", run$paths$project," .")
fn
})
run$paths$compounds$lists <- lst_fn_cmpl
run$paths$data = norm_path(file.path(envopts$top_data_dir,conf$paths$data))
check_dir_absent(run$paths$data,"data-dir", strict=T)
run$paths$datatab <- file.path(run$paths$project,FN_DATA_TAB)
#' @title Create Runtime Configuration
#' @details This is a part of the configuration that can only be
#' diagnosed at runtime.
#' @param project `character(1)`, project title (and the directory name under `projects`)
#' @param conf `conf`, configuration object, optional
#' @return `run` substate
#' @author Todor Kondić
#' @export
new_runtime_state <- function(project,envopts,conf=NULL) {
## A little helper for metfrag.
.metfrag <- function(project_path) {
## Defensive default.
metfrag = list(cando_local=F,
cando_remote=F,
cando_metfrag=F,
path="",
subpaths=list(results="",
config="",
spec="",
log=""),
## If jar defined.
metfrag$runtime = envopts$metfrag$jar
metfrag$java_bin = envopts$metfrag$java_bin
if (nchar(metfrag$runtime)>0L) {
metfrag$cando_metfrag = T
metfrag$cando_remote = T
}
## If `conf` not null, fully setup metfrag.
if (!is.null(conf)) {
## If MetFrag possible, set up the file structure.
if (metfrag$cando_metfrag) {
mfdir = file.path(project_path,"metfrag")
dir.create(mfdir, showWarnings = F)
subpaths = list(results = "results",
config = "config",
spec = "spec",
log = "log")
for (x in subpaths) dir.create(file.path(mfdir,x),showWarnings=F)
metfrag$subpaths = subpaths
## Check for local DBs.
root = envopts$metfrag$db_dir
bname = conf$metfrag$db_file
if (nchar(bname)>0L) {
fpath = file.path(root,bname)
check_file_absent(fpath,what="metfrag-db-file")
metfrag$cando_local = T
# Check if names exist in the database (if local).
dbnms = colnames(fread(fpath,nrows=1L))
check_key_absent(c(names(conf$metfrag$database_scores),
conf$metfrag$cand_parameters,
conf$metfrag$collect_candidates),
dbnms,what="local-metfrag-database")
## Create score and weight entries.
param = conf$metfrag$param
scores = c(conf$metfrag$intrinsic_scores,conf$metfrag$database_scores)
param$MetFragScoreTypes = paste0(names(scores),collapse = ",")
param$MetFragScoreWeights = paste0(scores,collapse = ",")
## Fully expanded params end up in run object.
metfrag$param = param
}
metfrag
}
## Check if all OK with project name.
check_notastring(project,"project",strict=T)
project_path = file.path(envopts$projects,basename(project))
check_dir_absent(project_path,"project_path", strict=T)
project = basename(project)
run = list()
run$project = project
run$paths$project = project_path
run$metfrag = .metfrag(project_path)
if (!is.null(conf)) runtime_from_conf(run=run,envopts=envopts,conf=conf) else run
#' @title Create a Bare Project
#' @details This function creates a `shinyscreen` project with no
#' configuration. This is not enough to run a pipeline, but will
#' start things for incremental configuration using the GUI.
#' @param project `character(1)`, path to a directory containing all the projects
#' @param envopts `envopts`, an `envopts` object.
#' @return New project `state`
#' @rdname new_project
#' @seealso [shinyscreen::new_state()]
#' @author Todor Kondić
#' @export
new_empty_project <- function(project,envopts) {
m$run <- new_runtime_state(project,envopts)
##' @title Create a New Fully Defined Project
##' @param project `character(1)`, path to a directory containing all
##' the projects
##' @param envopts `envopts`, an `envopts` object.
##' @param datatab `datatab`, a `data.table` describing data files,
##' optional
##' @param conf `conf`, a configuration object, optional
##' @details After the call to `new_project`, the `shinyscreen`
##' pipeline should be ready to start.
##' @rdname new_project
##' [shinyscreen::new_empty_project()]
##' @author Todor Kondić
new_project <- function(project,envopts,datatab=NULL,conf=NULL) {
m = new_state()
m$run = new_runtime_state(project,envopts=envopts)
fn_conf = file.path(m$run$paths$project,FN_CONF)
m$conf = if (is.null(conf)) {
check_file_absent(fn_conf,what="conf-file")
yaml::yaml.load_file(fn_conf)
} else conf
## m$conf$compounds$lists = label_cmpd_lists(m$conf$compounds$lists)
m$run = new_runtime_state(project,envopts=envopts,conf=m$conf)
m$input$tab$mzml = datatab
import_project <- function(project,envopts=envopts) {
m <- new_project(project,envopts=envopts)
fn_state <- file.path(m$run$paths$project,FN_STATE)
if (!file.exists(fn_state)) stop(paste0("Cannot import project. State file ",fn_state," does not exist, or is unreadable."))
lm <- readRDS(file=fn_state)
lm$run <- m$run
lm$conf <- m$conf
lm
}
##' @export
new_rv_state <- function() react_v(m=list2rev(new_state()))
## write_conf <- function(m,fn) {
## m$conf$paths$data <- get_fn_ftab(m)
## if (NROW(m$input$tab$mzml)>0) tab2file(tab=m$input$tab$mzml,file=file.path(m$run$paths$project,FN_DATA_TAB))
## yaml::write_yaml(x=m$conf,file=fn)
## }
## write_state <- function(m,fn_conf) {
## write_conf(m,fn_conf)
## tab2file(tab=m$input$tab$mzml,file=file.path(m$run$paths$project,FN_DATA_TAB))
## }
label_cmpd_lists <- function (lists) {
if (length(lists)>0) {
}
read_conf <- function(fn) {
cf <- yaml::yaml.load_file(fn)
cf$compounds$lists <- label_cmpd_lists(cf$compounds$lists)
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
cf
}
##' @export
get_fn_comp <- function(m) {
file.path(m$run$paths$project,FN_COMP_TAB)
}
##' @export
get_fn_summ <- function(m) {
file.path(m$run$paths$project, FN_SUMM)
}
##' @export
get_fn_extr <- function(m) {
file.path(m$run$paths$project, "extracted.rds")
}
##' @export
get_fn_conf <- function(m) {
file.path(m$run$paths$project, FN_CONF)
}
##' @export
get_fn_ftab <- function(m) {
file.path(m$run$paths$project, FN_DATA_TAB)
}
base_conf <- function () {
## Base state.
m <- list()
m$conf <- list(project=NA_character_,
compounds=list(lists=list(),
sets=""),
debug = F)
class(m$conf) = c("conf","list")
class(m) = "state"
## Extraction defaults.
m$conf$tolerance <- list("ms1 coarse"=MS1_ERR_COARSE,
"ms1 fine"=MS1_ERR_FINE,
"eic"=EIC_ERR,
"rt"=RT_EXTR_ERR)
m$conf$extract <- list(missing_precursor_info=DEF_CONF_MISSING_PCS)
m
}
presc_conf <- function(m) {
## Prescreening defaults.
m$conf$prescreen <- list("ms1_int_thresh"=MS1_INT_THOLD,
"ms2_int_thresh"=MS2_INT_THOLD,
"s2n"=MS1_SN_FAC,
"ret_time_shift_tol"=RT_SHIFT_ERR)
m
}
fig_conf <- function(m) {
## Plotting defaults.
m$conf$figures$rt_min <- "NA_real_ min"
m$conf$figures$rt_max <- "NA_real_ min"
m$conf$figures$ext <- "pdf"
m
}
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
metfrag_param <- function(MetFragDatabaseType,
FragmentPeakMatchAbsoluteMassDeviation,
FragmentPeakMatchRelativeMassDeviation,
DatabaseSearchRelativeMassDeviation,
MetFragCandidateWriter,
SampleName,
MaximumTreeDepth,
MetFragPreProcessingCandidateFilter,
MetFragPostProcessingCandidateFilter) {
list(MetFragDatabaseType=MetFragDatabaseType,
FragmentPeakMatchAbsoluteMassDeviation=FragmentPeakMatchAbsoluteMassDeviation,
FragmentPeakMatchRelativeMassDeviation=FragmentPeakMatchRelativeMassDeviation,
DatabaseSearchRelativeMassDeviation=DatabaseSearchRelativeMassDeviation,
MetFragCandidateWriter=MetFragCandidateWriter,
SampleName=SampleName,
MaximumTreeDepth=MaximumTreeDepth,
MetFragPreProcessingCandidateFilter=MetFragPreProcessingCandidateFilter,
MetFragPostProcessingCandidateFilter=MetFragPostProcessingCandidateFilter)
}
metfrag4conf <- function(db_file,
param,
intrinsic_scores,
database_scores,
cand_parameters,
collect_candidates,
nproc) {
list(db_file=db_file,
param = param,
intrinsic_scores = intrinsic_scores,
database_scores = database_scores,
cand_parameters = cand_parameters,
collect_candidates = collect_candidates,
nproc = nproc)
}
metfrag_conf <- function(m) {
## MetFrag configuration defaults.
param = metfrag_param(MetFragDatabaseType="",
FragmentPeakMatchAbsoluteMassDeviation=METFRAG_DEFAULT_ABSMASSDEV,
FragmentPeakMatchRelativeMassDeviation=METFRAG_DEFAULT_RELMASSDEV,
DatabaseSearchRelativeMassDeviation=METFRAG_DB_SEARCH_RELDEV,
MetFragCandidateWriter=METFRAG_DEFAULT_WRITER,
SampleName=METFRAG_SAMPLE_NAME,
MaximumTreeDepth=METFRAG_DEFAULT_MAX_TREE_DEPTH,
MetFragPreProcessingCandidateFilter=paste(METFRAG_PREPFLT_CHOICES,collapse=","),
MetFragPostProcessingCandidateFilter=paste(METFRAG_POSTPFLT_CHOICES,collapse=","))
m$conf$metfrag = metfrag4conf(db_file = "",
param = param,
intrinsic_scores = METFRAG_DEFAULT_SCORES,
database_scores = list(),
cand_parameters = character(0),
collect_candidates = character(0),
nproc = 1L)
m
}
new_conf <- function() {
o = metfrag_conf(
fig_conf(
presc_conf(
extr_conf(
base_conf()))))
o
}
encode_ms2_to_line <- function(ms2) {
## ms2 is a data.frame whose first column is mz and the second intensity.
paste0(paste(ms2[[1]],ms2[[2]],sep=':'),collapse=';')
}
pack_ms2_w_summ <- function(summ,ms2) {
## Takes summ, finds entries with high quality spectra and subsets ms2 based on that.
## Take the columns we need from summ.
x = summ[ms2_sel==T,.SD,.SDcols=c(key(summ),"mz","SMILES","Formula","Name")]
mrg_keys = c(intersect(key(ms2),key(summ)),"an")
ms2[x,.(mz=i.mz,ms2_spectrum=encode_ms2_to_line(.SD[,c("mz","intensity")])),on=mrg_keys,by=.EACHI]
}
pack_project <- function(m,fn_arch) {
ppath = m$run$paths$project
project = m$run$project
tmpdir = tempfile()
write.csv(data.frame(),file=file.path(ppath,"file1.csv"))
write.csv(data.frame(),file=file.path(ppath,"file2.csv"))
dir.create(file.path(ppath,"subdir"))
write.csv(data.frame(),file=file.path(ppath,"subdir","filesubdir.csv"))
dir.create(tmpdir,recursive=T)
file.copy(from=ppath,to=tmpdir,recursive=T)
withr::with_dir(tmpdir,{
tar(tarfile=fn_arch,compression="gzip")
})
fn_arch
}
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
join_compound_lists <- function(fname) {
coll <- list()
fields <- colnames(EMPTY_CMPD_LIST)
coltypes <- c(ID="character",
SMILES="character",
Formula="character",
Name="character",
RT="numeric",
mz="numeric")
l=0L
for (fn in fname) {
l = l + 1L
## Figure out column headers.
nms <- colnames(file2tab(fn,nrows=0))
## Read the table. Knowing column headers prevents unnecessary
## warnings.
dt <- file2tab(fn, colClasses=coltypes[nms])
verify_cmpd_l(dt=dt,fn=fn)
# nonexist <- setdiff(fnfields,fields)
coll[[l]] <- dt #if (length(nonexist)==0) dt else dt[,(nonexist) := NULL]
coll[[l]]$ORIG <- fn
}
if (length(fname)>0) rbindlist(l=c(list(EMPTY_CMPD_LIST), coll), use.names = T, fill = T) else EMPTY_CMPD_LIST
}
process_cmpd_sets <- function(cmpdlist) {
if (nrow(cmpdlist)==0L) return(EMPTY_CMPD_LIST)
## Process sets.
if (! ("set" %in% colnames(cmpdlist))) cmpdlist$set=NA_character_ else cmpdlist[,set:=as.character(set)]
slugs = cmpdlist[,.(slug=gen_fname_slug(ORIG)),by="ORIG"]
slugs[,slug:=uniqy_slugs(slug)]
cmpdlist[slugs,set:=fifelse(is.na(set),i.slug,set),on="ORIG"]
cmpdlist
}