## add_ccs2csv.R
##
## Copyright (C) 2020 by University of Luxembourg

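## Usage:
##
## Rscript [--vanilla] add_ccs2csv.R <input yaml config> <input metfrag csv> <output metfrag csv>
##
## The first argument is a YAML file; its `ccs_src_path` entry gives the path
## of the CCS CSV file that is merged into the MetFrag CSV.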

## Attach the packages this script depends on. library() is used instead of
## require() because require() merely warns and returns FALSE when a package
## is missing, which postpones the failure to some later, harder to diagnose
## call; library() aborts right here.
library(data.table)
library(assertthat)
library(yaml)
## Command-line synopsis of this script.
USAGE_STRING <- "Rscript [--vanilla] add_ccs2csv.R <pubchemlite input file> <input metfrag csv> <output metfrag csv>"

## Adducts whose predicted-CCS columns are written to the output.
## Leave as NULL to return the columns of all adducts. To return a subset,
## set this to a character vector of adduct names as they appear in the
## `adduct` column of the CCS table; each name is prefixed with
## "pred_CCS_A2_" to form the output column name.
SEL_ADDUCTS <- NULL

## Read the positional command-line arguments:
##   1. YAML configuration file; its `ccs_src_path` entry names the CCS CSV,
##   2. input MetFrag CSV,
##   3. output MetFrag CSV.
args <- commandArgs(trailingOnly = TRUE)

## Fail with the usage text instead of a bare "subscript out of bounds" when
## arguments are missing. Surplus arguments are ignored, as before.
if (length(args) < 3L) {
  stop("Expected 3 arguments, got ", length(args), ".\nUsage: ",
       USAGE_STRING, call. = FALSE)
}

fn_config <- args[[1]]
config <- yaml::yaml.load_file(fn_config)

## `[[` matches the key exactly (`$` would accept a partial match) and yields
## NULL when the key is absent; catch that here rather than letting a NULL
## path reach the file reader.
fn_ccs <- config[["ccs_src_path"]]
if (!is.character(fn_ccs) || length(fn_ccs) != 1L || !nzchar(fn_ccs)) {
  stop("Config file '", fn_config,
       "' does not define a usable 'ccs_src_path' entry.", call. = FALSE)
}

fn_mf_csv <- args[[2]]
fn_mf_res_csv <- args[[3]]

## Report the resolved file locations before anything is read.
message("Input CCS file: ", fn_ccs)
message("Input Metfrag CSV file: ", fn_mf_csv)
message("Output Metfrag CSV file: ", fn_mf_res_csv)

## Load both comma-separated tables. The paths are passed through `file =`
## rather than positionally: fread()'s first argument (`input`) can also be
## interpreted as literal text or as a shell command, which must never happen
## to a path taken from the command line or a config file.
dt_ccs <- data.table::fread(file = fn_ccs, sep = ",")
message("Number of CCS rows: ", NROW(dt_ccs))
dt_mf_csv <- data.table::fread(file = fn_mf_csv, sep = ",")
message("Number of MF rows: ", NROW(dt_mf_csv))

## Harmonise the key column: a CCS table that calls it "smi" gets it renamed
## to "SMILES"; a table without an "smi" column is left untouched.
data.table::setnames(dt_ccs, "smi", "SMILES", skip_absent = TRUE)

## When a SMILES/adduct pair occurs more than once, dcast() has to aggregate
## and, with no aggregate function given, falls back to `length`, so the
## output would hold row counts instead of CCS values. Keep the first
## occurrence of each pair and report how many rows were dropped.
n_dup <- sum(duplicated(dt_ccs, by = c("SMILES", "adduct")))
if (n_dup > 0L) {
  warning("Dropping ", n_dup, " CCS row(s) with a repeated SMILES/adduct ",
          "pair; the first occurrence of each pair is kept.", call. = FALSE)
  dt_ccs <- unique(dt_ccs, by = c("SMILES", "adduct"))
}

## Turn adduct into a number of columns: one row per SMILES, one column per
## adduct, each cell holding the corresponding `ccs` value.
dt_tr_ccs <- data.table::dcast(dt_ccs, SMILES ~ adduct, value.var = "ccs")
message("Number of dcast CCS rows: ", NROW(dt_tr_ccs))

## Wrangle names. Adduct columns are recognised by name: any column of the
## reshaped CCS table whose name contains "[" followed by at least one more
## character.
add_nm <- grep("\\[.+", colnames(dt_tr_ccs), value = TRUE)

## Without this guard the renaming below fails with an obscure length
## mismatch, because pasting onto an empty vector still yields one string.
if (length(add_nm) == 0L) {
  stop("No adduct columns found in the CCS table.", call. = FALSE)
}

## Collapse explicitly; message() would otherwise glue the names together
## without any separator.
message("Adducts: ", paste(add_nm, collapse = ", "))

## Prefix every adduct column so it is identifiable in the merged output.
new_add_nm <- paste0("pred_CCS_A2_", add_nm)
data.table::setnames(dt_tr_ccs, add_nm, new_add_nm)

## Column names of the MetFrag table, kept to restore its layout after the
## join.
mf_nms <- colnames(dt_mf_csv)

## Predicted-CCS columns to append: every adduct column by default, or only
## those named in SEL_ADDUCTS when it is set.
sel_add_cols <- if (is.null(SEL_ADDUCTS)) {
  new_add_nm
} else {
  paste0("pred_CCS_A2_", SEL_ADDUCTS)
}

## A requested adduct that the CCS table does not provide would otherwise
## only surface later as a generic column-not-found error in the subset.
missing_cols <- setdiff(sel_add_cols, new_add_nm)
if (length(missing_cols) > 0L) {
  stop("Requested adduct column(s) not present in the CCS table: ",
       paste(missing_cols, collapse = ", "), call. = FALSE)
}

## Final column layout: all MetFrag columns first, then the CCS columns.
all_nms <- c(mf_nms, sel_add_cols)

## Look up the CCS columns for every MetFrag row by SMILES. Indexing the CCS
## table with the MetFrag table keeps each MetFrag row; with nomatch = NA a
## row without a CCS match gets NA in the CCS columns.
joined <- dt_tr_ccs[dt_mf_csv, on = "SMILES", nomatch = NA]

## Keep only the wanted columns, in the order given by `all_nms`.
res <- joined[, all_nms, with = FALSE]
message("Number of res rows: ", NROW(res))

## Write the merged table; missing values become empty fields.
data.table::fwrite(
  res,
  file = fn_mf_res_csv,
  sep = ",",
  quote = "auto",
  na = ""
)