Unverified Commit 2020e7d5 authored by Todor Kondic's avatar Todor Kondic
Browse files

pcl: add_ccs2csv.R: Update

parent 3da1c424
......@@ -34,26 +34,43 @@ message("Number of MF rows: ", NROW(dt_mf_csv))
## Merge the CCS values in `dt_ccs` onto the rows of `dt_mf_csv` and write
## the result to `fn_mf_res_csv`.
## Uses objects created earlier in the script: `dt_ccs`, `dt_mf_csv`,
## `SEL_ADDUCTS`, `db_res` and `fn_mf_res_csv`.
data.table::setnames(dt_ccs, "smi", "SMILES", skip_absent = TRUE)
## Turn adduct into a number of columns (one column of `ccs` per adduct).
dt_tr_ccs <- data.table::dcast(dt_ccs, SMILES ~ adduct, value.var = "ccs")
message("Number of dcast CCS rows: ", NROW(dt_tr_ccs))
## NOTE(review): `smi` is renamed to `SMILES` above whenever it is present,
## so the reference to `smi` here looks stale -- confirm which name is meant.
res <- dt_ccs[, .(SMILES = smi, adduct, ccs)][dt_mf_csv, on = "SMILES", nomatch = NULL]
## Wrangle names: adduct columns are the ones whose name contains "[".
add_nm <- grep("\\[.+", colnames(dt_tr_ccs), value = TRUE)
## Collapse explicitly; message() would otherwise glue the names together
## without any separator.
message("Adducts: ", paste(add_nm, collapse = ", "))
idsmi <- dt_mf_csv[, .(Identifier, SMILES)]
res <- res[idsmi, on = "Identifier", nomatch = NA]
new_add_nm <- paste0("pred_CCS_A2_", add_nm)
data.table::setnames(dt_tr_ccs, add_nm, new_add_nm)
mf_nms <- colnames(dt_mf_csv)
## Keep only the requested adduct columns, or all of them when no
## selection was given.
sel_add_cols <- if (!is.null(SEL_ADDUCTS)) {
  paste0("pred_CCS_A2_", SEL_ADDUCTS)
} else {
  new_add_nm
}
## NOTE(review): `db_res` is not defined in the visible part of the script;
## verify it exists at this point (possibly `res` was intended).
uniq_smiles_merged <- db_res[, unique(SMILES)]
uniq_smiles_pcl <- dt_mf_csv[, unique(SMILES)]
all_nms <- c(mf_nms, sel_add_cols)
## SMILES present in the MF table but absent from `db_res`. The name masks
## base::diff as a variable only; it is kept because later code reads it.
diff <- setdiff(uniq_smiles_pcl, uniq_smiles_merged)
## Keep every row of `dt_mf_csv`; rows without a SMILES match get NA in the
## adduct columns.
res <- dt_tr_ccs[dt_mf_csv, on = "SMILES", nomatch = NA][, ..all_nms]
message("Number of res rows: ", NROW(res))
data.table::fwrite(res, file = fn_mf_res_csv, sep = ",", quote = "auto", na = "")
## Summary of SMILES coverage. Report the sizes of the vectors: passing the
## vectors themselves to message() pastes every SMILES string together with
## no separator, which is unreadable and does not match the "Number of"
## wording.
message("Unique SMILES entries in pure PCL: ", length(uniq_smiles_pcl))
message("Unique nonempty SMILES entries in PCL with CCS: ", length(uniq_smiles_merged))
message("Number of SMILES in pure PCL with missing CCS: ", length(diff))
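## Obsolete? The commented-out block below repeats the dcast-based merge
## (one CCS column per adduct) and the CSV write.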
## ## Turn adduct into a number of columns.
## dt_tr_ccs <- data.table::dcast(dt_ccs, SMILES ~ adduct, value.var = "ccs")
## message("Number of dcast CCS rows: ",NROW(dt_tr_ccs))
## ## Wrangle names.
## add_nm <- grep("\\[.+",colnames(dt_tr_ccs),val=T)
## message("Adducts: ",add_nm)
## new_add_nm <- paste("pred_CCS_A2",add_nm,sep="_")
## data.table::setnames(dt_tr_ccs,add_nm,new_add_nm)
## mf_nms <- colnames(dt_mf_csv)
## sel_add_cols <- if (!is.null(SEL_ADDUCTS))
## paste0("pred_CCS_A2_",SEL_ADDUCTS) else new_add_nm
## all_nms <- c(mf_nms,sel_add_cols)
## res <- dt_tr_ccs[dt_mf_csv,on="SMILES",nomatch=NA][,..all_nms]
## message("Number of res rows: ",NROW(res))
## data.table::fwrite(res,file=fn_mf_res_csv,sep=",",quote = "auto",na = "")
## Write `res` to `fn_mf_res_csv` as comma-separated text; missing values
## are written as empty fields.
data.table::fwrite(
  x = res,
  file = fn_mf_res_csv,
  sep = ",",
  quote = "auto",
  na = ""
)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment