Commit f4c48624 authored by Jessy Krier's avatar Jessy Krier
Browse files

updated

parent 5eafb320
No preview for this file type
This diff is collapsed.
## Script to Screen TPs
## Emma Schymanski & Jessy Krier, 2 May 2020
###### Directory and Package Setup #####
## If you don't have the latest RChemMass, reinstall it from GitHub
## (skip these two lines if it is already up to date):
library(devtools)
install_github("schymane/RChemMass", dependencies = FALSE)
## Once the latest RChemMass is installed, load it:
library(RChemMass)
## Set the working directory; all relative file names below resolve against it.
## NOTE: base_dir must keep its trailing "/" because paths are built with paste0().
base_dir <- "/Users/jessykrier/Dokumente/Uni.lu/Master in Systems Biology/4.Semester/MasterThesis/Master_Thesis/Compound_List_TransfProducts/"
setwd(base_dir)
## Fetch the latest version of the annotation helper functions and load them.
extr_anno_url <- "https://git-r3lab.uni.lu/eci/pubchem/-/raw/master/annotations/tps/extractAnnotations.R?inline=false"
extr_anno_file <- paste0(base_dir, "extractAnnotations.R")
## download.file() returns 0 on success; stop here rather than source a stale
## or partially downloaded copy of the helpers.
if (download.file(extr_anno_url, extr_anno_file) != 0) {
  stop("Could not download extractAnnotations.R from ", extr_anno_url,
       call. = FALSE)
}
source("extractAnnotations.R")
## Path of the merged HSDB metabolism file, passed to getPcAnno.TPcids() below:
hsdb_file <- paste0(base_dir, "Metabolism_Metabolites_HSDB_merged.csv")
##### Start TP searching using Suspects file #####
## Suspect list; the loop below reads its Parent_CID column.
cmpd_info <- read.csv("TPSuspects_CompTox_wPCIDs.csv", stringsAsFactors = FALSE)
## One-off preparation step, kept for reference: look up the PubChem CID and
## the parent CID for each InChIKey (the parent CID is what is used from here on).
## NOTE(review): this snippet writes "Jessys_Suspects_CompTox_wPCIDs.csv" while
## the script reads "TPSuspects_CompTox_wPCIDs.csv" -- confirm which file name
## is current before re-running it.
# for (i in 1:length(cmpd_info$INPUT)) {
#   PCID <- getPcId(cmpd_info$INCHIKEY[i])
#   cmpd_info$PCID[i] <- PCID
#   cmpd_info$Parent_CID[i] <- getPCIDs.CIDtype(PCID,type="parent")
# }
# write.csv(cmpd_info, "Jessys_Suspects_CompTox_wPCIDs.csv", row.names = F)
##### For the first run #####
## For every suspect, query the transformations section and the HSDB
## annotations for its parent CID. Both helper calls produce a download file;
## the variables assigned here are overwritten on each iteration, so after the
## loop they hold the results for the last suspect only.
## To run a single compound interactively, set i by hand, e.g.:
#i <- 73
## seq_len() (rather than 1:nrow()) so that an empty suspect table runs zero
## iterations instead of iterating over c(1, 0).
for (i in seq_len(nrow(cmpd_info))) {
  query_CID <- as.numeric(cmpd_info$Parent_CID[i])
  # this gets you a download file and unique CIDs of the transformations section
  trans_out <- getPcCand.trans(query_CID)
  trans_cid_file <- trans_out$TP_File_Name
  trans_tp_cids <- trans_out$TP_Unique_CIDs
  # this gets you a list of unique CIDs from HSDB, plus a download file
  hsdb_out <- getPcAnno.TPcids(query_CID, hsdb_file)
  hsdb_cid_file <- hsdb_out$TP_File_Name
  hsdb_tp_cids <- hsdb_out$TP_Unique_CIDs
}
#### For the Shiny files ####
## For every compound in the MoNA match list: fetch transformation and HSDB
## candidate CIDs, and -- if a per-compound file already exists in the working
## directory -- merge the CIDs and create the Shinyscreen input files.
MoNA_pest_list <- read.csv("Match_MoNA_VeryGood.csv", stringsAsFactors = FALSE)
## To run a single compound interactively, set k by hand, e.g.:
#k <- 3
## seq_len() so that an empty list runs zero iterations (1:0 would run twice).
for (k in seq_len(nrow(MoNA_pest_list))) {
  query_CID <- as.numeric(MoNA_pest_list$CID[k])
  # this gets you a download file and unique CIDs of the transformations section
  trans_out <- getPcCand.trans(query_CID)
  trans_cid_file <- trans_out$TP_File_Name
  trans_tp_cids <- trans_out$TP_Unique_CIDs
  # this gets you a list of unique CIDs from HSDB, plus a download file
  hsdb_out <- getPcAnno.TPcids(query_CID, hsdb_file)
  hsdb_cid_file <- hsdb_out$TP_File_Name
  hsdb_tp_cids <- hsdb_out$TP_Unique_CIDs

  # Per-compound files expected in the working directory.
  file_name_1 <- paste0("CID_", MoNA_pest_list$CID[k], "_norman_s60.csv")
  file_name_2 <- paste0("Metabolism_Metabolites_HSDB_merged_",
                        MoNA_pest_list$CID[k], "_selected.csv")
  has_norman_file <- file.exists(file_name_1)
  has_hsdb_selected <- file.exists(file_name_2)
  # Progress output: which of the two files were found for this compound.
  print(paste(has_norman_file, file_name_1))
  print(paste(has_hsdb_selected, file_name_2))

  # Only continue when at least one of the two files is present
  # (scalar || is the correct operator inside if()).
  if (has_norman_file || has_hsdb_selected) {
    if (!has_hsdb_selected) {
      # No curated HSDB selection: parent plus transformations-section CIDs only.
      merged_tp_cids <- unique(c(query_CID, trans_tp_cids))
    } else {
      #you might want to create a new file name, if so, enter here:
      # Escaped and anchored pattern: only the literal ".csv" extension at the
      # end of the name is replaced (an unescaped "." matches any character).
      # NOTE(review): the existence check above uses file_name_2, while the
      # file read here is derived from hsdb_cid_file -- presumably the same
      # file; confirm.
      hsdb_selected_file <- sub("\\.csv$", "_selected.csv", hsdb_cid_file)
      hsdb_info <- read.csv(hsdb_selected_file, stringsAsFactors = FALSE)
      ##select only those CIDs to use, collapse and uniquify
      hsdb_tp_cids <- hsdb_info$selected_tp_cids[which(hsdb_info$use == TRUE)]
      # A cell may hold several "|"-separated CIDs: join all cells, then split.
      hsdb_tp_cids <- paste(hsdb_tp_cids, collapse = "|")
      hsdb_tp_cids <- suppressWarnings(
        as.numeric(unique(strsplit(hsdb_tp_cids, "|", fixed = TRUE)[[1]]))
      )
      # Empty or non-numeric tokens become NA above; NA is not a valid CID,
      # so drop it before merging.
      hsdb_tp_cids <- hsdb_tp_cids[!is.na(hsdb_tp_cids)]
      # this gets you all CIDs from both sources, including parent
      merged_tp_cids <- unique(c(query_CID, trans_tp_cids, hsdb_tp_cids))
    }
    ##### Create files for Shinyscreen #####
    ## note this first file can run a while
    # getting all mass info
    MSInfo_file <- getMSInfo.cids(merged_tp_cids, paste0(query_CID, "_MSInfo.csv"))
    getMSInfo.files(MSInfo_file, set_id = query_CID)
  }
}
##### Create files for Zenodo/PubChem #####
## we also now need to create files to save the new HSDB entries in the transformations format
## NOTE: hsdb_tp_cids, hsdb_selected_file and MSInfo_file are whatever the last
## iteration of the loop above (or a manual single-compound run) left behind;
## make sure they belong to the compound you intend to export.
hsdb_desc <- paste0("HSDB is a toxicology database that focuses on the ",
                    "toxicology of potentially hazardous chemicals. See ",
                    "https://pubchem.ncbi.nlm.nih.gov/source/11933")
dataset_DOI <- "10.5281/zenodo.3827487"
dataset_desc <- "Metabolites extracted from HSDB and validated by ECI/PubChem"
## The two calls below differ only in startCIDisPredecessor, and both assign
## to trans_file, so running both keeps only the second result.
## NOTE(review): presumably only the call matching the compound type is run.
# for parent compounds (atrazine)
trans_file <- createFiles.trans(hsdb_tp_cids, hsdb_selected_file, MSInfo_file,
                                hsdb_desc, dataset_DOI, dataset_desc,
                                file_name = "")
# for metabolites (deethylatrazine)
trans_file <- createFiles.trans(hsdb_tp_cids, hsdb_selected_file, MSInfo_file,
                                hsdb_desc, dataset_DOI, dataset_desc,
                                file_name = "", startCIDisPredecessor = FALSE)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment