Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Mira Narayanan
Master_Thesis_Project
Commits
f4c48624
Commit
f4c48624
authored
Jun 22, 2020
by
Jessy Krier
Browse files
updated
parent
5eafb320
Changes
4
Expand all
Hide whitespace changes
Inline
Side-by-side
.DS_Store
View file @
f4c48624
No preview for this file type
TP_Suspect_List/Match_MoNAVeryGood.xlsx
0 → 100644
View file @
f4c48624
File added
TP_Suspect_List/extractAnnotations.R
0 → 100644
View file @
f4c48624
This diff is collapsed.
Click to expand it.
TP_Suspect_List/runme_SuspectList.R
0 → 100644
View file @
f4c48624
## Script to Screen TPs
## Emma Schymanski & Jessy Krier, 2 May 2020
###### Directory and Package Setup #####
## If you don't have the latest RChemMass, reinstall it from GitHub.
## `dependencies = FALSE` is spelled out (not `F`, which can be reassigned).
library(devtools)
install_github("schymane/RChemMass", dependencies = FALSE)
## If you have the latest RChemMass, do:
library(RChemMass)
## Set the working directory. Relative file names used later in this script
## (e.g. in source() and read.csv()) are resolved against it.
base_dir <- "/Users/jessykrier/Dokumente/Uni.lu/Master in Systems Biology/4.Semester/MasterThesis/Master_Thesis/Compound_List_TransfProducts/"
setwd(dir = base_dir)
## Fetch the latest version of the annotation helper functions into base_dir
## and load them into the session:
extr_anno_url <- "https://git-r3lab.uni.lu/eci/pubchem/-/raw/master/annotations/tps/extractAnnotations.R?inline=false"
download.file(
  url = extr_anno_url,
  destfile = paste0(base_dir, "extractAnnotations.R")
)
source(file = "extractAnnotations.R")
## Path to the merged HSDB metabolism/metabolites file (inside base_dir):
hsdb_file <- paste0(base_dir, "Metabolism_Metabolites_HSDB_merged.csv")
##### Start TP searching using Suspects file #####
## Suspect list, read relative to the working directory; character columns
## are kept as character rather than converted to factors.
cmpd_info <- read.csv(
  "TPSuspects_CompTox_wPCIDs.csv",
  stringsAsFactors = FALSE
)
# #get PubChem CID and the parent CID based on InChIKey
# # we will use the parent CID from now on ...
# for (i in 1:length(cmpd_info$INPUT)) {
# PCID <- getPcId(cmpd_info$INCHIKEY[i])
# cmpd_info$PCID[i] <- PCID
# cmpd_info$Parent_CID[i] <- getPCIDs.CIDtype(PCID,type="parent")
# }
# write.csv(cmpd_info, "Jessys_Suspects_CompTox_wPCIDs.csv", row.names = F)
##### For the first run #####
## Runs both lookups for every row of cmpd_info. The result variables are
## overwritten on each pass, so after the loop they hold the values for the
## last row only.
## seq_len() is used so that an empty cmpd_info results in zero iterations
## (1:nrow() would iterate over c(1, 0)).
#i <- 73
for (i in seq_len(nrow(cmpd_info))) {
  query_CID <- as.numeric(cmpd_info$Parent_CID[i])
  # this gets you a download file and unique CIDs of the transformations section
  trans_out <- getPcCand.trans(query_CID)
  trans_cid_file <- trans_out$TP_File_Name
  trans_tp_cids <- trans_out$TP_Unique_CIDs
  # this gets you a list of unique CIDs from HSDB, plus a download file
  hsdb_out <- getPcAnno.TPcids(query_CID, hsdb_file)
  hsdb_cid_file <- hsdb_out$TP_File_Name
  hsdb_tp_cids <- hsdb_out$TP_Unique_CIDs
}
#### For the Shiny files ####
## Compound list for the Shiny files, read relative to the working directory;
## the loop that follows reads its CID column.
MoNA_pest_list <- read.csv(
  "Match_MoNA_VeryGood.csv",
  stringsAsFactors = FALSE
)
## For every compound in MoNA_pest_list: run the transformation and HSDB
## lookups, then, if at least one of the two expected per-compound CSV files
## exists in the working directory, merge the CIDs and create the MS info
## files.
## seq_len() is used so that an empty list results in zero iterations.
#k <- 3
for (k in seq_len(nrow(MoNA_pest_list))) {
  query_CID <- as.numeric(MoNA_pest_list$CID[k])
  # this gets you a download file and unique CIDs of the transformations section
  trans_out <- getPcCand.trans(query_CID)
  trans_cid_file <- trans_out$TP_File_Name
  trans_tp_cids <- trans_out$TP_Unique_CIDs
  # this gets you a list of unique CIDs from HSDB, plus a download file
  hsdb_out <- getPcAnno.TPcids(query_CID, hsdb_file)
  hsdb_cid_file <- hsdb_out$TP_File_Name
  hsdb_tp_cids <- hsdb_out$TP_Unique_CIDs

  # per-compound files that decide whether and how this compound is processed
  file_name_1 <- paste0("CID_", MoNA_pest_list$CID[k], "_norman_s60.csv")
  file_name_2 <- paste0(
    "Metabolism_Metabolites_HSDB_merged_",
    MoNA_pest_list$CID[k],
    "_selected.csv"
  )
  # report which of the two files are present
  print(paste(file.exists(file_name_1), file_name_1))
  print(paste(file.exists(file_name_2), file_name_2))

  # scalar condition, so use the short-circuiting `||` rather than `|`
  if (file.exists(file_name_1) || file.exists(file_name_2)) {
    if (!file.exists(file_name_2)) {
      # no HSDB selection file: parent plus transformation CIDs only
      merged_tp_cids <- unique(c(query_CID, trans_tp_cids))
    } else {
      #you might want to create a new file name, if so, enter here:
      # The dot is escaped and the pattern anchored so that only the trailing
      # ".csv" extension is replaced (an unescaped "." matches any character).
      # NOTE(review): the existence check above is on file_name_2, while the
      # file read here is derived from hsdb_cid_file - confirm both refer to
      # the same file.
      hsdb_selected_file <- sub("\\.csv$", "_selected.csv", hsdb_cid_file)
      hsdb_info <- read.csv(hsdb_selected_file, stringsAsFactors = FALSE)
      ##select only those CIDs to use, collapse and uniquify
      hsdb_tp_cids <- hsdb_info$selected_tp_cids[which(hsdb_info$use == TRUE)]
      hsdb_tp_cids <- paste(hsdb_tp_cids, collapse = "|")
      # NOTE(review): entries that are not numeric become NA here (the
      # coercion warning is suppressed) and are passed on - confirm the
      # downstream functions tolerate NA.
      hsdb_tp_cids <- suppressWarnings(
        as.numeric(unique(strsplit(hsdb_tp_cids, "|", fixed = TRUE)[[1]]))
      )
      # this gets you all CIDs from both sources, including parent
      merged_tp_cids <- unique(c(query_CID, trans_tp_cids, hsdb_tp_cids))
    }
    ##### Create files for Shinyscreen #####
    ## note this first file can run a while
    MSInfo_file <- getMSInfo.cids(
      merged_tp_cids,
      paste0(query_CID, "_MSInfo.csv")
    )
    #getting all mass info
    getMSInfo.files(MSInfo_file, set_id = query_CID)
  }
}
##### Create files for Zenodo/PubChem #####
## we also now need to create files to save the new HSDB entries in the
## transformations format
## NOTE: hsdb_tp_cids, hsdb_selected_file and MSInfo_file are not set in this
## section; the calls below use whatever values the preceding code left in
## the workspace.
hsdb_desc <- paste0(
  "HSDB is a toxicology database that focuses on the ",
  "toxicology of potentially hazardous chemicals. See ",
  "https://pubchem.ncbi.nlm.nih.gov/source/11933"
)
dataset_DOI <- "10.5281/zenodo.3827487"
dataset_desc <- "Metabolites extracted from HSDB and validated by ECI/PubChem"
## Both calls below assign to trans_file, so the second result replaces the
## first.
# for parent compounds (atrazine)
trans_file <- createFiles.trans(
  hsdb_tp_cids,
  hsdb_selected_file,
  MSInfo_file,
  hsdb_desc,
  dataset_DOI,
  dataset_desc,
  file_name = ""
)
# for metabolites (deethylatrazine)
trans_file <- createFiles.trans(
  hsdb_tp_cids,
  hsdb_selected_file,
  MSInfo_file,
  hsdb_desc,
  dataset_DOI,
  dataset_desc,
  file_name = "",
  startCIDisPredecessor = FALSE
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment