Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Aishwarya Alex
xBELtoCellD
Commits
84c6f867
Commit
84c6f867
authored
Jan 16, 2018
by
Aishwarya Alex
Browse files
restructured code, class
parent
cce9e582
Changes
1
Hide whitespace changes
Inline
Side-by-side
namespaceTranslator.py
View file @
84c6f867
...
...
@@ -15,154 +15,180 @@ import MySQLdb, reflectclient, os,urllib
from
reflectclient.config
import
EntityType
import
rdflib
from
SPARQLWrapper
import
SPARQLWrapper
,
JSON
,
XML
,
N3
,
RDF
class
namespaceTranslator
:
#LOAD NAMESPACE DICTIONARY
def queryMeSHSPARQL():
    """Run a fixed SPARQL query against the NLM MeSH endpoint and print it.

    The query selects MeSH descriptors that have a term whose label matches
    "insomnia" (regex flag "i"). The response is requested as JSON,
    converted, and printed; nothing is returned.
    """
    meshQuery = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX meshv: <http://id.nlm.nih.gov/mesh/vocab#>
PREFIX mesh: <http://id.nlm.nih.gov/mesh/>
PREFIX mesh2015: <http://id.nlm.nih.gov/mesh/2015/>
PREFIX mesh2016: <http://id.nlm.nih.gov/mesh/2016/>
PREFIX mesh2017: <http://id.nlm.nih.gov/mesh/2017/>
SELECT ?d
FROM <http://id.nlm.nih.gov/mesh>
WHERE {
?d a meshv:Descriptor .
?d meshv:concept ?c .
?c meshv:term ?term .
?term rdfs:label ?label .
FILTER (REGEX(?label,"^insomnia$","i"))
}
"""
    endpoint = SPARQLWrapper("http://id.nlm.nih.gov/mesh/sparql")
    endpoint.setQuery(meshQuery)
    endpoint.setReturnFormat(JSON)
    # Single-argument print(...) behaves the same under the Python 2
    # print statement this file otherwise uses.
    print(endpoint.query().convert())
def getDict(filename):
    """Load a whitespace-separated two-column file into a dictionary.

    Each line is split on whitespace; the first field becomes the key and
    the second field the value. Any further fields are ignored, and a later
    line with the same key overwrites an earlier one.

    Args:
        filename: Path of the text file to read.

    Returns:
        dict mapping the first field of each usable line to its second field.
    """
    mapping = {}
    # "with" guarantees the handle is closed; the bare open() used before
    # left that to the garbage collector.
    with open(filename, 'r') as handle:
        for line in handle:
            fields = line.split()
            # Blank or single-field lines carry no key/value pair. Skipping
            # them avoids the IndexError the unguarded indexing raised.
            if len(fields) < 2:
                continue
            mapping[fields[0]] = fields[1]
    return mapping
#LOAD reaction DICTIONARY
def getReactionDict(filename="inputs/reactionDictFile.txt"):
    """Load the reaction dictionary from a whitespace-separated file.

    Each line is split on whitespace; the first field becomes the key and
    the second field the value.

    Args:
        filename: Path of the dictionary file. Defaults to the location the
            function has always read, so existing no-argument calls behave
            as before.

    Returns:
        dict mapping the first field of each usable line to its second field.
    """
    reactionDict = {}
    # Context manager closes the file; previously the handle was leaked.
    with open(filename, 'r') as handle:
        for line in handle:
            fields = line.split()
            # Ignore blank/short lines instead of failing with IndexError.
            if len(fields) < 2:
                continue
            reactionDict[fields[0]] = fields[1]
    return reactionDict
def termToIdentifier(belnamespace, term):
    """Translate *term* to an identifier according to its BEL namespace.

    A namespace containing "GO" is resolved with getGoIDfromTerm; otherwise
    a namespace containing "CHEM" is resolved with getChemicalIDfromTerm.
    Any other namespace returns *term* unchanged.
    """
    if "GO" in belnamespace:
        return getGoIDfromTerm(term)
    if "CHEM" in belnamespace:
        return getChemicalIDfromTerm(term)
    # Other namespaces are deliberately passed through; a fallback through
    # getIDandNamespacefromAbundance was left disabled in the original.
    return term
#Get GOID from reflect
def getGoIDfromTerm(term):
    """Look up an identifier for *term* through the Reflect client.

    Asks Reflect for biological_process, cellular_component and
    molecular_function entities. Returns the identifier of the first entity
    of the first match, or *term* unchanged when the result is empty.
    """
    goTypes = [EntityType.biological_process,
               EntityType.cellular_component,
               EntityType.molecular_function]
    matches = reflectclient.ReflectClient().get_entities(term, goTypes)
    if len(matches) > 0:
        return matches[0]['entities'][0]['identifier']
    return term
def getIDandNamespacefromAbundance(term, elementName):
    """Resolve *term* to an identifier and an element name.

    Reflect is queried first for cellular_component, biological_process and
    molecular_function entities. On a match the first entity's identifier
    is used and its 'type' decides the element name: "-21" or "-23" gives
    "PHENOTYPE", "-22" gives "COMPLEX" (presumably the cellular-component
    code, per the original "cellular component ->Complex" note; confirm
    against Reflect). Any other type keeps the caller's *elementName*.

    Without a Reflect match the MeSH dictionary is consulted; a MeSH hit
    sets the identifier to the MeSH ID and the element name to "PHENOTYPE".

    Args:
        term: Term text to resolve.
        elementName: Element name to return when nothing overrides it.

    Returns:
        Two-item list [identifier, elementName]; identifier is *term* itself
        when neither source knows the term.
    """
    termId = term
    goTypes = [EntityType.cellular_component,
               EntityType.biological_process,
               EntityType.molecular_function]
    GOresult = reflectclient.ReflectClient().get_entities(term, goTypes)

    # Track the GO outcome explicitly. The original inferred it from
    # "termId is term", i.e. from string object identity.
    foundInGO = len(GOresult) > 0
    if foundInGO:
        firstEntity = GOresult[0]['entities'][0]
        termId = firstEntity['identifier']
        entityType = firstEntity['type']
        if entityType in ("-21", "-23"):
            elementName = "PHENOTYPE"
        elif entityType == "-22":
            elementName = "COMPLEX"

    # not found in GO, so check in mesh
    if not foundInGO:
        print(termId + " : here")
        meshID = getMeshIDfromTerm(term)
        # getMeshIDfromTerm hands back the term when it has no entry;
        # compare by value rather than by identity.
        if meshID != term:
            termId = meshID
            elementName = "PHENOTYPE"

    return [termId, elementName]
#Get pubchemID from reflect
def getChemicalIDfromTerm(term):
    """Look up a chemical identifier for *term* through the Reflect client.

    Queries Reflect for EntityType.chemicals only. Returns the identifier
    of the first entity of the first match, or *term* unchanged when the
    result is empty.
    """
    hits = reflectclient.ReflectClient().get_entities(
        term, [EntityType.chemicals])
    if len(hits) > 0:
        return hits[0]['entities'][0]['identifier']
    return term
#Get meshID from the local MeSH tree dictionary
def getMeshIDfromTerm(term):
    """Return the MeSH ID for *term*, or *term* itself when it is unknown.

    The lookup is case-insensitive: the term is lower-cased before the
    dictionary built by getMeSHDict() is consulted.

    Args:
        term: MeSH term text.

    Returns:
        The mapped MeSH ID, or the original *term* object when no entry
        exists.
    """
    # dict.get replaces "has_key(...) is True"; has_key no longer exists in
    # Python 3 and .get performs a single lookup.
    # NOTE(review): getMeSHDict() re-reads the tree file on every call.
    return getMeSHDict().get(term.lower(), term)
def getMeSHDict(filename="inputs/2017MeshTree.txt"):
    """Build a lower-cased MeSH term -> MeSH ID dictionary from the tree file.

    If *filename* does not exist it is first downloaded from the NLM site,
    then parsed. Previously the download and the parse sat in opposite
    branches of an if/else, so the very first run returned an empty
    dictionary.

    NUL characters are removed from each line, the line is stripped and
    split on tabs. Only lines with exactly three fields are used: the
    second field is the MeSH ID and the third the term.

    Args:
        filename: Local path of the MeSH tree file. Defaults to the path
            the function has always used.

    Returns:
        dict mapping lower-cased term to MeSH ID.
    """
    # input file fetch from :https://www.nlm.nih.gov/mesh/2017/download/2017MeshTree.txt
    if not os.path.exists(filename):
        # urllib.urlretrieve is the Python 2 API used by the rest of the file.
        urllib.urlretrieve(
            "https://www.nlm.nih.gov/mesh/2017/download/2017MeshTree.txt",
            filename)

    meshDict = {}
    with open(filename, 'r') as handle:
        for line in handle:
            fields = line.replace('\x00', '').strip().split("\t")
            # The length check makes the indexing below safe, so the old
            # "except IndexError" handler could never fire and is dropped.
            if len(fields) == 3:
                meshDict[fields[2].lower()] = fields[1]
    return meshDict
#getMeshIDfromTerm("Drug-Induced Liver Injury")
# getGoIDfromTerm("leukotriene production involved in inflammatory response")
#getGoIDfromTerm("Stress, Physiological")
# getChemicalIDfromTerm("lipopigment")
#getIDandNamespacefromAbundance("synaptic transmission","UNKNOWN")
#print getIDandNamespacefromAbundance("gamma Secretase Complex","UNKNOWN")
#print getIDandNamespacefromAbundance("Insomnia","PHENOTYPE")
# getIDandNamespacefromAbundance("Drug-Induced Liver Injury","UNKNOWN")
# getIDandNamespacefromAbundance("Polymorphism, Single Nucleotide","UNKNOWN")
# Module-level run: issues the MeSH SPARQL query whenever this file is executed or imported.
queryMeSHSPARQL()
\ No newline at end of file
def __init__(self, meshFile="inputs/2017MeshTree.txt"):
    """Create the translator and load the MeSH term dictionary.

    If *meshFile* does not exist it is first downloaded from the NLM site,
    then parsed. Previously the download and the parse sat in opposite
    branches of an if/else, so a freshly downloaded file left
    self.meshDict empty.

    NUL characters are removed from each line, the line is stripped and
    split on tabs. Only lines with exactly three fields are used: the
    second field is the MeSH ID and the third the term.

    Args:
        meshFile: Local path of the MeSH tree file. Defaults to the path
            the constructor has always used.

    Sets:
        self.meshDict: dict mapping lower-cased term to MeSH ID.
    """
    # input file fetch from :https://www.nlm.nih.gov/mesh/2017/download/2017MeshTree.txt
    if not os.path.exists(meshFile):
        # urllib.urlretrieve is the Python 2 API used by the rest of the file.
        urllib.urlretrieve(
            "https://www.nlm.nih.gov/mesh/2017/download/2017MeshTree.txt",
            meshFile)

    meshDict = {}
    with open(meshFile, 'r') as handle:
        for line in handle:
            fields = line.replace('\x00', '').strip().split("\t")
            # The length check makes the indexing below safe, so the old
            # "except IndexError" handler could never fire and is dropped.
            if len(fields) == 3:
                meshDict[fields[2].lower()] = fields[1]
    self.meshDict = meshDict
def queryMeSHSPARQL(self):
    """Run a fixed SPARQL query against the NLM MeSH endpoint and print it.

    The query selects MeSH descriptors that have a term whose label matches
    "insomnia" (regex flag "i"). The response is requested as JSON,
    converted, and printed; nothing is returned.
    """
    meshQuery = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX meshv: <http://id.nlm.nih.gov/mesh/vocab#>
PREFIX mesh: <http://id.nlm.nih.gov/mesh/>
PREFIX mesh2015: <http://id.nlm.nih.gov/mesh/2015/>
PREFIX mesh2016: <http://id.nlm.nih.gov/mesh/2016/>
PREFIX mesh2017: <http://id.nlm.nih.gov/mesh/2017/>
SELECT ?d
FROM <http://id.nlm.nih.gov/mesh>
WHERE {
?d a meshv:Descriptor .
?d meshv:concept ?c .
?c meshv:term ?term .
?term rdfs:label ?label .
FILTER (REGEX(?label,"^insomnia$","i"))
}
"""
    endpoint = SPARQLWrapper("http://id.nlm.nih.gov/mesh/sparql")
    endpoint.setQuery(meshQuery)
    endpoint.setReturnFormat(JSON)
    # Single-argument print(...) behaves the same under the Python 2
    # print statement this file otherwise uses.
    print(endpoint.query().convert())
def getDict(self, filename):
    """Load a whitespace-separated two-column file into a dictionary.

    Each line is split on whitespace; the first field becomes the key and
    the second field the value. Any further fields are ignored, and a later
    line with the same key overwrites an earlier one.

    Args:
        filename: Path of the text file to read.

    Returns:
        dict mapping the first field of each usable line to its second field.
    """
    mapping = {}
    # "with" guarantees the handle is closed; the bare open() used before
    # left that to the garbage collector.
    with open(filename, 'r') as handle:
        for line in handle:
            fields = line.split()
            # Blank or single-field lines carry no key/value pair. Skipping
            # them avoids the IndexError the unguarded indexing raised.
            if len(fields) < 2:
                continue
            mapping[fields[0]] = fields[1]
    return mapping
#LOAD reaction DICTIONARY
def getReactionDict(self, filename="inputs/reactionDictFile.txt"):
    """Load the reaction dictionary from a whitespace-separated file.

    Each line is split on whitespace; the first field becomes the key and
    the second field the value.

    Args:
        filename: Path of the dictionary file. Defaults to the location the
            method has always read, so existing no-argument calls behave as
            before.

    Returns:
        dict mapping the first field of each usable line to its second field.
    """
    reactionDict = {}
    # Context manager closes the file; previously the handle was leaked.
    with open(filename, 'r') as handle:
        for line in handle:
            fields = line.split()
            # Ignore blank/short lines instead of failing with IndexError.
            if len(fields) < 2:
                continue
            reactionDict[fields[0]] = fields[1]
    return reactionDict
def termToIdentifier(self, belnamespace, term):
    """Translate *term* to an identifier according to its BEL namespace.

    A namespace containing "GO" is resolved with self.getGoIDfromTerm;
    otherwise a namespace containing "CHEM" is resolved with
    self.getChemicalIDfromTerm. Any other namespace returns *term*
    unchanged.

    Args:
        belnamespace: BEL namespace name of the term.
        term: Term text to translate.

    Returns:
        The resolved identifier, or *term* when the namespace is not handled.
    """
    # Dispatch through the instance. The original called
    # namespaceTranslator.getGoIDfromTerm(term) on the class, which binds
    # no self and raises TypeError.
    if "GO" in belnamespace:
        return self.getGoIDfromTerm(term)
    if "CHEM" in belnamespace:
        return self.getChemicalIDfromTerm(term)
    # Other namespaces are deliberately passed through; a fallback through
    # getIDandNamespacefromAbundance was left disabled in the original.
    return term
#Get GOID from reflect
def getGoIDfromTerm(self, term):
    """Look up an identifier for *term* through the Reflect client.

    Asks Reflect for biological_process, cellular_component and
    molecular_function entities. Returns the identifier of the first entity
    of the first match, or *term* unchanged when the result is empty.
    """
    goTypes = [EntityType.biological_process,
               EntityType.cellular_component,
               EntityType.molecular_function]
    matches = reflectclient.ReflectClient().get_entities(term, goTypes)
    if len(matches) > 0:
        return matches[0]['entities'][0]['identifier']
    return term
def getIDandNamespacefromAbundance(self, term, elementName):
    """Resolve *term* to an identifier and an element name.

    Reflect is queried first for cellular_component, biological_process and
    molecular_function entities. On a match the first entity's identifier
    is used and its 'type' decides the element name: "-21" or "-23" gives
    "PHENOTYPE", "-22" gives "COMPLEX" (presumably the cellular-component
    code, per the original "cellular component ->Complex" note; confirm
    against Reflect). Any other type keeps the caller's *elementName*.

    Without a Reflect match self.getMeshIDfromTerm is consulted; a MeSH hit
    sets the identifier to the MeSH ID and the element name to "PHENOTYPE".

    Args:
        term: Term text to resolve.
        elementName: Element name to return when nothing overrides it.

    Returns:
        Two-item list [identifier, elementName]; identifier is *term* itself
        when neither source knows the term.
    """
    termId = term
    goTypes = [EntityType.cellular_component,
               EntityType.biological_process,
               EntityType.molecular_function]
    GOresult = reflectclient.ReflectClient().get_entities(term, goTypes)

    # Track the GO outcome explicitly. The original inferred it from
    # "termId is term", i.e. from string object identity.
    foundInGO = len(GOresult) > 0
    if foundInGO:
        firstEntity = GOresult[0]['entities'][0]
        termId = firstEntity['identifier']
        entityType = firstEntity['type']
        if entityType in ("-21", "-23"):
            elementName = "PHENOTYPE"
        elif entityType == "-22":
            elementName = "COMPLEX"

    # not found in GO, so check in mesh
    if not foundInGO:
        print(termId + " : here")
        meshID = self.getMeshIDfromTerm(term)
        # getMeshIDfromTerm hands back the term when it has no entry;
        # compare by value rather than by identity.
        if meshID != term:
            termId = meshID
            elementName = "PHENOTYPE"

    return [termId, elementName]
#Get pubchemID from reflect
def getChemicalIDfromTerm(self, term):
    """Look up a chemical identifier for *term* through the Reflect client.

    Queries Reflect for EntityType.chemicals only. Returns the identifier
    of the first entity of the first match, or *term* unchanged when the
    result is empty.
    """
    hits = reflectclient.ReflectClient().get_entities(
        term, [EntityType.chemicals])
    if len(hits) > 0:
        return hits[0]['entities'][0]['identifier']
    return term
#Get meshID from the local MeSH tree dictionary
def getMeshIDfromTerm(self, term):
    """Return the MeSH ID for *term*, or *term* itself when it is unknown.

    The lookup is case-insensitive: the term is lower-cased before
    self.meshDict is consulted.

    Args:
        term: MeSH term text.

    Returns:
        The mapped MeSH ID, or the original *term* object when no entry
        exists.
    """
    # dict.get replaces "has_key(...) is True"; has_key no longer exists in
    # Python 3 and .get performs a single lookup.
    return self.meshDict.get(term.lower(), term)
def getMeSHDict(self, filename="inputs/2017MeshTree.txt"):
    """Build a lower-cased MeSH term -> MeSH ID dictionary from the tree file.

    If *filename* does not exist it is first downloaded from the NLM site,
    then parsed. Previously the download and the parse sat in opposite
    branches of an if/else, so the very first run returned an empty
    dictionary.

    NUL characters are removed from each line, the line is stripped and
    split on tabs. Only lines with exactly three fields are used: the
    second field is the MeSH ID and the third the term.

    Args:
        filename: Local path of the MeSH tree file. Defaults to the path
            the method has always used.

    Returns:
        dict mapping lower-cased term to MeSH ID.
    """
    # input file fetch from :https://www.nlm.nih.gov/mesh/2017/download/2017MeshTree.txt
    if not os.path.exists(filename):
        # urllib.urlretrieve is the Python 2 API used by the rest of the file.
        urllib.urlretrieve(
            "https://www.nlm.nih.gov/mesh/2017/download/2017MeshTree.txt",
            filename)

    meshDict = {}
    with open(filename, 'r') as handle:
        for line in handle:
            fields = line.replace('\x00', '').strip().split("\t")
            # The length check makes the indexing below safe, so the old
            # "except IndexError" handler could never fire and is dropped.
            if len(fields) == 3:
                meshDict[fields[2].lower()] = fields[1]
    return meshDict
#getMeshIDfromTerm("Drug-Induced Liver Injury")
# getGoIDfromTerm("leukotriene production involved in inflammatory response")
#getGoIDfromTerm("Stress, Physiological")
# getChemicalIDfromTerm("lipopigment")
#getIDandNamespacefromAbundance("synaptic transmission","UNKNOWN")
#print getIDandNamespacefromAbundance("gamma Secretase Complex","UNKNOWN")
#print getIDandNamespacefromAbundance("Insomnia","PHENOTYPE")
# getIDandNamespacefromAbundance("Drug-Induced Liver Injury","UNKNOWN")
# getIDandNamespacefromAbundance("Polymorphism, Single Nucleotide","UNKNOWN")
# Module-level run: build a translator (which loads the MeSH dictionary)
# and print the MeSH lookup result for one sample term.
NT = namespaceTranslator()
print(NT.getMeshIDfromTerm("Oligodendroglia"))
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment