Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Aishwarya Alex
xBELtoCellD
Commits
b60c76fb
Commit
b60c76fb
authored
Jan 24, 2018
by
Aishwarya Alex
Browse files
restructured code, class
parent
3183bc49
Changes
1
Hide whitespace changes
Inline
Side-by-side
namespaceTranslator.py
View file @
b60c76fb
...
...
@@ -11,40 +11,30 @@
##############################################################################################
# Import the modules needed to run the script.
import
MySQLdb
,
reflectclient
,
os
,
urllib
import
reflectclient
,
os
,
urllib
,
sqlite3
from
reflectclient.config
import
EntityType
import
rdflib
from
SPARQLWrapper
import
SPARQLWrapper
,
JSON
,
XML
,
N3
,
RDF
from
SPARQLWrapper
import
SPARQLWrapper
,
JSON
import
xml.etree.ElementTree
as
ET
class
namespaceTranslator
:
#LOAD NAMESPACE DICTIONARY
def
__init__
(
self
):
''' Constructor for this class. '''
# Create some member animals
dict
=
{}
filename
=
"inputs/2017MeshTree.txt"
try
:
# input file fetch from :https://www.nlm.nih.gov/mesh/2017/download/2017MeshTree.txt
if
not
os
.
path
.
exists
(
filename
):
urllib
.
urlretrieve
(
"https://www.nlm.nih.gov/mesh/2017/download/2017MeshTree.txt"
,
filename
)
else
:
for
line
in
open
(
filename
,
'r'
):
line
=
line
.
replace
(
'
\x00
'
,
''
)
line
=
line
.
strip
()
nspace
=
line
.
split
(
"
\t
"
)
if
len
(
nspace
)
==
3
:
meshID
=
nspace
[
1
]
term
=
nspace
[
2
].
lower
()
dict
[
term
]
=
meshID
try
:
cursor
=
self
.
createMeSHDB
()
self
.
meshCursor
=
cursor
except
IndexError
as
dicterror
:
print
str
(
"MeSH Dictionary error"
)
self
.
meshDict
=
dict
#
self.meshDict = dict
def
queryMeSHSPARQL
(
self
):
#sparql = rdflib.URIRef("http://id.nlm.nih.gov/mesh/sparql")
sparql
=
SPARQLWrapper
(
"http://id.nlm.nih.gov/mesh/sparql"
)
sparql
.
setQuery
(
"""
def
queryMeSHSPARQL
(
self
,
term
):
sparql
=
SPARQLWrapper
(
"http://id.nlm.nih.gov/mesh/sparql"
,)
preparedQuery
=
"""
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
...
...
@@ -65,13 +55,21 @@ class namespaceTranslator:
?c meshv:term ?term .
?term rdfs:label ?label .
FILTER (REGEX(?label,"^
insomnia$
","i"))
FILTER (REGEX(?label,
\
"
^
"""
+
term
+
"""$
\
"
,"i"))
}
"""
)
"""
sparql
.
addCustomParameter
(
"inference"
,
"true"
)
sparql
.
setQuery
(
preparedQuery
)
sparql
.
setReturnFormat
(
JSON
)
result
=
sparql
.
query
().
convert
()
print
result
meshID
=
None
if
len
(
result
[
'results'
][
'bindings'
])
>
0
:
meshResult
=
result
[
'results'
][
'bindings'
][
0
][
'd'
][
'value'
].
split
(
"/"
)[
-
1
]
meshID
=
meshResult
if
meshResult
is
not
None
else
None
return
meshID
def
getDict
(
self
,
filename
):
dict
=
{}
...
...
@@ -112,29 +110,40 @@ class namespaceTranslator:
def
getIDandNamespacefromAbundance
(
self
,
term
,
elementName
):
termId
=
term
#elementName="UNKNOWN"
idAndelement
=
[]
#check GO
reflect_client
=
reflectclient
.
ReflectClient
()
# check for GO cellular component ->Complex
GOresult
=
reflect_client
.
get_entities
(
term
,
[
EntityType
.
cellular_component
,
EntityType
.
biological_process
,
EntityType
.
molecular_function
])
if
len
(
GOresult
)
>
0
:
termId
=
GOresult
[
0
][
'entities'
][
0
][
'identifier'
]
entityType
=
GOresult
[
0
][
'entities'
][
0
][
'type'
]
if
entityType
==
"-21"
or
entityType
==
"-23"
:
elementName
=
"PHENOTYPE"
elif
entityType
==
"-22"
:
elementName
=
"COMPLEX"
#not found in GO, so check in mesh
if
termId
is
term
:
print
termId
+
" : here"
meshID
=
self
.
getMeshIDfromTerm
(
term
)
if
meshID
is
not
term
:
termId
=
meshID
elementName
=
"PHENOTYPE"
#check if protein : eg: Amyloid beta peptides- abundance/ protein abundance w/o namespace
if
elementName
==
"PROTEIN"
:
POresult
=
reflect_client
.
get_entities
(
term
,
[
EntityType
.
proteins
])
if
len
(
POresult
)
>
0
:
print
POresult
termId
=
POresult
[
0
][
'entities'
][
0
][
'identifier'
]
entityType
=
POresult
[
0
][
'entities'
][
0
][
'type'
]
if
entityType
==
'9606'
:
elementName
=
"PROTEIN"
# check for GO cellular component ->Complex, biological process, moelcular function -> phenotype
else
:
GOresult
=
reflect_client
.
get_entities
(
term
,
[
EntityType
.
cellular_component
,
EntityType
.
biological_process
,
EntityType
.
molecular_function
])
if
len
(
GOresult
)
>
0
:
print
"Returns in GO term"
termId
=
GOresult
[
0
][
'entities'
][
0
][
'identifier'
]
entityType
=
GOresult
[
0
][
'entities'
][
0
][
'type'
]
if
entityType
==
"-21"
or
entityType
==
"-23"
:
elementName
=
"PHENOTYPE"
elif
entityType
==
"-22"
:
elementName
=
"COMPLEX"
elif
entityType
==
"9606"
:
elementName
=
"PROTEIN"
#if element is not GO term or Protein , check in MeSH
else
:
#meshID=self.getMeshIDfromTermDict(termId)
meshID
=
self
.
getMeshIDfromdb
(
self
.
meshCursor
,
termId
)
if
meshID
is
not
None
:
termId
=
meshID
elementName
=
"PHENOTYPE"
idAndelement
=
[
termId
,
elementName
]
#print idAndelement
return
idAndelement
...
...
@@ -147,17 +156,85 @@ class namespaceTranslator:
chemID
=
chemResult
[
0
][
'entities'
][
0
][
'identifier'
]
return
chemID
#Get meshID from reflect
def
getMeshIDfromTerm
(
self
,
term
):
# Get meshID from the SQLite database # FASTER
def getMeshIDfromdb(self, meshcursor, term):
    """Look up the MeSH ID stored for ``term`` in the ``meshTermID`` table.

    The term is lower-cased before the query, matching how ``createMeSHDB``
    lower-cases terms when it fills the table.

    Args:
        meshcursor: sqlite3 cursor on a database containing ``meshTermID``.
        term: term text to resolve.

    Returns:
        The ``meshID`` value of the first matching row, or ``None`` when no
        row matches or the lookup fails (the error is printed, not raised).
    """
    try:
        meshcursor.execute(
            "SELECT meshID FROM meshTermID WHERE term=?;", [term.lower()])
        row = meshcursor.fetchone()
    except sqlite3.Error as sqr:
        # Single-argument print + str(exc) behaves the same on Python 2 and 3;
        # ``exc.message`` is deprecated on 2.6+ and absent on 3.
        print("Sqlite3 Error %s" % (sqr,))
        return None
    except Exception as ex:
        # Best-effort lookup: e.g. a non-string ``term`` ends up here.
        print("%s %s" % (ex, ex.args))
        return None
    return row[0] if row is not None else None
def getMeshIDfromTermSPARQL(self, term):
    """Resolve ``term`` through ``queryMeSHSPARQL`` and hand back its result.

    Returns whatever ``self.queryMeSHSPARQL(term)`` returns, including
    ``None`` when that call yields ``None``.
    """
    return self.queryMeSHSPARQL(term)
#Get meshID from Dictionary # FASTER
def getMeshIDfromTermDict(self, term):
    """Resolve ``term`` to a MeSH ID using the in-memory ``self.meshDict``.

    Args:
        term: term text; it is lower-cased before the dictionary lookup.

    Returns:
        The value stored in ``self.meshDict`` under the lower-cased term, or
        the original ``term`` unchanged when there is no such key.
    """
    # One ``.get`` replaces the ``has_key`` + index double lookup
    # (``dict.has_key`` does not exist on Python 3).
    return self.meshDict.get(term.lower(), term)
def getMeSHDictFromDesc2012XML(self):
    """Build a ``{lower-cased term: DescriptorUI}`` dict from desc2012.xml.

    Steps:
      1. Download ``inputs/desc2012.xml`` if it is not present locally.
      2. If ``inputs/mesh_desc2012.txt`` is missing, extract every descriptor
         name and every differing concept term from the XML into that file as
         ``term<TAB>DescriptorUI`` lines, so later runs can skip XML parsing.
      3. Load the text file into the dictionary.

    Step 3 now always runs; previously the dictionary was only filled when the
    text file already existed, so the very first run returned an empty dict.

    Returns:
        The dictionary. It is empty (or partial) when an error occurred; the
        error is printed rather than raised.
    """
    import io  # function-scope import: encoding-aware writer, used only here

    meshDict = {}
    xmlfilename = "inputs/desc2012.xml"
    meshtextFilename = "inputs/mesh_desc2012.txt"
    try:
        print("Creating MeSH Dictionary ...")
        if not os.path.exists(xmlfilename):
            print("Local copy does not exist, Downloading local copy of Mesh2012 desc2012.xml ... ")
            urllib.urlretrieve("ftp://nlmpubs.nlm.nih.gov/online/mesh/2012/xmlmesh/desc2012.xml", xmlfilename)

        if not os.path.exists(meshtextFilename):
            print("Creating text file, to store extracted terms and uniqueID from xml, to be used in future runs ...")
            # Parse before creating the cache file, so a parse failure does
            # not leave an empty cache that later runs would trust.
            root = ET.parse(xmlfilename).getroot()
            # io.open(..., encoding='utf-8') emits the same UTF-8 bytes as the
            # former ``.encode('utf-8')`` + plain write, on Python 2 and 3.
            with io.open(meshtextFilename, 'w', encoding='utf-8') as txtFile:
                for descriptorRec in root.findall("DescriptorRecord"):
                    meshID = descriptorRec.find("DescriptorUI").text
                    descName = descriptorRec.find("DescriptorName").find("String").text
                    txtFile.write(u"%s\t%s\n" % (descName, meshID))
                    for concept in descriptorRec.find("ConceptList").findall("Concept"):
                        for term in concept.find("TermList").findall("Term"):
                            altTerm = term.find("String").text
                            # The descriptor name itself was already written.
                            if altTerm != descName:
                                txtFile.write(u"%s\t%s\n" % (altTerm, meshID))

        with open(meshtextFilename, 'r') as txtFile:
            for line in txtFile:
                nspace = line.strip().split("\t")
                # Skip lines that are not exactly ``term<TAB>id``.
                if len(nspace) == 2:
                    meshDict[nspace[0].lower()] = nspace[1]
    except IndexError:
        print("MeSH Dictionary error")
    except Exception as otherError:
        print(" !!!EXCEPTION!!! : " + str(otherError.args))
        print("%s %s" % (otherError, type(otherError)))
    return meshDict
def
getMeSHDictFromMTree
(
self
):
dict
=
{}
filename
=
"inputs/2017MeshTree.txt"
try
:
...
...
@@ -165,30 +242,80 @@ class namespaceTranslator:
if
not
os
.
path
.
exists
(
filename
):
urllib
.
urlretrieve
(
"https://www.nlm.nih.gov/mesh/2017/download/2017MeshTree.txt"
,
filename
)
else
:
for
line
in
open
(
filename
,
'r'
):
line
=
line
.
replace
(
'
\x00
'
,
''
)
line
=
line
.
strip
()
nspace
=
line
.
split
(
"
\t
"
)
if
len
(
nspace
)
==
3
:
meshID
=
nspace
[
1
]
term
=
nspace
[
2
].
lower
()
dict
[
term
]
=
meshID
for
line
in
open
(
filename
,
'r'
):
line
=
line
.
replace
(
'
\x00
'
,
''
)
line
=
line
.
strip
()
nspace
=
line
.
split
(
"
\t
"
)
if
len
(
nspace
)
==
3
:
meshID
=
nspace
[
1
]
term
=
nspace
[
2
].
lower
()
dict
[
term
]
=
meshID
except
IndexError
as
dicterror
:
print
str
(
"MeSH Dictionary error"
)
return
dict
#getMeshIDfromTerm("Drug-Induced Liver Injury")
# getGoIDfromTerm("leukotriene production involved in inflammatory response")
#getGoIDfromTerm("Stress, Physiological")
# getChemicalIDfromTerm("lipopigment")
#getIDandNamespacefromAbundance("synaptic transmission","UNKNOWN")
#print getIDandNamespacefromAbundance("gamma Secretase Complex","UNKNOWN")
#print getIDandNamespacefromAbundance("Insomnia","PHENOTYPE")
# getIDandNamespacefromAbundance("Drug-Induced Liver Injury","UNKNOWN")
# getIDandNamespacefromAbundance("Polymorphism, Single Nucleotide","UNKNOWN")
def createMeSHDB(self):
    """Prepare the MeSH term lookup database and return a cursor on it.

    Steps:
      1. Download ``inputs/desc2012.xml`` if it is not present locally.
      2. If ``inputs/mesh_desc2012.txt`` is missing, extract every descriptor
         name and every differing concept term from the XML into that file as
         ``term<TAB>DescriptorUI`` lines.
      3. Open ``inputs/xbel2celld.db``, create the ``meshTermID`` table if it
         is absent, and fill it from the text file when it has no rows.

    Fixes over the previous version:
      * The existence check ran the SELECT through ``conn.execute`` but read
        the result from an unrelated cursor, so the table was always treated
        as missing and every run inserted all rows again.
      * ``PRIMARYKEY`` was absorbed into the column type name and declared no
        key; it is now ``PRIMARY KEY``, with ``INSERT OR IGNORE`` so a repeated
        lower-cased term keeps its first ID instead of aborting the load.
      * Files are closed via ``with``; the connection is closed on failure.

    Returns:
        An sqlite3 cursor on the database, or ``None`` when an error occurred
        (the error is printed rather than raised).
    """
    import io  # function-scope import: encoding-aware writer, used only here

    xmlfilename = "inputs/desc2012.xml"
    meshtextFilename = "inputs/mesh_desc2012.txt"
    dbfilename = "inputs/xbel2celld.db"
    conn = None
    try:
        print("Connecting MeSH sqliteDB ...")
        if not os.path.exists(xmlfilename):
            print("Local copy does not exist, Downloading local copy of Mesh2012 desc2012.xml ... ")
            urllib.urlretrieve("ftp://nlmpubs.nlm.nih.gov/online/mesh/2012/xmlmesh/desc2012.xml", xmlfilename)

        if not os.path.exists(meshtextFilename):
            print("Creating text file, to store extracted terms and uniqueID from xml, to be used in future runs ...")
            # Parse before creating the cache file, so a parse failure does
            # not leave an empty cache that later runs would trust.
            root = ET.parse(xmlfilename).getroot()
            # io.open(..., encoding='utf-8') emits the same UTF-8 bytes as the
            # former ``.encode('utf-8')`` + plain write, on Python 2 and 3.
            with io.open(meshtextFilename, 'w', encoding='utf-8') as txtFile:
                for descriptorRec in root.findall("DescriptorRecord"):
                    meshID = descriptorRec.find("DescriptorUI").text
                    descName = descriptorRec.find("DescriptorName").find("String").text
                    txtFile.write(u"%s\t%s\n" % (descName, meshID))
                    for concept in descriptorRec.find("ConceptList").findall("Concept"):
                        for term in concept.find("TermList").findall("Term"):
                            altTerm = term.find("String").text
                            # The descriptor name itself was already written.
                            if altTerm != descName:
                                txtFile.write(u"%s\t%s\n" % (altTerm, meshID))

        conn = sqlite3.connect(dbfilename)
        conn.text_factory = str
        cursor = conn.cursor()
        cursor.execute("""CREATE TABLE IF NOT EXISTS meshTermID (
                              term text PRIMARY KEY,
                              meshID text
                          );""")
        # Probe on the SAME cursor we fetch from; a table with no rows
        # (new, or left empty by an interrupted run) gets populated.
        cursor.execute("SELECT 1 FROM meshTermID LIMIT 1;")
        if cursor.fetchone() is None:
            print("Database doesn't exist, creating ....")
            with open(meshtextFilename, 'r') as termFile:
                fields = (line.strip().split("\t") for line in termFile)
                cursor.executemany(
                    """INSERT OR IGNORE INTO meshTermID (term,meshID) VALUES (?,?);""",
                    # Skip lines that are not exactly ``term<TAB>id``.
                    ((parts[0].lower(), parts[1]) for parts in fields if len(parts) == 2))
            conn.commit()
        print("Creating connection to MeSHDB ....")
        return cursor
    except sqlite3.Error as sqr:
        # Single-argument print + str(exc) behaves the same on Python 2 and 3.
        print("Sqlite3 Error %s" % (sqr,))
    except Exception as ex:
        print("%s %s" % (ex, ex.args))
    # Only reached after an error: release the connection, signal failure.
    if conn is not None:
        conn.close()
    return None
NT
=
namespaceTranslator
()
print
NT
.
getMeshIDfromTerm
(
"Oligodendroglia"
)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment