Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Aishwarya Alex
xBELtoCellD
Commits
9add831f
Commit
9add831f
authored
Jan 24, 2018
by
Aishwarya Alex
Browse files
restructured code, class
parent
b60c76fb
Changes
1
Hide whitespace changes
Inline
Side-by-side
xBEL2CellD.py
View file @
9add831f
...
...
@@ -10,7 +10,7 @@
##############################################################################################
# Import the modules needed to run the script.
import
string
,
reflectclient
,
logging
,
sys
,
datetime
import
string
,
reflectclient
,
logging
,
sys
,
datetime
,
re
import
namespaceTranslator
as
NTL
import
xml.etree.ElementTree
as
ET
...
...
@@ -22,42 +22,18 @@ class xBEL2CellD:
def
__init__
(
self
):
self
.
NT
=
NTL
.
namespaceTranslator
()
self
.
mDict
=
self
.
NT
.
meshDict
#self.mDict = self.NT.meshDict
self
.
meshCursor
=
self
.
NT
.
meshCursor
self
.
reflect_client
=
reflectclient
.
ReflectClient
()
# Get meshID from reflect
def
getMeshIDfromTerm
(
self
,
term
):
mesh_id
=
term
mDict
=
self
.
mDict
mesh_id
=
mDict
[
term
.
lower
()]
if
mDict
.
has_key
(
term
.
lower
())
is
True
else
term
return
mesh_id
def
getIDandNamespacefromAbundance
(
self
,
term
,
elementName
):
termId
=
term
# elementName="UNKNOWN"
idAndelement
=
[]
# check GO
# check for GO cellular component ->Complex
GOresult
=
self
.
reflect_client
.
get_entities
(
term
,
[
EntityType
.
cellular_component
,
EntityType
.
biological_process
,
EntityType
.
molecular_function
])
if
len
(
GOresult
)
>
0
:
termId
=
GOresult
[
0
][
'entities'
][
0
][
'identifier'
]
entityType
=
GOresult
[
0
][
'entities'
][
0
][
'type'
]
if
entityType
==
"-21"
or
entityType
==
"-23"
:
elementName
=
"PHENOTYPE"
elif
entityType
==
"-22"
:
elementName
=
"COMPLEX"
# not found in GO, so check in mesh
else
:
meshID
=
self
.
getMeshIDfromTerm
(
term
)
if
meshID
is
not
term
:
termId
=
meshID
elementName
=
"PHENOTYPE"
idAndelement
=
[
termId
,
elementName
]
return
idAndelement
# # Get meshID from reflect
# def getMeshIDfromTerm(self, term):
# mesh_id = term
# mDict = self.mDict
# mesh_id = mDict[term.lower()] if mDict.has_key(term.lower()) is True else term
# return mesh_id
# func to get parameters(function, name and namespace) for elements (any node SUBJECT,OBJECT, MODIFIER(if nested))
def
get_details_from_node
(
self
,
speciesMap
,
node
):
...
...
@@ -99,45 +75,64 @@ class xBEL2CellD:
self
.
check_to_add_in_file
(
element
,
"outputs/additional/toMapEntities.txt"
)
# check for abundance element name from namespace
if
element
==
"abundance"
:
if
element
==
"abundance"
or
belnamespace
==
""
:
if
belnamespace
is
not
""
:
new_element
=
element
+
"_"
+
belnamespace
if
abundanceDict
.
has_key
(
new_element
):
element
=
abundanceDict
[
new_element
]
else
:
idAndElement
=
self
.
NT
.
getIDandNamespacefromAbundance
(
identifier
,
element
)
term
=
idAndElement
[
0
]
element
=
idAndElement
[
1
]
if
"MESH"
in
belnamespace
:
#id=self.NT.getMeshIDfromTermSPARQL(identifier)
id
=
self
.
NT
.
getMeshIDfromdb
(
self
.
meshCursor
,
identifier
)
if
id
is
not
None
:
identifier
=
id
elementName
=
"PHENOTYPE"
elif
belnamespace
not
in
(
'SCHEM'
,
'CHEBI'
):
idAndElement
=
self
.
NT
.
getIDandNamespacefromAbundance
(
identifier
,
elementName
)
identifier
=
idAndElement
[
0
]
elementName
=
idAndElement
[
1
]
self
.
check_to_add_in_file
(
belnamespace
,
"outputs/additional/toAddNamespaces.txt"
)
else
:
# no namespace, so check in GO; e.g. encountered: neuroinflammation, Amyloid beta peptides, etc.
element
=
"UNKNOWN"
element
Name
=
"UNKNOWN"
# identifier = NT.termToIdentifier(belnamespace, parameter.text)
idAndElement
=
self
.
NT
.
getIDandNamespacefromAbundance
(
identifier
,
element
)
idAndElement
=
self
.
NT
.
getIDandNamespacefromAbundance
(
identifier
,
elementName
)
identifier
=
idAndElement
[
0
]
element
=
idAndElement
[
1
]
if
"GO"
in
identifier
:
elementName
=
idAndElement
[
1
]
if
re
.
match
(
"^GO[0-9]+$"
,
str
(
identifier
))
is
not
None
:
belnamespace
=
"GO"
elif
str
(
identifier
).
startswith
(
"D"
)
:
elif
re
.
match
(
"^D[0-9]+$"
,
str
(
identifier
))
is
not
None
:
belnamespace
=
"MESH"
# TOFIX repeated Mesh Dictionary creation
elif
identifier
.
startswith
(
"ENSP0000"
):
belnamespace
=
"ENSMBL"
elif
"MESH"
in
belnamespace
or
"GO"
in
belnamespace
:
idAndElement
=
self
.
NT
.
getIDandNamespacefromAbundance
(
identifier
,
element
)
print
idAndElement
identifier
=
idAndElement
[
0
]
element
=
idAndElement
[
1
]
if
"MESH"
in
belnamespace
:
#id = self.NT.getMeshIDfromTermSPARQL(identifier)
id
=
self
.
NT
.
getMeshIDfromdb
(
self
.
meshCursor
,
identifier
)
if
id
is
not
None
:
identifier
=
id
elementName
=
"PHENOTYPE"
else
:
idAndElement
=
self
.
NT
.
getIDandNamespacefromAbundance
(
identifier
,
elementName
)
identifier
=
idAndElement
[
0
]
elementName
=
idAndElement
[
1
]
if
"GO"
in
identifier
:
belnamespace
=
"GO"
elif
str
(
identifier
).
startswith
(
"D"
)
:
elif
re
.
match
(
"^D[0-9]+$"
,
str
(
identifier
))
is
not
None
:
belnamespace
=
"MESH"
# mDict = NT.getMeSHDict()
# identifier = mDict[identifier.lower()] if mDict.has_key(identifier.lower()) is True else identifier
elif
belnamespace
is
not
""
:
identifier
=
self
.
NT
.
termToIdentifier
(
belnamespace
,
parameter
.
text
)
if
namespaceDict
.
has_key
(
belnamespace
)
is
not
True
:
if
namespaceDict
.
has_key
(
belnamespace
)
is
not
True
and
belnamespace
!=
""
:
# if namespaceDict.has_key(belnamespace) is False or belnamespace is not "":
idAndElement
=
self
.
NT
.
getIDandNamespacefromAbundance
(
identifier
,
element
)
identifier
=
idAndElement
[
0
]
...
...
@@ -149,8 +144,13 @@ class xBEL2CellD:
if
"GO"
in
identifier
:
belnamespace
=
"GO"
elif
str
(
identifier
).
startswith
(
"D"
)
:
elif
re
.
match
(
"^D[0-9]+$"
,
str
(
identifier
))
is
not
None
:
belnamespace
=
"MESH"
elif
identifier
.
startswith
(
"ENSP0000"
):
belnamespace
=
"ENSMBL"
#miriamnamespace="urn:miriam:ensembl"
self
.
check_to_add_in_file
(
belnamespace
,
"outputs/additional/toAddNamespaces.txt"
)
if
namespaceDict
.
has_key
(
belnamespace
):
...
...
@@ -163,6 +163,7 @@ class xBEL2CellD:
finalnamespace
=
"urn:miriam:hgnc.symbol:"
+
identifier
+
","
+
finalnamespace
if
"SCHEM"
in
belnamespace
or
"CHEBI"
in
finalnamespace
:
finalnamespace
=
""
elementName
=
"SIMPLE_MOLECULE"
elementAnnotation
=
parameter
.
text
+
"
\t
"
+
finalnamespace
entry
=
elementName
+
"
\t
"
+
elementAnnotation
return
entry
...
...
@@ -265,7 +266,6 @@ class xBEL2CellD:
# Load the namespace dictionary
namespaceDict
=
self
.
NT
.
getDict
(
"inputs/namespaceDictFile.txt"
)
print
"here...."
reactionDict
=
self
.
NT
.
getDict
(
"inputs/reactionDictFile.txt"
)
# create dictionary for species
speciesMap
=
{}
...
...
@@ -273,10 +273,9 @@ class xBEL2CellD:
reactionIdentifier
=
0
expStatementFile
=
"outputs/additional/exceptionalStatements.txt"
file
=
open
(
expStatementFile
,
'w'
)
print
"opened exception file
\n
"
file
.
write
(
"GROUP|STATEMENT_NUMBER|RELATIONSHIP|EVIDENCE
\n
"
)
file
.
close
()
evidence
=
""
#
evidence = ""
for
index
,
statementGr
in
enumerate
(
root
.
iter
(
tag
=
self
.
ns_tag
(
'statementGroup'
))):
for
statement_num
,
statement
in
enumerate
(
statementGr
.
findall
(
self
.
ns_tag
(
'statement'
))):
print
"
\n\n\n
############################################"
...
...
@@ -431,7 +430,7 @@ class xBEL2CellD:
#convertXBEL('/home/alex/PhD/Bel2CellD/small_corpus.xbel')
logfile
=
"logs/"
+
datetime
.
datetime
.
now
().
strftime
(
"%Y_%m_%d_%H_%M"
)
+
"_log.txt"
logger
=
logging
.
getLogger
(
logfile
)
logger
.
addHandler
(
logging
.
FileHandler
(
logfile
))
#
logger.addHandler(logging.FileHandler(logfile))
logger
.
addHandler
(
logging
.
StreamHandler
(
sys
.
stdout
))
#
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment