Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Aishwarya Alex
xBELtoCellD
Commits
49d6fbb9
Commit
49d6fbb9
authored
Jan 17, 2017
by
Aishwarya Alex
Browse files
reformatted - moved location and context to reaction from entities
parent
f62aa697
Changes
1
Hide whitespace changes
Inline
Side-by-side
reformat_xBEL2CellD.py
0 → 100644
View file @
49d6fbb9
#!/usr/bin/python
##############################################################################################
# title :xBEL2CellD.py
# description :This script will generate 2 tab delimited files(reactions.txt, nodes.txt) from an .XBEL file
# author :Aishwarya Alex
# date :20110930
# version :0.1
# usage :python pyscript.py
# notes :
# python_version :2.7.10
##############################################################################################
# Import the modules needed to run the script.
import
string
import
xml.etree.ElementTree
as
ET
import
namespaceTranslator
as
NT
# tagNamespacePrefix='{http://belframework.org/schema/1.0/xbel}'
# function to attach namespace to tag
def ns_tag(tag):
    """Return ``tag`` qualified with the XBEL 1.0 schema namespace.

    The result uses ElementTree's ``{namespace-uri}local-name`` notation, so
    it can be handed directly to ``find``/``findall``/``iter`` or used as an
    attribute key with ``Element.get``.
    """
    qualified_name = ET.QName('{http://belframework.org/schema/1.0/xbel}' + tag)
    return str(qualified_name)
# function to remove namespace from tag
def remove_ns_tag(tag):
    """Return ``tag`` as a string with the XBEL 1.0 namespace prefix removed.

    ``tag`` is converted with ``str()`` first, so any object whose string
    form is a qualified tag name is accepted. Text that does not contain the
    namespace is returned unchanged.
    """
    # str.replace is used instead of the module-level string.replace(), which
    # is deprecated in Python 2 and does not exist in Python 3.
    return str(tag).replace('{http://belframework.org/schema/1.0/xbel}', '')
def check_to_add_in_file(entry, filename):
    """Append ``entry`` as a new line of ``filename`` unless it is already there.

    Existing lines are compared after stripping surrounding whitespace. A file
    that does not exist yet is treated as empty and is created by the append.

    Args:
        entry: Text to record (written followed by a newline).
        filename: Path of the line-oriented file to update.

    Raises:
        IOError: If the file cannot be read for a reason other than not
            existing, or cannot be opened for appending.
    """
    import errno

    try:
        with open(filename, "r") as existing:
            alreadyIn = any(line.strip() == entry for line in existing)
    except IOError as readError:
        # Only a missing file means "nothing recorded yet"; anything else
        # (permissions, a directory in the way, ...) is a real error.
        if readError.errno != errno.ENOENT:
            raise
        alreadyIn = False

    if not alreadyIn:
        with open(filename, "a") as target:
            target.write(entry + "\n")
# function to get annotationMap
def get_annotation_details(nodeAnnoGroup):
    """Build a map of the annotations found under ``nodeAnnoGroup``.

    Every ``annotation`` element reachable from ``nodeAnnoGroup`` contributes
    one item: the key is its ``refID`` attribute upper-cased, the value is the
    element's text. When the same refID occurs more than once the last
    occurrence wins.
    """
    annotationTag = ns_tag('annotation')
    refIdAttribute = ns_tag("refID")
    return {
        annotation.get(refIdAttribute).upper(): annotation.text
        for annotation in nodeAnnoGroup.iter(tag=annotationTag)
    }
# function to get an annotation element from the map
def get_annotation_from_element(annoMap, annoElement):
    """Look up ``annoElement`` in ``annoMap``, falling back to an empty string.

    The empty string is returned both when the key is absent and when the
    stored value is ``None``.
    """
    value = annoMap.get(annoElement)
    if value is None:
        return ""
    return value
# function to find the leaf term (the term that has both a function and a parameter)
def get_leafTerm(term):
    """Walk down nested ``term`` elements and return the first usable one.

    A term is returned as soon as it either has no ``function`` attribute or
    has a ``parameter`` child. Each term that is stepped over (it has a
    function but no parameter) gets its function name recorded in
    ``outputs/additional/skippedLevels.txt`` and the search continues with
    its first ``term`` child.
    """
    current = term
    while True:
        functionName = current.get(ns_tag('function'))
        hasParameter = current.find(ns_tag("parameter")) is not None
        if functionName is None or hasParameter:
            return current
        # Wrapper level without a parameter of its own: note it and descend.
        check_to_add_in_file(functionName, "outputs/additional/skippedLevels.txt")
        current = current.find(ns_tag("term"))
# function to check if a species exists; if not, create an entry, else return its id
# function to get subject and object (and modifier) entries.
def getSpecies(nodeEntry, speciesDict, nodeFilePath="outputs/nodes.txt"):
    """Return the species ID for ``nodeEntry``, registering it when it is new.

    A new entry gets the next sequential ID (``"s"`` followed by the current
    dictionary size plus one), is appended to the nodes file as
    ``<id>\\t<nodeEntry>\\n`` and is stored in ``speciesDict``.

    Args:
        nodeEntry: Tab-delimited node description used as the dictionary key.
        speciesDict: Mapping of node description to species ID; updated in
            place when ``nodeEntry`` is new.
        nodeFilePath: File that new node rows are appended to.

    Returns:
        A two-item list ``[speciesId, speciesDict]``.

    Raises:
        TypeError: If ``nodeEntry`` is not a string (for example ``None``);
            nothing is registered or written in that case.
    """
    speciesId = speciesDict.get(nodeEntry)
    if speciesId is None:
        speciesId = "s" + str(len(speciesDict) + 1)
        # Build the row before touching the file or the dictionary so a bad
        # entry fails without leaving a half-registered species behind.
        nodeRow = speciesId + "\t" + nodeEntry + "\n"
        with open(nodeFilePath, 'a') as nodeFile:
            nodeFile.write(nodeRow)
        speciesDict[nodeEntry] = speciesId
    # NOTE(review): the indentation of the original source was lost, so it is
    # unclear whether this progress line was printed only for new species; it
    # is printed for every lookup here, using the entry text itself.
    print(speciesId + " : " + nodeEntry)
    return [speciesId, speciesDict]
# Get details from the term's function and parameter
def get_entry_for_term(term):
    """Build the tab-delimited node description for a leaf ``term``.

    The result is ``<type>\\t<name>\\t<namespace>:<name>`` where:

    * ``type`` is the term's ``function`` attribute translated through the
      entity dictionary (untranslated functions are kept as they are and
      recorded in ``outputs/additional/toMapEntities.txt``);
    * ``name`` is the text of the term's ``parameter`` child;
    * ``namespace`` is the parameter's ``ns`` attribute translated through
      the namespace dictionary (unknown, non-empty namespaces are kept as
      they are and recorded in ``outputs/additional/toAddNamespaces.txt``).

    Returns:
        The description string, or ``None`` when anything goes wrong; the
        failure is printed rather than raised.
    """
    try:
        # NOTE(review): the three dictionaries are reloaded on every call.
        entityDict = NT.getDict("inputs/EntityDictFile.txt")
        abundanceDict = NT.getDict("inputs/abundanceEntity.txt")
        namespaceDict = NT.getDict("inputs/namespaceDictFile.txt")

        element = term.get(ns_tag('function'))
        parameter = term.find(ns_tag("parameter"))

        belnamespace = parameter.get(ns_tag("ns"))
        if belnamespace is None:
            belnamespace = ""

        # NOTE(review): the original comment says the abundance name is
        # chosen "from namespace", but the lookup key is the function name
        # itself - confirm which is intended.
        if element == "abundance":
            element = abundanceDict[element]

        if element in entityDict:
            elementName = entityDict[element]
        else:
            elementName = element
            # Remember entity types that still need a mapping.
            check_to_add_in_file(element, "outputs/additional/toMapEntities.txt")

        if belnamespace in namespaceDict:
            miriamnamespace = namespaceDict[belnamespace]
        else:
            miriamnamespace = belnamespace
            # Compare by value: identity checks against a string literal only
            # work by accident of interning.
            if belnamespace != "":
                # Remember namespaces that still need a mapping.
                check_to_add_in_file(belnamespace, "outputs/additional/toAddNamespaces.txt")

        elementAnnotation = parameter.text + "\t" + miriamnamespace + ":" + parameter.text
        return elementName + "\t" + elementAnnotation
    except Exception as ExceptionTerm:
        print("!!! FUNC:get_entry_for_term!!!")
        print(str(ExceptionTerm.args))
        # Callers receive None when the entry could not be built.
        return None
# function to get parameters (function, name and namespace) for elements (any node: SUBJECT, OBJECT, or MODIFIER if nested)
def get_details_from_node(speciesMap, node):
    """Resolve the species ID for a subject, object or modifier ``node``.

    The node's first ``term`` child is reduced to its leaf term, turned into
    a node description and looked up (or registered) in ``speciesMap``.

    Returns:
        The ``[speciesId, speciesMap]`` list produced by ``getSpecies``, or
        ``None`` when any step raises; the failure is printed, not re-raised.
    """
    try:
        leaf = get_leafTerm(node.find(ns_tag("term")))
        description = get_entry_for_term(leaf)
        # Adds the entry or retrieves the ID it already has.
        return getSpecies(description, speciesMap)
    except Exception as detailsEx:
        print("!!! FUNC:get_details_from_node!!!")
        print(str(detailsEx.args))
#Main function to convert- starts here
def _reset_output_file(path, header=""):
    """Create ``path`` (or truncate it if it exists) and write ``header`` to it."""
    with open(path, "w") as handle:
        handle.write(header)


def _species_ids_for_terms(terms, speciesMap):
    """Register every term in ``terms`` as a species and return the comma-joined IDs."""
    speciesIds = []
    for each in terms:
        nodeEntry = get_entry_for_term(each)
        speciesIds.append(getSpecies(nodeEntry, speciesMap)[0])
    return ','.join(speciesIds)


def convertXBEL(filepath):
    """Convert the XBEL document at ``filepath`` into tab-delimited files.

    Outputs (all relative to the current working directory):

    * ``outputs/nodes.txt`` - one row per distinct species.
    * ``outputs/reactions.txt`` - one row per statement.
    * ``outputs/additional/*.txt`` - bookkeeping lists of entities and
      namespaces that have no mapping yet, wrapper functions that were
      skipped, and the statement that aborted the run (if any).

    Evidence, citation and annotations are carried over from the previous
    statement when a statement has no annotationGroup of its own.

    The conversion stops at the first statement that fails: the failure is
    appended to ``outputs/additional/exceptionalStatements.txt``, printed,
    and the process exits with status 1.

    Args:
        filepath: Path of the XBEL (XML) file to convert.
    """
    import sys  # only the failure paths need it

    print(filepath)

    _reset_output_file("outputs/nodes.txt", "IDENTIFIER\tTYPE\tNAME\tURN\n")
    # NOTE(review): this header names 13 columns, but every row written below
    # has 15 fields (SPECIES appears twice and the citation has no column).
    # Left unchanged because the layout expected downstream is not visible here.
    _reset_output_file(
        "outputs/reactions.txt",
        "IDENTIFIER\tTYPE\tREACTANTS\tMODIFIERS\tPRODUCTS\tMODIFIER_TYPE\tSPECIES"
        "\tDISEASE\tCELL\tTISSUE\tCELLSTRUCTURE\tCELLLINE\tOTHER_ANNOTATION\n")

    expStatementFile = "outputs/additional/exceptionalStatements.txt"
    _reset_output_file(expStatementFile, "GROUP|STATEMENT_NUMBER|RELATIONSHIP|EVIDENCE\n")

    # Everything the exception handler reports is bound up front, so a failure
    # before the first statement is reported instead of raising a NameError
    # from inside the handler.
    index = None
    statement_num = None
    relationship = None
    evidence = ""

    try:
        tree = ET.parse(filepath)
        root = tree.getroot()

        # Start every bookkeeping list empty, including the two that are only
        # ever appended to by the helper functions.
        for additionalFile in ("outputs/additional/toMapEntities.txt",
                               "outputs/additional/toMapReactions.txt",
                               "outputs/additional/skippedLevels.txt",
                               "outputs/additional/toAddNamespaces.txt"):
            _reset_output_file(additionalFile)

        namespaceDict = NT.getDict("inputs/namespaceDictFile.txt")
        # Loaded but not consulted below; kept so that a missing dictionary
        # file still fails the run at this point, as it did before.
        reactionDict = NT.getDict("inputs/reactionDictFile.txt")

        speciesMap = {}
        reactionIdentifier = 0

        # Values carried over to statements without their own annotationGroup.
        # Bound here so the very first statement may lack one.
        referenceType = ""
        reference = ""
        annoMap = {}

        for index, statementGr in enumerate(root.iter(tag=ns_tag('statementGroup'))):
            for statement_num, statement in enumerate(statementGr.findall(ns_tag('statement'))):
                print("\n\n\n############################################")
                print("GROUP" + str(index + 1) + " :: STATEMENT " + str(statement_num + 1))
                print("############################################")
                reactionIdentifier += 1

                # Evidence, citation and annotations of the statement.
                annoGroup = statement.find(ns_tag('annotationGroup'))
                relationship = statement.get(ns_tag("relationship"))
                if annoGroup is not None:
                    evidence = annoGroup.find(ns_tag("evidence")).text
                    citation = annoGroup.find(ns_tag("citation"))
                    referenceType = citation.get(ns_tag("type"))
                    reference = citation.find(ns_tag("reference")).text
                    annoMap = get_annotation_details(annoGroup)
                mappedReferenceType = namespaceDict.get(referenceType)
                if mappedReferenceType is not None:
                    referenceType = mappedReferenceType

                species = get_annotation_from_element(annoMap, "SPECIES")
                disease = get_annotation_from_element(annoMap, "DISEASE")
                cell = get_annotation_from_element(annoMap, "CELL")
                tissue = get_annotation_from_element(annoMap, "TISSUE")
                cellline = get_annotation_from_element(annoMap, "CELLLINE")
                cellstructure = get_annotation_from_element(annoMap, "CELLSTRUCTURE")
                annoEntry = "\t".join([species, disease, cell, tissue, cellstructure, cellline])

                otherKeys = set(annoMap.keys()) - set(
                    ['SPECIES', 'DISEASE', 'CELL', 'TISSUE', 'CELLSTRUCTURE', 'CELLLINE'])
                # An annotation without text is written as empty, matching how
                # the named annotations above are handled.
                otherAnnotation = [each + ":" + (annoMap[each] or "") for each in otherKeys]

                print("EVIDENCE: " + evidence)

                reactant = ""
                modifier = ""
                product = ""
                modifierType = ""

                subject = statement.find(ns_tag("subject"))
                objectNode = statement.find(ns_tag("object"))
                nested = objectNode.find(ns_tag("statement")) if objectNode is not None else None

                # If the relationship is empty, use the function of the
                # subject's term (e.g. a reaction or a complex).
                if relationship is None:
                    relationship = subject.find(ns_tag('term')).get(ns_tag('function'))
                print("------RELATIONSHIP------\n" + relationship)

                if objectNode is not None:
                    # The statement has at least a reactant and a product.
                    if nested is not None:
                        # Nested statement: the outer subject acts as the
                        # modifier of the inner subject/object pair.
                        try:
                            modObject = subject
                            subject = nested.find(ns_tag("subject"))
                            objectNode = nested.find(ns_tag("object"))
                            modifierType = relationship
                            relationship = nested.get(ns_tag("relationship"))
                            print("MODIFIER :")
                            returnedVal = get_details_from_node(speciesMap, modObject)
                            speciesMap = returnedVal[1]
                            modifier = returnedVal[0]
                        except Exception as nestedException:
                            print("!!! EXCEPTION in nested statement!!!")
                            print(" !!!EXCEPTION!!! : " + str(nestedException.args))
                            sys.exit(1)

                    print("REACTANT :")
                    returnedVal = get_details_from_node(speciesMap, subject)
                    speciesMap = returnedVal[1]
                    reactant = returnedVal[0]

                    print("PRODUCT :")
                    returnedVal = get_details_from_node(speciesMap, objectNode)
                    speciesMap = returnedVal[1]
                    product = returnedVal[0]
                elif relationship == "complexAbundance":
                    # The members of the complex become the reactants.
                    complexTerms = subject.find(ns_tag('term')).findall(ns_tag('term'))
                    reactant = _species_ids_for_terms(complexTerms, speciesMap)
                elif relationship == 'reaction':
                    # Collect the lists of reactants and products.
                    reactionTerms = subject.find(ns_tag('term')).findall(ns_tag('term'))
                    for term in reactionTerms:
                        part = term.get(ns_tag('function'))
                        if part == "reactants":
                            reactant = _species_ids_for_terms(
                                term.findall(ns_tag('term')), speciesMap)
                            print("Handle complex")
                        if part == "products":
                            product = _species_ids_for_terms(
                                term.findall(ns_tag('term')), speciesMap)
                            print("Handle complex")

                print("REACTION : ")
                reactionEntry = "\t".join([
                    "r" + str(reactionIdentifier),
                    relationship,
                    reactant,
                    modifier,
                    product,
                    modifierType,
                    species,
                    referenceType + ":" + reference,
                    annoEntry,
                    str(otherAnnotation)]) + "\n"
                print(reactionEntry)
                with open("outputs/reactions.txt", 'a') as reactionFile:
                    reactionFile.write(reactionEntry)
    except Exception as ex:
        # Record where the conversion stopped, then give up with a failure
        # status. `evidence` may be None when an evidence element is empty.
        with open(expStatementFile, 'a') as expFile:
            expFile.write(str(index) + "|" + str(statement_num) + "|"
                          + str(relationship) + "|" + (evidence or "") + "\n")
        print(" !!!EXCEPTION!!! : " + str(ex.args))
        sys.exit(1)
# Convert file
# /home/alex/PhD/Bel2CellD/full_abstract3.xbel
#convertXBEL('/home/alex/PhD/Bel2CellD/small_corpus.xbel')
if __name__ == "__main__":
    import sys

    # Run the conversion only when executed as a script, so importing this
    # module has no side effects. An optional first command-line argument
    # selects the input file; without it the previous default path is used.
    convertXBEL(sys.argv[1] if len(sys.argv) > 1 else '/home/alex/PhD/APP/Aetionomy_AD.xbel')
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment