GitLab is now using https://gitlab.lcsb.uni.lu as its primary address. Please update your bookmarks. FAQ.

Commit 49d6fbb9 authored by Aishwarya Alex's avatar Aishwarya Alex
Browse files

Reformatted: moved location and context from entities to reactions

parent f62aa697
#!/usr/bin/python
##############################################################################################
# title :xBEL2CellD.py
# description :This script will generate 2 tab delimited files(reactions.txt, nodes.txt) from an .XBEL file
# author :Aishwarya Alex
# date :20110930
# version :0.1
# usage :python xBEL2CellD.py
# notes :
# python_version :2.7.10
##############################################################################################
# Import the modules needed to run the script.
import string
import xml.etree.ElementTree as ET
import namespaceTranslator as NT
# tagNamespacePrefix='{http://belframework.org/schema/1.0/xbel}'
# function to attach namespace to tag
def ns_tag(tag):
    """Return *tag* qualified with the XBEL 1.0 namespace.

    The result has the ``{namespace-uri}localname`` form that ElementTree
    uses for namespaced element and attribute names.
    """
    qualified = ET.QName('{http://belframework.org/schema/1.0/xbel}' + tag)
    return str(qualified)
# function to remove namespace from tag
def remove_ns_tag(tag):
    """Return ``str(tag)`` with every XBEL 1.0 namespace prefix removed.

    Uses the ``str.replace`` method rather than the deprecated
    ``string.replace`` module function, which no longer exists in Python 3.
    """
    return str(tag).replace('{http://belframework.org/schema/1.0/xbel}', '')
def check_to_add_in_file(entry,filename):
    """Append *entry* as a new line of *filename* unless it is already listed.

    Existing lines are compared after stripping surrounding whitespace.
    A file that does not exist yet is treated as empty and is created by
    the append; any other I/O error is propagated.
    """
    import errno  # local import: not part of the module's import header

    try:
        with open(filename, "r") as existing:
            already_in = set(line.strip() for line in existing)
    except IOError as err:
        # Only "no such file" means "nothing recorded yet".
        if err.errno != errno.ENOENT:
            raise
        already_in = set()
    if entry not in already_in:
        with open(filename, "a") as out:
            out.write(entry + "\n")
# function to get annotationMap
def get_annotation_details(nodeAnnoGroup):
    """Map each annotation's upper-cased ``refID`` to its text.

    Every ``annotation`` element found by iterating *nodeAnnoGroup* is
    included; when two share a refID the later one wins. An annotation
    without a ``refID`` attribute raises ``AttributeError``.
    """
    ref_attr = ns_tag("refID")
    annotations = nodeAnnoGroup.iter(tag=ns_tag('annotation'))
    return {node.get(ref_attr).upper(): node.text for node in annotations}
# function to get annotation element from map
def get_annotation_from_element(annoMap, annoElement):
    """Return ``annoMap[annoElement]``, or "" when the key is absent or maps to None."""
    value = annoMap.get(annoElement)
    if value is None:
        return ""
    return value
# function to find the leaf term (a term with both a function and a parameter)
def get_leafTerm(term):
    """Descend through wrapper terms and return the first usable leaf.

    A term that has a ``function`` attribute but no ``parameter`` child is
    treated as a wrapper: its function name is recorded in
    ``outputs/additional/skippedLevels.txt`` and the search continues with
    its first nested ``term`` child. Any other term is returned as is.
    """
    current = term
    while True:
        function_name = current.get(ns_tag('function'))
        has_parameter = current.find(ns_tag("parameter")) is not None
        if function_name is None or has_parameter:
            return current
        check_to_add_in_file(function_name, "outputs/additional/skippedLevels.txt")
        current = current.find(ns_tag("term"))
# function to check if a species exists; if not, create an entry, else return its id
# (used for subject, object and modifier entries)
def getSpecies(nodeEntry, speciesDict):
    """Return the species id for *nodeEntry*, registering it when unseen.

    An unseen entry gets the id ``"s<N>"`` where N is the dictionary size
    after insertion, is stored in *speciesDict* (mutated in place) and is
    appended to ``outputs/nodes.txt`` as ``id<TAB>entry``.

    Returns:
        A two-item list ``[speciesId, speciesDict]``.
    """
    speciesId = speciesDict.get(nodeEntry)
    logged = nodeEntry
    if speciesId is None:
        # Numbering by size stays unique only while entries are never removed.
        speciesId = "s" + str(len(speciesDict) + 1)
        speciesDict[nodeEntry] = speciesId
        logged = speciesId + "\t" + nodeEntry + "\n"
        # "with" closes the handle even when the write fails.
        with open("outputs/nodes.txt", 'a') as node_file:
            node_file.write(logged)
    print(speciesId + " : " + logged)
    return [speciesId, speciesDict]
# get details from a term's function and parameter
def get_entry_for_term(term):
    """Build the ``TYPE<TAB>NAME<TAB>URN`` node entry for a leaf *term*.

    TYPE is the term's ``function`` translated through the entity
    dictionary, NAME is the parameter text, and URN is
    ``<namespace>:<text>`` with the parameter's ``ns`` translated through
    the namespace dictionary. Functions and namespaces that have no
    translation are used unchanged and recorded in
    ``outputs/additional/toMapEntities.txt`` and
    ``outputs/additional/toAddNamespaces.txt`` respectively.

    Returns:
        The entry string, or None when anything goes wrong (the error is
        printed, not raised).
    """
    try:
        entityDict = NT.getDict("inputs/EntityDictFile.txt")
        abundanceDict = NT.getDict("inputs/abundanceEntity.txt")
        namespaceDict = NT.getDict("inputs/namespaceDictFile.txt")
        element = term.get(ns_tag('function'))
        parameter = term.find(ns_tag("parameter"))
        belnamespace = parameter.get(ns_tag("ns"))
        if belnamespace is None:
            belnamespace = ""

        # NOTE(review): the lookup key is the literal function name, although
        # the original comment says the abundance type should be derived from
        # the namespace - confirm which was intended.
        if element == "abundance":
            element = abundanceDict[element]

        if element in entityDict:
            elementName = entityDict[element]
        else:
            elementName = element
            # add not handled entity to file
            check_to_add_in_file(element, "outputs/additional/toMapEntities.txt")

        if belnamespace in namespaceDict:
            miriamnamespace = namespaceDict[belnamespace]
        else:
            miriamnamespace = belnamespace
            # Compare by value: "is not" on a string literal tests identity.
            if belnamespace != "":
                # add not handled namespace to file
                check_to_add_in_file(belnamespace, "outputs/additional/toAddNamespaces.txt")

        elementAnnotation = parameter.text + "\t" + miriamnamespace + ":" + parameter.text
        return elementName + "\t" + elementAnnotation
    except Exception as ExceptionTerm:
        print("!!! FUNC:get_entry_for_term!!!")
        print(str(ExceptionTerm.args))
        return None
#func to get parameters(function, name and namespace) for elements (any node SUBJECT,OBJECT, MODIFIER(if nested))
def get_details_from_node(speciesMap,node):
    """Resolve a subject/object/modifier *node* to its species id.

    The node's first ``term`` child is reduced to its leaf term, turned
    into a node entry and looked up (or registered) in *speciesMap*.

    Returns:
        ``[speciesId, speciesMap]`` as produced by ``getSpecies``. On any
        exception the error is printed and None is returned implicitly.
    """
    try:
        leaf = get_leafTerm(node.find(ns_tag("term")))
        # add entry or retrieve existing Id
        return getSpecies(get_entry_for_term(leaf), speciesMap)
    except Exception as detailsEx:
        print("!!! FUNC:get_details_from_node!!!")
        print(str(detailsEx.args))
#Main function to convert- starts here
def _start_output_file(path, header=""):
    """Create or truncate *path* and write *header* (possibly empty) to it."""
    with open(path, "w") as handle:
        handle.write(header)


def _joined_species_ids(terms, speciesMap):
    """Return the comma-joined species ids of *terms*, registering unseen species."""
    ids = []
    for each in terms:
        nodeEntry = get_entry_for_term(each)
        ids.append(getSpecies(nodeEntry, speciesMap)[0])
    return ','.join(ids)


def convertXBEL(filepath):
    """Convert the XBEL document at *filepath* into tab-delimited files.

    Writes ``outputs/nodes.txt`` (one line per distinct species) and
    ``outputs/reactions.txt`` (one line per statement). Helper files under
    ``outputs/additional/`` collect unmapped entities, unmapped namespaces,
    skipped term levels and the statement that aborted the run.

    Annotation values (evidence, citation, annotation map) are carried
    over from the previous statement when a statement has no
    ``annotationGroup`` of its own; before the first annotated statement
    they are empty.

    Any exception while parsing or converting is recorded in
    ``outputs/additional/exceptionalStatements.txt`` and ends the process
    through ``sys.exit(1)``.
    """
    import sys  # local import: not part of the module's import header

    print(filepath)

    # create new output file 1.nodes.txt 2.reactions.txt
    _start_output_file("outputs/nodes.txt", "IDENTIFIER\tTYPE\tNAME\tURN\n")
    # NOTE(review): this header names 13 columns but every row written below
    # has 15 fields (SPECIES appears twice and the citation has no column).
    # Left unchanged because downstream consumers are not visible here.
    _start_output_file(
        "outputs/reactions.txt",
        "IDENTIFIER\tTYPE\tREACTANTS\tMODIFIERS\tPRODUCTS\tMODIFIER_TYPE\tSPECIES\tDISEASE\tCELL\tTISSUE\tCELLSTRUCTURE\tCELLLINE\tOTHER_ANNOTATION\n")

    # create new helper files
    _start_output_file("outputs/additional/toMapEntities.txt")
    _start_output_file("outputs/additional/toMapReactions.txt")
    expStatementFile = "outputs/additional/exceptionalStatements.txt"
    _start_output_file(expStatementFile, "GROUP|STATEMENT_NUMBER|RELATIONSHIP|EVIDENCE\n")
    # These two are only ever appended to; make sure they exist without
    # discarding what earlier runs recorded.
    for touched in ("outputs/additional/skippedLevels.txt",
                    "outputs/additional/toAddNamespaces.txt"):
        with open(touched, "a"):
            pass

    # Bound before the try block so the exception handler can always report
    # them, even when parsing fails before the first statement.
    index = None
    statement_num = None
    relationship = None
    evidence = ""
    try:
        tree = ET.parse(filepath)
        root = tree.getroot()

        # LOAD namespace dictionary
        namespaceDict = NT.getDict("inputs/namespaceDictFile.txt")
        # NOTE(review): loaded but never consulted in this function; kept in
        # case the load itself is relied upon - confirm and remove.
        reactionDict = NT.getDict("inputs/reactionDictFile.txt")

        # create dictionary for species
        speciesMap = {}
        reactionIdentifier = 0

        # Carry-over defaults for statements without an annotationGroup.
        referenceType = ""
        reference = ""
        annoMap = {}

        # get all statement nodes, annotation and sub nodes (complex, or composite structures)
        for index, statementGr in enumerate(root.iter(tag=ns_tag('statementGroup'))):
            for statement_num, statement in enumerate(statementGr.findall(ns_tag('statement'))):
                print("\n\n\n############################################")
                print("GROUP" + str(index + 1) + " :: STATEMENT " + str(statement_num + 1))
                print("############################################")
                reactionIdentifier += 1  # statement number

                # annotation for evidence statement: pubmed id, journal, evidence, species, cell line
                annoGroup = statement.find(ns_tag('annotationGroup'))
                relationship = statement.get(ns_tag("relationship"))
                if annoGroup is not None:
                    evidence = annoGroup.find(ns_tag("evidence")).text
                    citation = annoGroup.find(ns_tag("citation"))
                    rawType = citation.get(ns_tag("type"))
                    # Translate only freshly read values so a carried-over
                    # type is not translated a second time.
                    translated = namespaceDict.get(rawType)
                    referenceType = rawType if translated is None else translated
                    reference = citation.find(ns_tag("reference")).text
                    annoMap = get_annotation_details(annoGroup)

                species = get_annotation_from_element(annoMap, "SPECIES")
                disease = get_annotation_from_element(annoMap, "DISEASE")
                cell = get_annotation_from_element(annoMap, "CELL")
                tissue = get_annotation_from_element(annoMap, "TISSUE")
                cellline = get_annotation_from_element(annoMap, "CELLLINE")
                cellstructure = get_annotation_from_element(annoMap, "CELLSTRUCTURE")
                annoEntry = species + "\t" + disease + "\t" + cell + "\t" + tissue + "\t" + cellstructure + "\t" + cellline
                otherKeys = set(annoMap) - set(['SPECIES', 'DISEASE', 'CELL', 'TISSUE', 'CELLSTRUCTURE', 'CELLLINE'])
                # Sorted so the column content is reproducible between runs;
                # an annotation without text contributes an empty value.
                otherAnnotation = [each + ":" + (annoMap[each] or "") for each in sorted(otherKeys)]
                print("EVIDENCE: " + evidence)

                reactant = ""
                modifier = ""
                product = ""
                modifierType = ""
                subject = statement.find(ns_tag("subject"))
                objectNode = statement.find(ns_tag("object"))
                nested = objectNode.find(ns_tag("statement")) if objectNode is not None else None

                # If the relationship is empty, use the function of the subject's term
                if relationship is None:
                    relationship = subject.find(ns_tag('term')).get(ns_tag('function'))
                print("------RELATIONSHIP------\n" + relationship)

                if objectNode is not None:
                    # OBJECT IS NOT NONE: statement has at least reactant and product
                    if nested is not None:
                        # The outer subject modifies the nested statement,
                        # which supplies the actual reactant and product.
                        try:
                            modObject = subject
                            subject = nested.find(ns_tag("subject"))
                            objectNode = nested.find(ns_tag("object"))
                            modifierType = relationship
                            relationship = nested.get(ns_tag("relationship"))
                            print("MODIFIER :")
                            returnedVal = get_details_from_node(speciesMap, modObject)
                            speciesMap = returnedVal[1]
                            modifier = returnedVal[0]
                        except Exception as nestedException:
                            print("!!! EXCEPTION in nested statement!!!")
                            print(" !!!EXCEPTION!!! : " + str(nestedException.args))
                            # Non-zero status so callers can detect the failure.
                            sys.exit(1)
                    # REACTANT AND PRODUCTS
                    print("REACTANT :")
                    returnedVal = get_details_from_node(speciesMap, subject)
                    speciesMap = returnedVal[1]
                    reactant = returnedVal[0]
                    print("PRODUCT :")
                    returnedVal = get_details_from_node(speciesMap, objectNode)
                    speciesMap = returnedVal[1]
                    product = returnedVal[0]
                elif relationship == "complexAbundance":
                    # complex: every member term becomes a reactant
                    complexTerms = subject.find(ns_tag('term')).findall(ns_tag('term'))
                    reactant = _joined_species_ids(complexTerms, speciesMap)
                elif relationship == 'reaction':
                    # reaction: collect the lists of reactants and products
                    reactionTerms = subject.find(ns_tag('term')).findall(ns_tag('term'))
                    for term in reactionTerms:
                        part = term.get(ns_tag('function'))
                        if part == "reactants":
                            reactant = _joined_species_ids(term.findall(ns_tag('term')), speciesMap)
                            print("Handle complex")
                        if part == "products":
                            product = _joined_species_ids(term.findall(ns_tag('term')), speciesMap)
                            print("Handle complex")

                print("REACTION : ")
                reactionEntry = "\t".join([
                    "r" + str(reactionIdentifier),
                    relationship,
                    reactant,
                    modifier,
                    product,
                    modifierType,
                    species,
                    referenceType + ":" + reference,
                    annoEntry,
                    str(otherAnnotation),
                ]) + "\n"
                print(reactionEntry)
                with open("outputs/reactions.txt", 'a') as reactionOut:
                    reactionOut.write(reactionEntry)
    except Exception as ex:
        with open(expStatementFile, 'a') as failed:
            failed.write(str(index) + "|" + str(statement_num) + "|" + str(relationship) + "|" + (evidence or "") + "\n")
        print(" !!!EXCEPTION!!! : " + str(ex.args))
        # Non-zero status so callers can detect the failure.
        sys.exit(1)
# Convert file
# Other inputs used during development:
#   /home/alex/PhD/Bel2CellD/full_abstract3.xbel
#   /home/alex/PhD/Bel2CellD/small_corpus.xbel
if __name__ == "__main__":
    # Guarded so that importing this module does not start a conversion.
    import sys

    # An optional first command-line argument overrides the default input.
    convertXBEL(sys.argv[1] if len(sys.argv) > 1 else '/home/alex/PhD/APP/Aetionomy_AD.xbel')
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment