Gitlab is now using https://gitlab.lcsb.uni.lu as its primary address. Please update your bookmarks. FAQ.

Commit 7701705d authored by Aishwarya Alex's avatar Aishwarya Alex
Browse files

Working for small corpus,

with example input and output files
parent 3e7bd674
xBELtoCellD
\ No newline at end of file
<component name="ProjectDictionaryState">
<dictionary name="alex">
<words>
<w>aishwarya</w>
</words>
</dictionary>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Encoding">
<file url="PROJECT" charset="UTF-8" />
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectLevelVcsManager" settingsEditedManually="false">
<OptionsSetting value="true" id="Add" />
<OptionsSetting value="true" id="Remove" />
<OptionsSetting value="true" id="Checkout" />
<OptionsSetting value="true" id="Update" />
<OptionsSetting value="true" id="Status" />
<OptionsSetting value="true" id="Edit" />
<ConfirmationsSetting value="0" id="Add" />
<ConfirmationsSetting value="0" id="Remove" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 2.7.10 (/usr/bin/python2.7)" project-jdk-type="Python SDK" />
<component name="masterDetails">
<states>
<state key="ScopeChooserConfigurable.UI">
<settings>
<splitter-proportions>
<option name="proportions">
<list>
<option value="0.2" />
</list>
</option>
</splitter-proportions>
</settings>
</state>
</states>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/xBELtoCellD.iml" filepath="$PROJECT_DIR$/.idea/xBELtoCellD.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TestRunnerService">
<option name="projectConfiguration" value="Nosetests" />
<option name="PROJECT_TEST_RUNNER" value="Nosetests" />
</component>
</module>
\ No newline at end of file
......@@ -2,4 +2,6 @@ proteinAbundance PROTEIN
pathology PHENOTYPE
biologicalProcess PHENOTYPE
rnaAbundance RNA
microRNAAbundance RNA
\ No newline at end of file
microRNAAbundance RNA
geneAbundance GENE
abundance SIMPLE_MOLECULE
\ No newline at end of file
increases UNKNOWKN_POSTIVE_INFLUENCE
directlyIncreases POSTIVE_INFLUENCE
\ No newline at end of file
increases UNKNOWN_POSTIVE_INFLUENCE
directlyIncreases POSTIVE_INFLUENCE
directlyDecreases NEGATIVE_INFLUENCE
decreases UNKNOWN_NEGATIVE_INFLUENCE
positiveCorrelation UNKNOWN_REDUCED_MODULATION
negativeCorrelation UNKNOWN_REDUCED_MODULATION
\ No newline at end of file
This diff is collapsed.
SCHEM
CHEBI
PFH
MGI
NCM
PFM
CHEBIID
NCH
SDIS
RGD
NCR
PFR
SPAC
SFAM
SCOMP
GOBP
catalyticActivity
peptidaseActivity
complexAbundance
kinaseActivity
transcriptionalActivity
molecularActivity
degradation
gtpBoundActivity
cellSurfaceExpression
translocation
list
phosphataseActivity
reaction
reactants
compositeAbundance
cellSecretion
transportActivity
causesNoChange
Complex
association
hasMembers
isA
subProcessOf
hasComponent
prognosticBiomarkerFor
hasComponents
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
proteinAbundance PROTEIN
pathology PHENOTYPE
biologicalProcess PHENOTYPE
rnaAbundance RNA
microRNAAbundance RNA
geneAbundance GENE
abundance SIMPLE_MOLECULE
\ No newline at end of file
HGNC urn:miriam:hgnc.symbol
PubMed urn:miriam:pubmed
GO urn:miriam:go
MESHPP urn:miriam:mesh.2012
MESHCL urn:miriam:mesh.2012
MESHD urn:miriam:mesh.2012
CHEBIID urn:miriam:chebi:CHEBI
\ No newline at end of file
increases UNKNOWN_POSTIVE_INFLUENCE
directlyIncreases POSTIVE_INFLUENCE
directlyDecreases NEGATIVE_INFLUENCE
decreases UNKNOWN_NEGATIVE_INFLUENCE
positiveCorrelation UNKNOWN_REDUCED_MODULATION
negativeCorrelation UNKNOWN_REDUCED_MODULATION
\ No newline at end of file
This diff is collapsed.
SCHEM
CHEBI
PFH
MGI
NCM
PFM
CHEBIID
NCH
SDIS
RGD
NCR
PFR
SPAC
SFAM
SCOMP
GOBP
catalyticActivity
peptidaseActivity
complexAbundance
kinaseActivity
transcriptionalActivity
molecularActivity
degradation
gtpBoundActivity
cellSurfaceExpression
translocation
list
phosphataseActivity
reaction
reactants
compositeAbundance
cellSecretion
transportActivity
causesNoChange
Complex
association
hasMembers
isA
subProcessOf
hasComponent
prognosticBiomarkerFor
hasComponents
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
......@@ -15,6 +15,7 @@ import string
import xml.etree.ElementTree as ET
import namespaceTranslator as NT
# tagNamespacePrefix='{http://belframework.org/schema/1.0/xbel}'
# function to attach namespace to tag
def ns_tag(tag):
......@@ -41,121 +42,155 @@ def get_tree_level(element, level=0, nodelist=[]):
get_tree_level(child, level + 1, nodelist)
# function to get annotationMap
def get_annotation_details(nodeAnnoGroup):
    """Collect the annotations found below *nodeAnnoGroup* into a dict.

    Every ``annotation`` element reached by ``nodeAnnoGroup.iter`` adds
    one entry: the key is its upper-cased ``refID`` attribute, the value
    is the element text.  When the same refID occurs more than once the
    last occurrence wins.
    """
    return {
        entry.get(ns_tag("refID")).upper(): entry.text
        for entry in nodeAnnoGroup.iter(tag=ns_tag('annotation'))
    }
# function to get tag, attrib and values from current node and its children
def get_node_info_recursive(node):
    """Describe the direct children of *node* as a single info string.

    Returns None when *node* is None and "" when it has no children.
    Otherwise the string describes the last child visited:

    * child with attributes: ``TAG || attribute :: value || text``
      (built from the last attribute iterated);
    * child without attributes: ``TAG : text``, or "" when the child
      has no text.

    A child that has attributes but no text now contributes an empty
    text part instead of raising TypeError on ``str + None``.

    NOTE(review): the previous version also called itself on every child
    but discarded the result, so deeper levels never reached the returned
    string.  That no-op descent is dropped here; confirm whether nested
    information was meant to be aggregated.
    """
    if node is None:
        return None
    info = ""
    for child in node:
        tag = remove_ns_tag(child.tag).upper()
        text = child.text
        if child.attrib:
            # Elements that only wrap other elements may have no text.
            text = text if text is not None else ""
            # .items() behaves the same on Python 2 and Python 3.
            for key, value in child.attrib.items():
                info = str(tag + " || " + remove_ns_tag(key) + " :: " + value + " || " + text)
        else:
            info = str(tag + " : " + text) if text is not None else ""
    return info
# function to get an annotation element from the map
def getAnnoElement(annoMap, annoElement):
    """Return the annotation stored under *annoElement*.

    annoMap     -- dict of annotation key to annotation text
    annoElement -- key to look up

    Returns "" when the key is absent or its stored value is None.
    """
    # Single lookup; a stored None must still map to "", so a plain
    # annoMap.get(annoElement, "") would not be equivalent.
    anno = annoMap.get(annoElement)
    return anno if anno is not None else ""
# function to get subject and object (and modifier) entries.
def getSpecies(nodeEntry, speciesDict):
    """Return the species id recorded for *nodeEntry*, or None.

    nodeEntry   -- key to look up
    speciesDict -- dict mapping node entries to species ids

    Prints the retrieved id (or "None") as a trace line.
    """
    speciesId = speciesDict.get(nodeEntry)
    # A single parenthesised argument prints identically with the
    # Python 2 print statement and the Python 3 print function.
    print("speciesId retrieved :" + str(speciesId))
    # dict.get already yields None for a missing key, so no branch is needed.
    return speciesId
# function to get parameters for elements (SUBJECT,OBJECT, MODIFIER)
# TO DO: add MODIFIER if SUBJECT/OBJECT is nested
# Resolve one statement element into a tab-separated detail string
# "<entity type>\t<parameter text>\t<namespace>:<parameter text>".
#
# node             -- element whose child `term` is inspected
# index            -- zero-based statement-group index (logged 1-based)
# statement_num    -- zero-based statement index (logged 1-based)
# evidence         -- evidence text; UTF-8 encoded and stripped of "\r\n" for the log
# expStatementFile -- path of the pipe-separated log that receives nested cases
#
# Returns the detail string, or the initial "EMPTY" when no name/annotation
# pair was built. After an AttributeError only a message is printed and
# nothing is returned explicitly.
#
# NOTE(review): this listing is a commit diff rendered without +/- markers and
# without indentation, so several statements appear twice (pre-change form
# followed by the reformatted post-change form).
def get_details_from_element(node, index, statement_num, evidence, expStatementFile):
try:
term = node.find(ns_tag("term"))
elementName = "" # function@TERM
elementAnnotation = "" # ns@PARAMETER+text
details = "EMPTY"
entityDict=NT.getEntityDict()
entityDict = NT.getEntityDict()
if term is not None:
element = term.get(ns_tag('function'))
parameter = term.find(ns_tag("parameter"))
# Translate the term's function name through the entity dictionary; a name
# without a mapping is used unchanged and appended to toMapEntities.txt
# unless that file already lists it.
if element in entityDict.keys():
elementName=entityDict[element]
elementName = entityDict[element]
else:
elementName=element
#LOAD entity DIctionary
alreadyIn=[line.strip() for line in open("outputs/additional/toMapEntities.txt","r")]
elementName = element
# LOAD entity DIctionary
alreadyIn = [line.strip() for line in open("outputs/additional/toMapEntities.txt", "r")]
if element not in alreadyIn:
f=open("outputs/additional/toMapEntities.txt","a")
f.write(element+"\n")
f = open("outputs/additional/toMapEntities.txt", "a")
f.write(element + "\n")
f.close()
# A parameter directly under the term supplies the name and its namespace;
# the namespace is translated through the namespace dictionary, and an
# unmapped namespace is kept as-is and appended to toAddNamespaces.txt
# unless already listed there.
if parameter is not None:
belnamespace=parameter.get(ns_tag("ns"))
namespaceDict=NT.getNamespaceDict()
miriamnamespace=belnamespace
belnamespace = parameter.get(ns_tag("ns"))
namespaceDict = NT.getNamespaceDict()
miriamnamespace = belnamespace
if namespaceDict.has_key(belnamespace):
miriamnamespace=namespaceDict[belnamespace]
miriamnamespace = namespaceDict[belnamespace]
else:
alreadyIn=[line.strip() for line in open("outputs/additional/toAddNamespaces.txt","r")]
alreadyIn = [line.strip() for line in open("outputs/additional/toAddNamespaces.txt", "r")]
if belnamespace not in alreadyIn:
f=open("outputs/additional/toAddNamespaces.txt","a")
f.write(belnamespace+"\n")
f = open("outputs/additional/toAddNamespaces.txt", "a")
f.write(belnamespace + "\n")
f.close()
elementAnnotation = parameter.text + "\t" + miriamnamespace+":"+parameter.text
elementAnnotation = parameter.text + "\t" + miriamnamespace + ":" + parameter.text
# No parameter directly under the term: reported below as a term nested in a
# term. The skipped level is written to expStatementFile as
# group|statement|details|node info|evidence.
else:
print remove_ns_tag(node.tag).upper() + " is nested WITH A TERM ## ?? activity(object/subject)/complex"
nestedTerm = term.find(ns_tag("term"))
print get_node_info_recursive(node)
entry = str(index+1) + "|" + str(statement_num+1) + "|" + str(elementName + " :" + elementAnnotation) + "|" + str(get_node_info_recursive(node)).replace('|', ' ')+ "|" + str(evidence.encode('utf-8').replace("\r\n","")+"\n")
#print entry
file = open(expStatementFile, 'a')
file.write(entry)
file.close()
entry = str(index + 1) + "|" + str(statement_num + 1) + "|" + str(
elementName + " :" + elementAnnotation) + "|" + str(get_node_info_recursive(node)).replace('|',' ') + "|" + str(evidence.encode('utf-8').replace("\r\n", "") + "\n")
# print entry
f = open(expStatementFile, 'a')
f.write(entry)
f.close()
if "Activity" in elementName:
print "Skipped one level term with Activity:" + elementName
else:
print "Skipped one level :" + elementName
#exit()
# exit()
# Descend one level: the current term is passed as the node, so the next call
# looks for a term beneath it.
if nestedTerm is not None:
details = get_details_from_element(term, index, statement_num, evidence, expStatementFile)
return details
details = get_details_from_element(term, index, statement_num, evidence, expStatementFile)
return details
# Branch reported (per the message printed below) as the node nesting a
# statement rather than a term; the case is only written to expStatementFile.
else:
print remove_ns_tag(node.tag).upper() + " is nested WITH A STATEMENT ## add as modifier to nested statement(reaction)"
print "----PRINTING NODE INFO RECURSIVELY"
entry = str(index+1) + "|" + str(statement_num+1) + "|" + str(elementName + " :" + elementAnnotation) + "|" +str(get_node_info_recursive(term)).replace('|',' ') +"|" + str(evidence.encode('utf-8').replace("\r\n","")+"\n")
print entry
file = open(expStatementFile, 'a')
file.write(entry)
file.close()
entry = str(index + 1) + "|" + str(statement_num + 1) + "|" + str(
elementName + " :" + elementAnnotation) + "|" + str(get_node_info_recursive(term)).replace('|',' ') + "|" + str(evidence.encode('utf-8').replace("\r\n", "") + "\n")
# print entry
f = open(expStatementFile, 'a')
f.write(entry)
f.close()
# get_node_info_recursive(node)
# NOTE(review): `elementName is not ""` is an identity test against a string
# literal, not a value comparison; `!=` is probably what was intended - confirm.
details = (elementName + "\t" + elementAnnotation) if elementAnnotation and elementName is not "" else details
return details
# An AttributeError is only printed; no value is returned explicitly on this
# path, so the caller receives None.
except AttributeError as elementEror:
print "!!!!!! EXCEPTION @ FUNC-get_details_from_element:\n" + str(elementEror)
def convertXBEL(filepath):
#create new output file 1.nodes.txt 2.reactions.txt
nodeFile="outputs/nodes.txt"
nFile=open(nodeFile,"w")
reactionFile="outputs/reactions.txt"
rFile=open(reactionFile,"w")
# create new output file 1.nodes.txt 2.reactions.txt
nodeFile = "outputs/nodes.txt"
nFile = open(nodeFile, "w")
nFile.write("IDENTIFIER\tTYPE\tNAME\tURN\tSPECIES\tDISEASE\tCELL\tTISSUE\tCELLSTRUCTURE\tCELLLINE\tOTHER_ANNOTATION\n")
nFile.close()
reactionFile = "outputs/reactions.txt"
rFile = open(reactionFile, "w")
rFile.write("IDENTIFIER\tTYPE\tREACTANTS\tMODIFIERS\tPRODUCTS\tMODIFIER_TYPE\tSPECIES\tANNOTATIONS\n")
rFile.close()
tree = ET.parse(filepath)
root = tree.getroot()
#create new files
f=open("outputs/additional/toMapEntities.txt","w")
# create new files
f = open("outputs/additional/toMapEntities.txt", "w")
f.close()
f = open("outputs/additional/toMapReactions.txt", "w")
f.close()
#LOAD namespace DIctionary
namespaceDict=NT.getNamespaceDict()
reactionDict=NT.getReactionDict()
# Load the namespace and reaction dictionaries
namespaceDict = NT.getNamespaceDict()
reactionDict = NT.getReactionDict()
# get all statement nodes, annotation and sub nodes (complex, or composite structures)
reactionIdentifier = 0
totalSpecies = 0
expStatementFile="outputs/additional/exceptionalStatements.txt"
expStatementFile = "outputs/additional/exceptionalStatements.txt"
file = open(expStatementFile, 'w')
file.write("GROUP|STATEMENT_NUMBER|DETAILS|NODEINFO|EVIDENCE\n")
file.close()
evidence = ""
speciesMap={}
for index, statementGr in enumerate(root.iter(tag=ns_tag('statementGroup'))):
for statement_num, statement in enumerate(statementGr.iter(tag=ns_tag('statement'))):
for statement_num, statement in enumerate(statementGr.findall(ns_tag('statement'))):
print "\n\n\n############################################"
print "GROUP" + str(index + 1) + " :: STATEMENT " + str(statement_num + 1) + " \t\t\t**" + str(reactionIdentifier)
print "############################################"
......@@ -166,48 +201,112 @@ def convertXBEL(filepath):
# get_node_info_recursive(annoGroup)
## get info on the relationship - CURRENTLY: only reference; AVAILABLE: species, diseases, cellline, ...
relationship = (statement.get(ns_tag("relationship")) if statement.get(ns_tag("relationship")) is not None else "Complex")
evidence = annoGroup.find(ns_tag("evidence")).text
print "EVIDENCE: "+evidence
evidence = annoGroup.find(ns_tag("evidence")).text if annoGroup is not None else evidence
citation = annoGroup.find(ns_tag("citation")) if annoGroup is not None else citation
referenceType = citation.get(ns_tag("type")) if annoGroup is not None else referenceType
reference = citation.find(ns_tag("reference")).text if annoGroup is not None else reference
annoMap = get_annotation_details(annoGroup) if annoGroup is not None else annoMap
species = getAnnoElement(annoMap, "SPECIES")
disease = getAnnoElement(annoMap, "DISEASE")
cell = getAnnoElement(annoMap, "CELL")
tissue = getAnnoElement(annoMap, "TISSUE")
cellline = getAnnoElement(annoMap, "CELLLINE")
cellstructure = getAnnoElement(annoMap, "CELLSTRUCTURE")
annoEntry = species + "\t" + disease + "\t" + cell + "\t" + tissue + "\t" + cellstructure + "\t" + cellline
keyList = annoMap.keys()
otherKeys = set(keyList) - set(['SPECIES', 'DISEASE', 'CELL', 'TISSUE', 'CELLSTRUCTURE', 'CELLLINE'])
print otherKeys
otherAnnotation = [each + ":" + annoMap[each] for each in otherKeys]
print "EVIDENCE: " + evidence
print "------RELATIONSHIP------\n" + relationship
citation = annoGroup.find(ns_tag("citation"))
referenceType = citation.get(ns_tag("type"))
reference = citation.find(ns_tag("reference")).text
reactant = ""
modifier = ""
product = ""
for each in ("subject", "object"):
print "------" + each.upper() + "------"
for element in statement.findall(ns_tag(each)):
totalSpecies += 1
# ADD CONDITION FOR NESTED (modifiers!!)
if each == "subject":
reactant = "s" + str(totalSpecies)
else:
product = "s" + str(totalSpecies)
nodeEntry= "s" + str(totalSpecies) + "\t" + get_details_from_element(element, index, statement_num,evidence, expStatementFile)+"\n"
print nodeEntry
nFile=open(nodeFile,"a")
modifierType = ""
nFile = open(nodeFile, "a")
subject=statement.find(ns_tag("subject"))
object =statement.find(ns_tag("object")) if statement.find(ns_tag("object")) is not None else None
nested=object.find(ns_tag("statement")) if object is not None else None
modifier=""
mod=None
if nested is not None:
mod=subject
nestedStatement= object.find(ns_tag("statement"))
subject=nestedStatement.find(ns_tag("subject"))
object =nestedStatement.find(ns_tag("object")) if statement.find(ns_tag("object")) is not None else None
modifierType=relationship
relationship=(nestedStatement.get(ns_tag("relationship")) if nestedStatement.get(ns_tag("relationship")) is not None else "Complex")
entry= get_details_from_element(subject, index,statement_num, evidence, expStatementFile)+"\t"+ annoEntry
spId=getSpecies(entry, speciesMap)
nodeEntry = "s" + str(spId)+ "\t" +entry + "\t" + str(otherAnnotation) + "\n"
if spId is None:
print "NEW ENTITY : reactant"
totalSpecies+=1
spId=totalSpecies
speciesMap[entry]=spId
nodeEntry ="s" + str(spId) + "\t" + entry
nodeEntry = nodeEntry + "\t" + str(otherAnnotation) + "\n"
nFile.write(nodeEntry)
reactant="s" + str(spId)
print "REACTANT : " +nodeEntry
if object is not None:
entry= get_details_from_element(object, index,statement_num, evidence, expStatementFile)+"\t"+ annoEntry
spId=getSpecies(entry, speciesMap)
nodeEntry = "s" + str(spId)+ "\t" +entry + "\t" + str(otherAnnotation) + "\n"
if spId is None:
print "NEW ENTITY : pRODUCT"
totalSpecies+=1
spId=totalSpecies
speciesMap[entry]=spId
nodeEntry ="s" + str(spId) + "\t" + entry
nodeEntry = nodeEntry + "\t" + str(otherAnnotation) + "\n"
nFile.write(nodeEntry)
nFile.close()
product = "s" + str(spId)
print "PRODUCT : " +nodeEntry
if mod is not None:
entry= get_details_from_element(mod, index,statement_num, evidence, expStatementFile)+"\t"+ annoEntry
spId=getSpecies(entry, speciesMap)
nodeEntry = "s" + str(spId)+ "\t" +entry + "\t" + str(otherAnnotation) + "\n"
if spId is None:
print "NEW ENTITY : MODIFIER"
totalSpecies+=1
spId=totalSpecies
speciesMap[entry]=spId
nodeEntry ="s" + str(spId) + "\t" + entry
nodeEntry = nodeEntry + "\t" + str(otherAnnotation) + "\n"
nFile.write(nodeEntry)
modifier = "s" + str(spId)
print "MODIFIER : "+nodeEntry
print "\n-----REACTION-----\n"
if modifierType in reactionDict.keys():
modifierType = reactionDict[modifierType]
if relationship in reactionDict.keys():
relationship=reactionDict[relationship]
relationship = reactionDict[relationship]
else:
alreadyIn=[line.strip() for line in open("outputs/additional/toMapReactions.txt","r")]
if element not in alreadyIn:
f=open("outputs/additional/toMapReactions.txt","a")
f.write(element+"\n")
alreadyIn = [line.strip() for line in open("outputs/additional/toMapReactions.txt", "r")]
if relationship not in alreadyIn:
f = open("outputs/additional/toMapReactions.txt", "a")
f.write(relationship + "\n")
f.close()
reactionEntry= "r" + str(reactionIdentifier) + "\t" + relationship + "\t" + reactant + "\t" + modifier + "\t" + product + "\t" + namespaceDict[referenceType] + ":" + reference+"\n"
reactionEntry = "r" + str(reactionIdentifier) + "\t" + relationship + "\t" + reactant + "\t" + modifier + "\t" + product + "\t" + modifierType + "\t" + species+"\t"+ namespaceDict[referenceType] + ":" + reference + "\n"
print reactionEntry
rFile=open(reactionFile,"a")
rFile = open(reactionFile, "a")
rFile.write(reactionEntry)
nFile.close()
rFile.close()
# if reactionIdentifier==25:
# exit()
except Exception as ex:
print " !!!EXCEPTION!!! : " + str(ex)
print "TO DO : Nested statement: Add as modifier"
exit()
#Convert file
#/home/alex/PhD/Bel2CellD/full_abstract3.xbel
convertXBEL('/home/alex/PhD/Bel2CellD/small_corpus.xbel')
\ No newline at end of file
# Script entry: convert the small example corpus.
# NOTE(review): the path is absolute and developer-local, and this call sits at
# module level, so it also runs whenever the module is imported.
# Alternative input kept for reference:
# /home/alex/PhD/Bel2CellD/full_abstract3.xbel
convertXBEL('/home/alex/PhD/Bel2CellD/small_corpus.xbel')
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment