Commit d67b9503 authored by Aishwarya Alex
Browse files

py2/3 compatible

parent 2a62f557
......@@ -11,11 +11,16 @@
##############################################################################################
# Import the modules needed to run the script.
import reflectclient, os,urllib,sqlite3
from future.standard_library import install_aliases
install_aliases()
import reflectclient, os,urllib,sqlite3,requests
from io import open
from reflectclient.config import EntityType
from SPARQLWrapper import SPARQLWrapper, JSON
import xml.etree.ElementTree as ET
class namespaceTranslator:
#LOAD NAMESPACE DICTIONARY
def __init__(self):
......@@ -27,7 +32,7 @@ class namespaceTranslator:
self.meshCursor = cursor
except IndexError as dicterror:
print str("MeSH Dictionary error")
print(str("MeSH Dictionary error"))
#self.meshDict = dict
......@@ -99,63 +104,98 @@ class namespaceTranslator:
#Get GOID from reflect
def getGoIDfromTerm(self, term):
    """Resolve a free-text term to a Gene Ontology identifier via Reflect.

    Calls ``ReflectClient.get_entities`` restricted to the
    biological_process, cellular_component and molecular_function entity
    types and takes the identifier of the first entity of the first match.

    :param term: text to look up.
    :returns: the matched identifier, or ``term`` unchanged when nothing
        matches or the lookup fails. Lookup failures are reported on
        stdout instead of being propagated to the caller.
    """
    # Fallback value; bound before the try so it is always available below.
    goID = term
    try:
        reflect_client = reflectclient.ReflectClient()
        GOresult = reflect_client.get_entities(
            term,
            [EntityType.biological_process,
             EntityType.cellular_component,
             EntityType.molecular_function])
        if len(GOresult) > 0:
            goID = GOresult[0]['entities'][0]['identifier']
    except requests.ConnectionError as err:
        # str() is required here: "text" + <exception object> raises
        # TypeError, which would escape this handler and defeat the
        # best-effort fallback to the original term.
        print("ReflectCLient connection error: " + str(err))
    except reflectclient.ReflectClientInvalidResponse:
        print("ReflectCLient connection error!! Returning term")
    except Exception as otherex:
        # Deliberately broad: any lookup failure falls back to the term.
        print("ReflectCLient error: " + str(otherex))
        print(type(otherex))
    return goID
def getIDandNamespacefromAbundance(self,term,elementName):
# Purpose: map an abundance term to a two-item list [identifier, elementName]
# using ReflectClient.get_entities lookups (proteins, or GO component/process/
# function types), with a MeSH database lookup (self.getMeshIDfromdb) in a
# trailing else branch.
# NOTE(review): this span is a rendered diff. The pre-change body (Python 2
# print statements) and the post-change body (wrapped in try/except) are
# interleaved and the original indentation is lost, so the nesting of the
# else branches must be confirmed against the real file.
termId=term
idAndelement=[]
reflect_client = reflectclient.ReflectClient()
#check if protein : eg: Amyloid beta peptides- abundance/ protein abundance w/o namespace
if elementName=="PROTEIN":
POresult = reflect_client.get_entities(term, [EntityType.proteins])
if len (POresult)>0:
print POresult
termId=POresult[0]['entities'][0]['identifier']
entityType = POresult[0]['entities'][0]['type']
if entityType =='9606':
elementName="PROTEIN"
# check for GO cellular component ->Complex, biological process, moelcular function -> phenotype
else:
GOresult = reflect_client.get_entities(term, [EntityType.cellular_component,EntityType.biological_process, EntityType.molecular_function])
if len(GOresult)>0:
print "Returns in GO term"
termId = GOresult[0]['entities'][0]['identifier']
entityType=GOresult[0]['entities'][0]['type']
if entityType == "-21" or entityType == "-23":
elementName = "PHENOTYPE"
elif entityType == "-22":
elementName = "COMPLEX"
elif entityType == "9606":
elementName = "PROTEIN"
#if element is not GO term or Protein , check in MeSH
try:
termId=term
idAndelement=[]
reflect_client = reflectclient.ReflectClient()
#check if protein : eg: Amyloid beta peptides- abundance/ protein abundance w/o namespace
if elementName=="PROTEIN":
print("Connecting to reflectclient . . .")
POresult = reflect_client.get_entities(term, [EntityType.proteins])
if len (POresult)>0:
print(POresult)
termId=POresult[0]['entities'][0]['identifier']
entityType = POresult[0]['entities'][0]['type']
# NOTE(review): elementName is already "PROTEIN" on this path, so the
# assignment below does not change it.
if entityType =='9606':
elementName="PROTEIN"
# check for GO cellular component ->Complex, biological process, moelcular function -> phenotype
else:
#meshID=self.getMeshIDfromTermDict(termId)
meshID=self.getMeshIDfromdb(self.meshCursor,termId)
if meshID is not None:
termId=meshID
elementName="PHENOTYPE"
idAndelement=[termId,elementName]
return idAndelement
print("Checking in reflect client, protein and go")
GOresult = reflect_client.get_entities(term, [EntityType.cellular_component,EntityType.biological_process, EntityType.molecular_function])
if len(GOresult)>0:
print("Returns in GO term")
termId = GOresult[0]['entities'][0]['identifier']
entityType=GOresult[0]['entities'][0]['type']
# Entity type codes "-21"/"-23" are mapped to PHENOTYPE, "-22" to COMPLEX
# and "9606" to PROTEIN.
if entityType == "-21" or entityType == "-23":
elementName = "PHENOTYPE"
elif entityType == "-22":
elementName = "COMPLEX"
elif entityType == "9606":
elementName = "PROTEIN"
#if element is not GO term or Protein , check in MeSH
else:
#meshID=self.getMeshIDfromTermDict(termId)
meshID=self.getMeshIDfromdb(self.meshCursor,termId)
if meshID is not None:
termId=meshID
elementName="PHENOTYPE"
idAndelement=[termId,elementName]
return idAndelement
# Every handler below falls back to returning the unmodified [term, elementName].
except requests.ConnectionError as err:
# NOTE(review): "..." + err concatenates a str with an exception object, which
# raises TypeError inside this handler; str(err) looks intended.
print("ReflectCLient connection error: "+err)
idAndelement=[term,elementName]
return idAndelement
except reflectclient.ReflectClientInvalidResponse as responseErr:
print("ReflectCLient connection error!! Returning term")
idAndelement = [term, elementName]
return idAndelement
except Exception as otherex:
# NOTE(review): same str + exception concatenation as above; raises TypeError
# before the fallback value is returned.
print("ReflectCLient error: " + otherex)
print(type(otherex))
idAndelement = [term, elementName]
return idAndelement
#Get pubchemID from reflect
def getChemicalIDfromTerm(self, term):
    """Resolve a free-text term to a chemical identifier via Reflect.

    Calls ``ReflectClient.get_entities`` restricted to the ``chemicals``
    entity type and takes the identifier of the first entity of the first
    match.

    :param term: text to look up.
    :returns: the matched identifier, or ``term`` unchanged when nothing
        matches or the lookup fails. Lookup failures are reported on
        stdout instead of being propagated to the caller.
    """
    # Fallback value; bound before the try so it is always available below.
    chemID = term
    try:
        reflect_client = reflectclient.ReflectClient()
        chemResult = reflect_client.get_entities(term, [EntityType.chemicals])
        if len(chemResult) > 0:
            chemID = chemResult[0]['entities'][0]['identifier']
    except requests.ConnectionError as err:
        # str() is required here: "text" + <exception object> raises
        # TypeError, which would escape this handler and defeat the
        # best-effort fallback to the original term.
        print("ReflectCLient connection error: " + str(err))
    except reflectclient.ReflectClientInvalidResponse:
        print("ReflectCLient connection error: ")
    except Exception as otherex:
        # Deliberately broad: any lookup failure falls back to the term.
        print("ReflectCLient error: " + str(otherex))
        print(type(otherex))
    return chemID
# Get meshID from from Dictionary # FASTER
def getMeshIDfromdb(self,meshcursor,term):
......@@ -169,9 +209,9 @@ class namespaceTranslator:
return None
except sqlite3.Error as sqr:
print "Sqlite3 Error", sqr.message
print("Sqlite3 Error", sqr)
except Exception as ex:
print ex.message, ex.args
print(ex, ex.args)
def getMeshIDfromTermSPARQL(self, term):
meshID = self.queryMeSHSPARQL(term)
......@@ -195,7 +235,7 @@ class namespaceTranslator:
print("Creating MeSH Dictionary ...")
if not os.path.exists(xmlfilename):
print("Local copy does not exist, Downloading local copy of Mesh2012 desc2012.xml ... ")
urllib.urlretrieve("ftp://nlmpubs.nlm.nih.gov/online/mesh/2012/xmlmesh/desc2012.xml", xmlfilename)
urllib.request.urlretrieve("ftp://nlmpubs.nlm.nih.gov/online/mesh/2012/xmlmesh/desc2012.xml", xmlfilename)
if not os.path.exists(meshtextFilename):
print ("Creating text file, to store extracted terms and uniqueID from xml, to be used in future runs ...")
......@@ -204,14 +244,14 @@ class namespaceTranslator:
root = tree.getroot()
for descriptorRec in root.findall("DescriptorRecord"):
meshID=descriptorRec.find("DescriptorUI").text
descName=descriptorRec.find("DescriptorName").find("String").text.encode('utf-8')
descName=descriptorRec.find("DescriptorName").find("String")#.text.encode('utf-8')
#print descName, meshID
txtFile.write(descName + "\t" + meshID + "\n")
conceptlist= descriptorRec.find("ConceptList")
for concept in conceptlist.findall("Concept"):
termList=concept.find("TermList")
for term in termList.findall("Term"):
altTerm=term.find("String").text.encode('utf-8')
altTerm=term.find("String")#.text.encode('utf-8')
if altTerm != descName:
#print altTerm,meshID
txtFile.write(altTerm+ "\t" + meshID + "\n")
......@@ -227,10 +267,10 @@ class namespaceTranslator:
dict[term] = meshID
except IndexError as dicterror:
print str("MeSH Dictionary error")
print(str("MeSH Dictionary error"))
except Exception as otherError:
print " !!!EXCEPTION!!! : " + str(otherError.args)
print otherError.message, type(otherError)
print(" !!!EXCEPTION!!! : " + str(otherError.args))
print(otherError, type(otherError))
return dict
......@@ -240,7 +280,7 @@ class namespaceTranslator:
try:
# input file fetch from :https://www.nlm.nih.gov/mesh/2017/download/2017MeshTree.txt
if not os.path.exists(filename):
urllib.urlretrieve("https://www.nlm.nih.gov/mesh/2017/download/2017MeshTree.txt", filename)
urllib.request.urlretrieve("https://www.nlm.nih.gov/mesh/2017/download/2017MeshTree.txt", filename)
for line in open(filename,'r'):
line=line.replace('\x00','')
......@@ -252,7 +292,7 @@ class namespaceTranslator:
dict[term]=meshID
except IndexError as dicterror:
print str("MeSH Dictionary error")
print(str("MeSH Dictionary error"))
return dict
def createMeSHDB(self):
......@@ -263,24 +303,23 @@ class namespaceTranslator:
print("Connecting MeSH sqliteDB ...")
if not os.path.exists(xmlfilename):
print("Local copy does not exist, Downloading local copy of Mesh2012 desc2012.xml ... ")
urllib.urlretrieve("ftp://nlmpubs.nlm.nih.gov/online/mesh/2012/xmlmesh/desc2012.xml", xmlfilename)
urllib.request.urlretrieve("ftp://nlmpubs.nlm.nih.gov/online/mesh/2012/xmlmesh/desc2012.xml", xmlfilename)
print ("Establishing connection to MeSHDB ....")
if not os.path.exists(meshtextFilename):
print (
"Creating text file, to store extracted terms and uniqueID from xml, to be used in future runs ...")
print ( "Creating text file, to store extracted terms and uniqueID from xml, to be used in future runs ...")
txtFile = open(meshtextFilename, 'w')
tree = ET.parse(xmlfilename)
root = tree.getroot()
for descriptorRec in root.findall("DescriptorRecord"):
meshID = descriptorRec.find("DescriptorUI").text
descName = descriptorRec.find("DescriptorName").find("String").text.encode('utf-8')
descName = descriptorRec.find("DescriptorName").find("String")#.text.encode('utf-8')
# print descName, meshID
txtFile.write(descName + "\t" + meshID + "\n")
conceptlist = descriptorRec.find("ConceptList")
for concept in conceptlist.findall("Concept"):
termList = concept.find("TermList")
for term in termList.findall("Term"):
altTerm = term.find("String").text.encode('utf-8')
altTerm = term.find("String")#.text.encode('utf-8')
if altTerm != descName:
# print altTerm,meshID
txtFile.write(altTerm + "\t" + meshID + "\n")
......@@ -296,7 +335,7 @@ class namespaceTranslator:
term text PRIMARYKEY,
meshID text
);""")
for line in open(meshtextFilename, 'r'):
for line in open(meshtextFilename, 'r',encoding='utf-8'):
# line = line.replace('\x00', '')
line = line.strip()
nspace = line.split("\t")
......@@ -311,11 +350,12 @@ class namespaceTranslator:
conn = sqlite3.connect("inputs/xbel2celld.db")
conn.text_factory = str
cursor = conn.cursor()
print ("Connected to MeSHDB ....")
print ("Creating connection to MeSHDB ....")
return cursor
except sqlite3.Error as sqr:
print "Sqlite3 Error",sqr.message
print("Sqlite3 Error",sqr)
except Exception as ex:
print ex.message, ex.args
print(ex, ex.args)
#NT= namespaceTranslator()
#print (NT.getMeshIDfromdb(NT.meshCursor,"insomnia"))
\ No newline at end of file
......@@ -27,6 +27,14 @@ class xBEL2CellD:
self.reflect_client = reflectclient.ReflectClient()
# # Get meshID from reflect
# def getMeshIDfromTerm(self, term):
# mesh_id = term
# mDict = self.mDict
# mesh_id = mDict[term.lower()] if mDict.has_key(term.lower()) is True else term
# return mesh_id
# func to get parameters(function, name and namespace) for elements (any node SUBJECT,OBJECT, MODIFIER(if nested))
def get_details_from_node(self,speciesMap, node):
try:
......@@ -37,10 +45,10 @@ class xBEL2CellD:
returnVal = self.getSpecies(nodeEntry, speciesMap) # add entry or retrieve existing Id
return returnVal
except Exception as detailsEx:
print "!!! FUNC:get_details_from_node!!!"
print str(detailsEx.args)
print("!!! FUNC:get_details_from_node!!!")
print(str(detailsEx.args))
# GET DETAILS FROM TERM FUNCTION AND Parameter
# GET DETAILS FROM TERM FUNCTION AND PARameter
def get_entry_for_term(self,term):
try:
entry = ""
......@@ -53,13 +61,13 @@ class xBEL2CellD:
miriamnamespace = ""
identifier = parameter.text
# print bel values element,parameter,identifier
print " BEL:: #element: " + element + " #identifier: " + str(identifier) + " #namespace: " + belnamespace
print(" BEL:: #element: " + element + " #identifier: " + str(identifier) + " #namespace: " + belnamespace)
elementName = element # function@TERM
elementAnnotation = "" # ns@PARAMETER+text
# get element
if element in entityDict.keys():
if element in list(entityDict.keys()):
elementName = entityDict[element]
else:
......@@ -71,7 +79,7 @@ class xBEL2CellD:
if belnamespace is not "":
new_element = element + "_" + belnamespace
if abundanceDict.has_key(new_element):
if new_element in abundanceDict:
element = abundanceDict[new_element]
if "MESH" in belnamespace:
#id=self.NT.getMeshIDfromTermSPARQL(identifier)
......@@ -93,7 +101,7 @@ class xBEL2CellD:
identifier = idAndElement[0]
elementName = idAndElement[1]
print(identifier,elementName)
if re.match("^GO[0-9]+$", str(identifier)) is not None:
belnamespace = "GO"
elif re.match("^D[0-9]+$", str(identifier)) is not None:
......@@ -124,12 +132,12 @@ class xBEL2CellD:
elif belnamespace is not "":
identifier = self.NT.termToIdentifier(belnamespace, parameter.text)
if namespaceDict.has_key(belnamespace) is not True and belnamespace !="":
if (belnamespace in namespaceDict) is not True and belnamespace !="":
# if namespaceDict.has_key(belnamespace) is False or belnamespace is not "":
idAndElement = self.NT.getIDandNamespacefromAbundance(identifier, element)
identifier = idAndElement[0]
elementName = idAndElement[1]
if elementName in entityDict.keys():
if elementName in list(entityDict.keys()):
elementName = entityDict[element]
# add not handled entity to file
self.check_to_add_in_file(element, "outputs/additional/toMapEntities.txt")
......@@ -145,7 +153,7 @@ class xBEL2CellD:
self.check_to_add_in_file(belnamespace, "outputs/additional/toAddNamespaces.txt")
if namespaceDict.has_key(belnamespace):
if belnamespace in namespaceDict:
miriamnamespace = namespaceDict[belnamespace]
finalnamespace = miriamnamespace
......@@ -160,9 +168,9 @@ class xBEL2CellD:
entry = elementName + "\t" + elementAnnotation
return entry
except Exception as ExceptionTerm:
print "!!! FUNC:get_entry_for_term!!!"
print term, belnamespace
print str(ExceptionTerm.args)
print("!!! FUNC:get_entry_for_term!!!")
print(term, belnamespace)
print(str(ExceptionTerm.args))
......@@ -226,7 +234,7 @@ class xBEL2CellD:
nodeEntry = speciesId + "\t" + nodeEntry + "\n"
nodeFile.write(nodeEntry)
nodeFile.close()
print speciesId + " : " + nodeEntry
print(speciesId + " : " + nodeEntry)
returnVal.append(speciesId)
returnVal.append(speciesDict)
return returnVal
......@@ -235,7 +243,7 @@ class xBEL2CellD:
def convertXBEL(self, filepath):
# create new output file 1.nodes.txt 2.reactions.txt
print filepath
print(filepath)
nodeFile = "outputs/nodes.txt"
nFile = open(nodeFile, "w")
nFile.write("IDENTIFIER\tTYPE\tNAME\tURN\n")
......@@ -270,9 +278,9 @@ class xBEL2CellD:
#evidence = ""
for index, statementGr in enumerate(root.iter(tag=self.ns_tag('statementGroup'))):
for statement_num, statement in enumerate(statementGr.findall(self.ns_tag('statement'))):
print "\n\n\n############################################"
print "GROUP" + str(index + 1) + " :: STATEMENT " + str(statement_num + 1)
print "############################################"
print("\n\n\n############################################")
print("GROUP" + str(index + 1) + " :: STATEMENT " + str(statement_num + 1))
print("############################################")
reactionIdentifier += 1 # statement number
##annotation for evidence statement: pubmedid, journal, evidence , species , cellLine
......@@ -293,12 +301,11 @@ class xBEL2CellD:
cellline = self.get_annotation_from_element(annoMap, "CELLLINE")
cellstructure = self.get_annotation_from_element(annoMap, "CELLSTRUCTURE")
annoEntry = species + "\t" + disease + "\t" + cell + "\t" + tissue + "\t" + cellstructure + "\t" + cellline
keyList = annoMap.keys()
otherKeys = set(keyList) - set(
['SPECIES', 'DISEASE', 'CELL', 'TISSUE', 'CELLSTRUCTURE', 'CELLLINE'])
keyList = list(annoMap.keys())
otherKeys = set(keyList) - set(['SPECIES', 'DISEASE', 'CELL', 'TISSUE', 'CELLSTRUCTURE', 'CELLLINE'])
# print otherKeys
otherAnnotation = [each + ":" + annoMap[each] for each in otherKeys]
print "EVIDENCE: " + evidence
print("EVIDENCE: " + evidence)
reactant = ""
modifier = ""
......@@ -312,7 +319,7 @@ class xBEL2CellD:
# IF RELATIONSHIP IS MEPTY, check for subject node type is reaction
if relationship is None:
relationship = subject.find(self.ns_tag('term')).get(self.ns_tag('function'))
print "------RELATIONSHIP------\n" + relationship
print("------RELATIONSHIP------\n" + relationship)
# entityDict = NT.getdict("inputs/EntityDictFile.txt")
# OBJECT IS NOT NONE : Statement has atleast reactnat and product
......@@ -325,13 +332,13 @@ class xBEL2CellD:
object = nested.find(self.ns_tag("object"))
modifierType = relationship
relationship = nested.get(self.ns_tag("relationship")) # if nested.get(ns_tag("relationship")) is not None else "complexAbundance")
print "MODIFIER :"
print("MODIFIER :")
returnedVal = self.get_details_from_node(speciesMap, modObject)
speciesMap = returnedVal[1]
modifier = returnedVal[0]
except Exception as nestedException:
print "!!! EXCEPTION in nested statement!!!"
print " !!!EXCEPTION!!! : " + str(nestedException.args)
print("!!! EXCEPTION in nested statement!!!")
print(" !!!EXCEPTION!!! : " + str(nestedException.args))
f = open(expStatementFile, 'a')
f.write(
str(index) + "|" + str(statement_num) + "|" + str(
......@@ -342,11 +349,11 @@ class xBEL2CellD:
# exit()
# REACTANT AND PRODUCTS
print "REACTANT :"
print("REACTANT :")
returnedVal = self.get_details_from_node(speciesMap, subject)
speciesMap = returnedVal[1]
reactant = returnedVal[0]
print "PRODUCT :"
print("PRODUCT :")
returnedVal = self.get_details_from_node(speciesMap, object)
speciesMap = returnedVal[1]
product = returnedVal[0]
......@@ -376,7 +383,7 @@ class xBEL2CellD:
nodeEntry = self.get_entry_for_term(each)
returnedVal = self.getSpecies(nodeEntry, speciesMap)
reactantList.append(returnedVal[0])
print "Handle complex"
print("Handle complex")
reactant = ','.join(reactantList)
if part == "products":
productTerms = term.findall(self.ns_tag('term'))
......@@ -385,13 +392,13 @@ class xBEL2CellD:
nodeEntry = self.get_entry_for_term(each)
returnedVal = self.getSpecies(nodeEntry, speciesMap)
productList.append(returnedVal[0])
print "Handle complex"
print("Handle complex")
product = ','.join(productList)
print "REACTION : "
if modifierType in reactionDict.keys():
print("REACTION : ")
if modifierType in list(reactionDict.keys()):
modifierType = reactionDict[modifierType]
if relationship in reactionDict.keys():
if relationship in list(reactionDict.keys()):
relationship = reactionDict[relationship]
else:
alreadyIn = [line.strip() for line in open("outputs/additional/toMapReactions.txt", "r")]
......@@ -402,12 +409,12 @@ class xBEL2CellD:
reactionEntry = "r" + str(
reactionIdentifier) + "\t" + relationship + "\t" + reactant + "\t" + modifier + "\t" + product + "\t" + modifierType + "\t" + referenceType + ":" + reference + "\t" + species + "\t" + annoEntry + "\t" + str(
otherAnnotation) + "\n"
print reactionEntry
print(reactionEntry)
reactionFile = open("outputs/reactions.txt", 'a')
reactionFile.write(reactionEntry)
reactionFile.close()
except Exception as ex:
print " !!!EXCEPTION!!! : " + str(ex.args)
print(" !!!EXCEPTION!!! : " + str(ex.args))
f = open(expStatementFile, 'a')
f.write(str(index) + "|" + str(statement_num) + "|" + str(relationship) + "|" + evidence + "\n")
f.write(" !!!EXCEPTION!!! : " + str(ex.args) + "==================================\n")
......@@ -420,10 +427,10 @@ class xBEL2CellD:
# Convert file
# /home/alex/PhD/Bel2CellD/full_abstract3.xbel
#convertXBEL('/home/alex/PhD/Bel2CellD/small_corpus.xbel')
logfile= "logs/"+datetime.datetime.now().strftime("%Y_%m_%d_%H_%M")+"_log.txt"
logger=logging.getLogger(logfile)
#logfile= "logs/"+datetime.datetime.now().strftime("%Y_%m_%d_%H_%M")+"_log.txt"
#logger=logging.getLogger(logfile)
#logger.addHandler(logging.FileHandler(logfile))
logger.addHandler(logging.StreamHandler(sys.stdout))
#logger.addHandler(logging.StreamHandler(sys.stdout))
#
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment