GitLab is now using https://gitlab.lcsb.uni.lu as its primary address. Please update your bookmarks. FAQ.

Commit b60c76fb authored by Aishwarya Alex's avatar Aishwarya Alex
Browse files

restructured code, class

parent 3183bc49
......@@ -11,40 +11,30 @@
##############################################################################################
# Import the modules needed to run the script.
import MySQLdb, reflectclient, os,urllib
import reflectclient, os,urllib,sqlite3
from reflectclient.config import EntityType
import rdflib
from SPARQLWrapper import SPARQLWrapper, JSON, XML, N3, RDF
from SPARQLWrapper import SPARQLWrapper, JSON
import xml.etree.ElementTree as ET
class namespaceTranslator:
#LOAD NAMESPACE DICTIONARY
def __init__(self):
''' Constructor for this class. '''
# Create some member animals
dict={}
filename="inputs/2017MeshTree.txt"
try:
# input file fetch from :https://www.nlm.nih.gov/mesh/2017/download/2017MeshTree.txt
if not os.path.exists(filename):
urllib.urlretrieve("https://www.nlm.nih.gov/mesh/2017/download/2017MeshTree.txt", filename)
else:
for line in open(filename,'r'):
line=line.replace('\x00','')
line=line.strip()
nspace=line.split("\t")
if len(nspace)==3:
meshID=nspace[1]
term=nspace[2].lower()
dict[term]=meshID
try:
cursor=self.createMeSHDB()
self.meshCursor = cursor
except IndexError as dicterror:
print str("MeSH Dictionary error")
self.meshDict = dict
#self.meshDict = dict
def queryMeSHSPARQL(self):
#sparql = rdflib.URIRef("http://id.nlm.nih.gov/mesh/sparql")
sparql=SPARQLWrapper("http://id.nlm.nih.gov/mesh/sparql")
sparql.setQuery("""
def queryMeSHSPARQL(self,term):
sparql=SPARQLWrapper("http://id.nlm.nih.gov/mesh/sparql",)
preparedQuery="""
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
......@@ -65,13 +55,21 @@ class namespaceTranslator:
?c meshv:term ?term .
?term rdfs:label ?label .
FILTER (REGEX(?label,"^insomnia$","i"))
FILTER (REGEX(?label,\"^"""+term+"""$\","i"))
}
""")
"""
sparql.addCustomParameter("inference","true")
sparql.setQuery(preparedQuery)
sparql.setReturnFormat(JSON)
result= sparql.query().convert()
print result
meshID =None
if len(result['results']['bindings']) >0:
meshResult=result['results']['bindings'][0]['d']['value'].split("/")[-1]
meshID=meshResult if meshResult is not None else None
return meshID
def getDict(self,filename):
dict={}
......@@ -112,29 +110,40 @@ class namespaceTranslator:
def getIDandNamespacefromAbundance(self,term,elementName):
termId=term
#elementName="UNKNOWN"
idAndelement=[]
#check GO
reflect_client = reflectclient.ReflectClient()
# check for GO cellular component ->Complex
GOresult = reflect_client.get_entities(term, [EntityType.cellular_component,EntityType.biological_process, EntityType.molecular_function])
if len(GOresult) > 0 :
termId = GOresult[0]['entities'][0]['identifier']
entityType=GOresult[0]['entities'][0]['type']
if entityType == "-21" or entityType == "-23":
elementName = "PHENOTYPE"
elif entityType == "-22":
elementName = "COMPLEX"
#not found in GO, so check in mesh
if termId is term:
print termId +" : here"
meshID=self.getMeshIDfromTerm(term)
if meshID is not term:
termId=meshID
elementName="PHENOTYPE"
#check if protein : eg: Amyloid beta peptides- abundance/ protein abundance w/o namespace
if elementName=="PROTEIN":
POresult = reflect_client.get_entities(term, [EntityType.proteins])
if len (POresult)>0:
print POresult
termId=POresult[0]['entities'][0]['identifier']
entityType = POresult[0]['entities'][0]['type']
if entityType =='9606':
elementName="PROTEIN"
# check for GO cellular component ->Complex, biological process, moelcular function -> phenotype
else:
GOresult = reflect_client.get_entities(term, [EntityType.cellular_component,EntityType.biological_process, EntityType.molecular_function])
if len(GOresult)>0:
print "Returns in GO term"
termId = GOresult[0]['entities'][0]['identifier']
entityType=GOresult[0]['entities'][0]['type']
if entityType == "-21" or entityType == "-23":
elementName = "PHENOTYPE"
elif entityType == "-22":
elementName = "COMPLEX"
elif entityType == "9606":
elementName = "PROTEIN"
#if element is not GO term or Protein , check in MeSH
else:
#meshID=self.getMeshIDfromTermDict(termId)
meshID=self.getMeshIDfromdb(self.meshCursor,termId)
if meshID is not None:
termId=meshID
elementName="PHENOTYPE"
idAndelement=[termId,elementName]
#print idAndelement
return idAndelement
......@@ -147,17 +156,85 @@ class namespaceTranslator:
chemID=chemResult[0]['entities'][0]['identifier']
return chemID
#Get meshID from reflect
def getMeshIDfromTerm(self,term):
# Get meshID from the SQLite meshTermID table # FASTER
def getMeshIDfromdb(self,meshcursor,term):
    """Look up the MeSH ID stored for ``term`` in the ``meshTermID`` table.

    The term is lower-cased before the lookup, so the comparison is
    against the ``term`` column exactly as given in lower case.

    :param meshcursor: DB-API cursor on a database that holds the
        ``meshTermID(term, meshID)`` table.
    :param term: term to look up.
    :return: the ``meshID`` value of the first matching row, or ``None``
        when no row matches or when the lookup fails (the error is
        printed, not raised).
    """
    try:
        meshcursor.execute("SELECT meshID FROM meshTermID WHERE term=?;", [term.lower()])
        row = meshcursor.fetchone()
    except sqlite3.Error as sqr:
        # Format the exception itself: the ``.message`` attribute is
        # deprecated and does not exist on newer interpreters.
        print("Sqlite3 Error %s" % (sqr,))
        return None
    except Exception as ex:
        # Best-effort lookup: e.g. a non-string ``term`` ends up here.
        print("%s %s" % (ex, ex.args))
        return None
    return row[0] if row is not None else None
def getMeshIDfromTermSPARQL(self, term):
    """Resolve ``term`` to a MeSH ID by delegating to ``queryMeSHSPARQL``.

    :param term: term handed unchanged to ``self.queryMeSHSPARQL``.
    :return: whatever ``queryMeSHSPARQL`` returns for the term
        (``None`` is passed through as ``None``).
    """
    return self.queryMeSHSPARQL(term)
#Get meshID from Dictionary # FASTER
def getMeshIDfromTermDict(self, term):
    """Look up ``term`` in the in-memory ``self.meshDict`` mapping.

    The lookup key is the lower-cased term.

    :param term: term to translate.
    :return: the value stored in ``self.meshDict`` for the lower-cased
        term, or ``term`` itself (unchanged) when there is no entry.
    """
    # A single ``dict.get`` replaces the earlier ``has_key`` lookup, whose
    # result was immediately overwritten anyway.
    return self.meshDict.get(term.lower(), term)
def getMeSHDictFromDesc2012XML(self):
    """Build a ``lower-cased term -> DescriptorUI`` dictionary from desc2012.xml.

    Steps:

    1. If ``inputs/desc2012.xml`` is missing, download it.
    2. If ``inputs/mesh_desc2012.txt`` is missing, create it from the XML:
       one ``name<TAB>DescriptorUI`` line per descriptor name and per
       alternative term that differs from the descriptor name.
    3. Read the text file into the dictionary. This now happens on every
       call, including the one that has just created the text file.

    Errors are printed, not raised.

    :return: the dictionary; it holds whatever was loaded before an error
        occurred (possibly nothing).
    """
    # Function-scope import: UTF-8 text I/O that behaves the same on
    # Python 2 and Python 3.
    import io

    xmlfilename="inputs/desc2012.xml"
    meshtextFilename ="inputs/mesh_desc2012.txt"
    meshDict={}

    def toText(value):
        # ElementTree can return byte strings on Python 2; normalise to text.
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    try:
        print("Creating MeSH Dictionary ...")
        if not os.path.exists(xmlfilename):
            print("Local copy does not exist, Downloading local copy of Mesh2012 desc2012.xml ... ")
            urllib.urlretrieve("ftp://nlmpubs.nlm.nih.gov/online/mesh/2012/xmlmesh/desc2012.xml", xmlfilename)
        if not os.path.exists(meshtextFilename):
            print ("Creating text file, to store extracted terms and uniqueID from xml, to be used in future runs ...")
            # Parse before opening the output so a broken XML file does not
            # leave an empty cache file behind.
            root = ET.parse(xmlfilename).getroot()
            with io.open(meshtextFilename, 'w', encoding='utf-8') as txtFile:
                for descriptorRec in root.findall("DescriptorRecord"):
                    meshID = toText(descriptorRec.find("DescriptorUI").text)
                    descName = toText(descriptorRec.find("DescriptorName").find("String").text)
                    txtFile.write(descName + u"\t" + meshID + u"\n")
                    for term in descriptorRec.findall("ConceptList/Concept/TermList/Term"):
                        altTerm = toText(term.find("String").text)
                        if altTerm != descName:
                            txtFile.write(altTerm + u"\t" + meshID + u"\n")
        with io.open(meshtextFilename, 'r', encoding='utf-8') as txtFile:
            for line in txtFile:
                nspace = line.strip().split("\t")
                # Only well-formed "term<TAB>id" lines are loaded.
                if len(nspace) == 2:
                    meshDict[nspace[0].lower()] = nspace[1]
    except IndexError:
        print("MeSH Dictionary error")
    except Exception as otherError:
        print(" !!!EXCEPTION!!! : " + str(otherError.args))
        print("%s %s" % (otherError, type(otherError)))
    return meshDict
def getMeSHDictFromMTree(self):
dict={}
filename="inputs/2017MeshTree.txt"
try:
......@@ -165,30 +242,80 @@ class namespaceTranslator:
if not os.path.exists(filename):
urllib.urlretrieve("https://www.nlm.nih.gov/mesh/2017/download/2017MeshTree.txt", filename)
else:
for line in open(filename,'r'):
line=line.replace('\x00','')
line=line.strip()
nspace=line.split("\t")
if len(nspace)==3:
meshID=nspace[1]
term=nspace[2].lower()
dict[term]=meshID
for line in open(filename,'r'):
line=line.replace('\x00','')
line=line.strip()
nspace=line.split("\t")
if len(nspace)==3:
meshID=nspace[1]
term=nspace[2].lower()
dict[term]=meshID
except IndexError as dicterror:
print str("MeSH Dictionary error")
return dict
#getMeshIDfromTerm("Drug-Induced Liver Injury")
# getGoIDfromTerm("leukotriene production involved in inflammatory response")
#getGoIDfromTerm("Stress, Physiological")
# getChemicalIDfromTerm("lipopigment")
#getIDandNamespacefromAbundance("synaptic transmission","UNKNOWN")
#print getIDandNamespacefromAbundance("gamma Secretase Complex","UNKNOWN")
#print getIDandNamespacefromAbundance("Insomnia","PHENOTYPE")
# getIDandNamespacefromAbundance("Drug-Induced Liver Injury","UNKNOWN")
# getIDandNamespacefromAbundance("Polymorphism, Single Nucleotide","UNKNOWN")
def createMeSHDB(self):
    """Open (building it on first use) the SQLite MeSH term database.

    Steps:

    1. If ``inputs/desc2012.xml`` is missing, download it.
    2. If ``inputs/mesh_desc2012.txt`` is missing, create it from the XML:
       one ``name<TAB>DescriptorUI`` line per descriptor name and per
       alternative term that differs from the descriptor name.
    3. Open ``inputs/xbel2celld.db``, create the ``meshTermID(term, meshID)``
       table if needed, and fill it from the text file when it is empty.
       Terms are stored lower-cased; when a term occurs more than once
       the first occurrence is kept.

    Errors are printed, not raised.

    :return: a cursor on the open database connection, or ``None`` when
        any step failed.
    """
    # Function-scope import: UTF-8 text I/O that behaves the same on
    # Python 2 and Python 3.
    import io

    inputsDir = "inputs"
    xmlfilename = "inputs/desc2012.xml"
    meshtextFilename = "inputs/mesh_desc2012.txt"
    dbFilename = "inputs/xbel2celld.db"

    def toText(value):
        # ElementTree can return byte strings on Python 2; normalise to text.
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    conn = None
    try:
        print("Connecting MeSH sqliteDB ...")
        if not os.path.isdir(inputsDir):
            # Download, cache file and database all live in this directory.
            os.makedirs(inputsDir)
        if not os.path.exists(xmlfilename):
            print("Local copy does not exist, Downloading local copy of Mesh2012 desc2012.xml ... ")
            urllib.urlretrieve("ftp://nlmpubs.nlm.nih.gov/online/mesh/2012/xmlmesh/desc2012.xml", xmlfilename)
        if not os.path.exists(meshtextFilename):
            print (
                "Creating text file, to store extracted terms and uniqueID from xml, to be used in future runs ...")
            # Parse before opening the output so a broken XML file does not
            # leave an empty cache file behind.
            root = ET.parse(xmlfilename).getroot()
            with io.open(meshtextFilename, 'w', encoding='utf-8') as txtFile:
                for descriptorRec in root.findall("DescriptorRecord"):
                    meshID = toText(descriptorRec.find("DescriptorUI").text)
                    descName = toText(descriptorRec.find("DescriptorName").find("String").text)
                    txtFile.write(descName + u"\t" + meshID + u"\n")
                    for term in descriptorRec.findall("ConceptList/Concept/TermList/Term"):
                        altTerm = toText(term.find("String").text)
                        if altTerm != descName:
                            txtFile.write(altTerm + u"\t" + meshID + u"\n")

        conn = sqlite3.connect(dbFilename)
        conn.text_factory = str
        cursor = conn.cursor()
        cursor.execute("""CREATE TABLE IF NOT EXISTS meshTermID (
                                        term text PRIMARY KEY,
                                        meshID text
                                    );""")
        # Run the emptiness check on the SAME cursor that is fetched from;
        # checking via a different cursor always looks empty and causes the
        # table to be refilled on every start.
        cursor.execute("SELECT 1 FROM meshTermID LIMIT 1;")
        if cursor.fetchone() is None:
            print ("Database doesn't exist, creating ....")
            rows = []
            with io.open(meshtextFilename, 'r', encoding='utf-8') as txtFile:
                for line in txtFile:
                    nspace = line.strip().split("\t")
                    # Only well-formed "term<TAB>id" lines are loaded.
                    if len(nspace) == 2:
                        rows.append((nspace[0].lower(), nspace[1]))
            # OR IGNORE: lower-casing can make terms collide; keep the first.
            cursor.executemany("""INSERT OR IGNORE INTO meshTermID (term,meshID) VALUES (?,?);""", rows)
            conn.commit()
        print ("Creating connection to MeSHDB ....")
        return cursor
    except sqlite3.Error as sqr:
        print("Sqlite3 Error %s" % (sqr,))
    except Exception as ex:
        print("%s %s" % (ex, ex.args))
    # Only reached after a failure: release the connection, report "no cursor".
    if conn is not None:
        conn.close()
    return None
# Module-level driver: runs whenever this file is executed or imported,
# constructing a translator and printing one lookup result.
# NOTE(review): the only `def getMeshIDfromTerm` line in this view appears to
# belong to the removed side of the diff (superseded by
# getMeshIDfromTermDict / getMeshIDfromdb) -- confirm this call still resolves.
NT= namespaceTranslator()
print NT.getMeshIDfromTerm("Oligodendroglia")
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment