GitLab is now using https://gitlab.lcsb.uni.lu as its primary address. Please update your bookmarks. FAQ.

Commit 84c6f867 authored by Aishwarya Alex's avatar Aishwarya Alex
Browse files

restructured code, class

parent cce9e582
......@@ -15,154 +15,180 @@ import MySQLdb, reflectclient, os,urllib
from reflectclient.config import EntityType
import rdflib
from SPARQLWrapper import SPARQLWrapper, JSON, XML, N3, RDF
class namespaceTranslator:
#LOAD NAMESPACE DICTIONARY
def queryMeSHSPARQL():
    """Send a fixed SPARQL query to the NLM MeSH endpoint and print the reply.

    The query selects MeSH descriptors having a term whose label matches
    "insomnia" (whole label, case-insensitive). The response is requested
    as JSON, converted by SPARQLWrapper, and printed. Nothing is returned.
    """
    endpoint = SPARQLWrapper("http://id.nlm.nih.gov/mesh/sparql")
    # The query text is kept flush-left so the string sent to the endpoint
    # is exactly the one in the original code.
    mesh_query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX meshv: <http://id.nlm.nih.gov/mesh/vocab#>
PREFIX mesh: <http://id.nlm.nih.gov/mesh/>
PREFIX mesh2015: <http://id.nlm.nih.gov/mesh/2015/>
PREFIX mesh2016: <http://id.nlm.nih.gov/mesh/2016/>
PREFIX mesh2017: <http://id.nlm.nih.gov/mesh/2017/>
SELECT ?d
FROM <http://id.nlm.nih.gov/mesh>
WHERE {
?d a meshv:Descriptor .
?d meshv:concept ?c .
?c meshv:term ?term .
?term rdfs:label ?label .
FILTER (REGEX(?label,"^insomnia$","i"))
}
"""
    endpoint.setQuery(mesh_query)
    endpoint.setReturnFormat(JSON)
    response = endpoint.query().convert()
    print(response)
def getDict(filename):
    """Load a two-column, whitespace-separated file into a dictionary.

    Each line is split on whitespace; the first field becomes the key and
    the second field the value. Any further fields on the line are ignored,
    and a later line overwrites an earlier one with the same key. Lines with
    fewer than two fields (for example blank lines) are skipped instead of
    raising IndexError.

    Args:
        filename: Path of the text file to read.

    Returns:
        A dict mapping each line's first field to its second field.
    """
    mapping = {}
    # "with" guarantees the file handle is closed, even if reading fails.
    with open(filename, 'r') as handle:
        for line in handle:
            fields = line.split()
            if len(fields) < 2:
                continue
            mapping[fields[0]] = fields[1]
    return mapping
#LOAD reaction DICTIONARY
def getReactionDict(filename="inputs/reactionDictFile.txt"):
    """Load the reaction dictionary from a two-column text file.

    Each line is split on whitespace; the first field is the key and the
    second field the value. Lines with fewer than two fields (for example
    blank lines) are skipped instead of raising IndexError.

    Args:
        filename: Path of the reaction dictionary file. Defaults to the
            location the original code hard-coded.

    Returns:
        A dict mapping each line's first field to its second field.
    """
    reactionDict = {}
    # "with" guarantees the file handle is closed, even if reading fails.
    with open(filename, 'r') as handle:
        for line in handle:
            fields = line.split()
            if len(fields) < 2:
                continue
            reactionDict[fields[0]] = fields[1]
    return reactionDict
def termToIdentifier(belnamespace, term):
    """Translate *term* to an identifier chosen by its BEL namespace.

    A namespace containing "GO" is resolved with getGoIDfromTerm and one
    containing "CHEM" with getChemicalIDfromTerm ("GO" is checked first).
    For any other namespace the term is returned unchanged.
    """
    if "GO" in belnamespace:
        return getGoIDfromTerm(term)
    if "CHEM" in belnamespace:
        return getChemicalIDfromTerm(term)
    return term
#Get GOID from reflect
def getGoIDfromTerm(term):
    """Look *term* up with the Reflect client, restricted to GO entity types.

    Returns the identifier of the first entity of the first match, or
    *term* itself when the client returns no matches.
    """
    client = reflectclient.ReflectClient()
    go_types = [
        EntityType.biological_process,
        EntityType.cellular_component,
        EntityType.molecular_function,
    ]
    matches = client.get_entities(term, go_types)
    if len(matches) > 0:
        return matches[0]['entities'][0]['identifier']
    return term
def getIDandNamespacefromAbundance(term, elementName):
    """Resolve an abundance term to an identifier and an element name.

    The term is first looked up with the Reflect client (GO entity types).
    On a hit, the first entity's identifier is used and the element name is
    set from its type code: "-21" or "-23" give "PHENOTYPE", "-22" gives
    "COMPLEX" (presumably the cellular-component code -- confirm against the
    Reflect documentation); any other code leaves *elementName* as passed.
    Without a GO hit, the term is looked up with getMeshIDfromTerm; a MeSH
    hit sets the identifier and the element name "PHENOTYPE".

    Args:
        term: The term to resolve.
        elementName: Element name to return when no lookup overrides it.

    Returns:
        A two-item list ``[identifier, elementName]``; the identifier is
        *term* itself when neither lookup finds anything.
    """
    termId = term
    reflect_client = reflectclient.ReflectClient()
    GOresult = reflect_client.get_entities(
        term,
        [EntityType.cellular_component,
         EntityType.biological_process,
         EntityType.molecular_function])
    # Track the GO hit explicitly instead of comparing strings with "is",
    # which tests object identity rather than equality.
    foundInGO = len(GOresult) > 0
    if foundInGO:
        entity = GOresult[0]['entities'][0]
        termId = entity['identifier']
        entityType = entity['type']
        if entityType == "-21" or entityType == "-23":
            elementName = "PHENOTYPE"
        elif entityType == "-22":
            elementName = "COMPLEX"
    else:
        # Not found in GO, so check MeSH.
        print(termId + " : here")
        meshID = getMeshIDfromTerm(term)
        # getMeshIDfromTerm hands the term back unchanged on a miss.
        if meshID != term:
            termId = meshID
            elementName = "PHENOTYPE"
    return [termId, elementName]
#Get pubchemID from reflect
def getChemicalIDfromTerm(term):
    """Look *term* up with the Reflect client, restricted to chemicals.

    Returns the identifier of the first entity of the first match, or
    *term* itself when the client returns no matches.
    """
    client = reflectclient.ReflectClient()
    matches = client.get_entities(term, [EntityType.chemicals])
    if len(matches) > 0:
        return matches[0]['entities'][0]['identifier']
    return term
#Get MeSH ID from the local MeSH dictionary (not from reflect)
def getMeshIDfromTerm(term):
    """Return the MeSH ID for *term*, or *term* itself when it is unknown.

    The lookup is case-insensitive: the term is lower-cased before it is
    searched in the dictionary built by getMeSHDict(). Note that the
    dictionary is rebuilt by getMeSHDict() on every call.

    Args:
        term: The term to look up.

    Returns:
        The MeSH ID stored for the lower-cased term, else *term* unchanged.
    """
    lowerTerm = term.lower()
    # dict.get replaces the deprecated has_key() test plus a second lookup.
    return getMeSHDict().get(lowerTerm, term)
def getMeSHDict(filename="inputs/2017MeshTree.txt"):
    """Build a lower-cased term -> MeSH ID dictionary from the MeSH tree file.

    If *filename* does not exist it is first downloaded from the NLM site.
    The file is then parsed in either case, so a freshly downloaded file is
    used right away instead of yielding an empty dictionary on the first run.
    NUL characters are removed from each line, the line is stripped and
    split on tabs, and only lines with exactly three fields are used: field
    two is the MeSH ID and field three (lower-cased) is the term.

    Args:
        filename: Path of the MeSH tree file. Defaults to the location the
            original code hard-coded.

    Returns:
        A dict mapping lower-cased terms to MeSH IDs.
    """
    meshDict = {}
    try:
        # input file fetched from: https://www.nlm.nih.gov/mesh/2017/download/2017MeshTree.txt
        if not os.path.exists(filename):
            urllib.urlretrieve("https://www.nlm.nih.gov/mesh/2017/download/2017MeshTree.txt", filename)
        # "with" guarantees the file handle is closed after parsing.
        with open(filename, 'r') as handle:
            for line in handle:
                fields = line.replace('\x00', '').strip().split("\t")
                if len(fields) == 3:
                    meshDict[fields[2].lower()] = fields[1]
    except IndexError:
        # Kept from the original; the length check above already guards
        # the field indexing.
        print("MeSH Dictionary error")
    return meshDict
#getMeshIDfromTerm("Drug-Induced Liver Injury")
# getGoIDfromTerm("leukotriene production involved in inflammatory response")
#getGoIDfromTerm("Stress, Physiological")
# getChemicalIDfromTerm("lipopigment")
#getIDandNamespacefromAbundance("synaptic transmission","UNKNOWN")
#print getIDandNamespacefromAbundance("gamma Secretase Complex","UNKNOWN")
#print getIDandNamespacefromAbundance("Insomnia","PHENOTYPE")
# getIDandNamespacefromAbundance("Drug-Induced Liver Injury","UNKNOWN")
# getIDandNamespacefromAbundance("Polymorphism, Single Nucleotide","UNKNOWN")
# Module-level call: runs the MeSH SPARQL query (and prints its result)
# every time this file is executed or imported.
queryMeSHSPARQL()
\ No newline at end of file
def __init__(self):
''' Constructor for this class. '''
# Create some member animals
dict={}
filename="inputs/2017MeshTree.txt"
try:
# input file fetch from :https://www.nlm.nih.gov/mesh/2017/download/2017MeshTree.txt
if not os.path.exists(filename):
urllib.urlretrieve("https://www.nlm.nih.gov/mesh/2017/download/2017MeshTree.txt", filename)
else:
for line in open(filename,'r'):
line=line.replace('\x00','')
line=line.strip()
nspace=line.split("\t")
if len(nspace)==3:
meshID=nspace[1]
term=nspace[2].lower()
dict[term]=meshID
except IndexError as dicterror:
print str("MeSH Dictionary error")
self.meshDict = dict
def queryMeSHSPARQL(self):
    """Send a fixed SPARQL query to the NLM MeSH endpoint and print the reply.

    The query selects MeSH descriptors having a term whose label matches
    "insomnia" (whole label, case-insensitive). The response is requested
    as JSON, converted by SPARQLWrapper, and printed. Nothing is returned
    and no instance state is read or written.
    """
    endpoint = SPARQLWrapper("http://id.nlm.nih.gov/mesh/sparql")
    # The query text is kept flush-left so the string sent to the endpoint
    # is exactly the one in the original code.
    mesh_query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX meshv: <http://id.nlm.nih.gov/mesh/vocab#>
PREFIX mesh: <http://id.nlm.nih.gov/mesh/>
PREFIX mesh2015: <http://id.nlm.nih.gov/mesh/2015/>
PREFIX mesh2016: <http://id.nlm.nih.gov/mesh/2016/>
PREFIX mesh2017: <http://id.nlm.nih.gov/mesh/2017/>
SELECT ?d
FROM <http://id.nlm.nih.gov/mesh>
WHERE {
?d a meshv:Descriptor .
?d meshv:concept ?c .
?c meshv:term ?term .
?term rdfs:label ?label .
FILTER (REGEX(?label,"^insomnia$","i"))
}
"""
    endpoint.setQuery(mesh_query)
    endpoint.setReturnFormat(JSON)
    response = endpoint.query().convert()
    print(response)
def getDict(self, filename):
    """Load a two-column, whitespace-separated file into a dictionary.

    Each line is split on whitespace; the first field becomes the key and
    the second field the value. Any further fields on the line are ignored,
    and a later line overwrites an earlier one with the same key. Lines with
    fewer than two fields (for example blank lines) are skipped instead of
    raising IndexError.

    Args:
        filename: Path of the text file to read.

    Returns:
        A dict mapping each line's first field to its second field.
    """
    mapping = {}
    # "with" guarantees the file handle is closed, even if reading fails.
    with open(filename, 'r') as handle:
        for line in handle:
            fields = line.split()
            if len(fields) < 2:
                continue
            mapping[fields[0]] = fields[1]
    return mapping
#LOAD reaction DICTIONARY
def getReactionDict(self, filename="inputs/reactionDictFile.txt"):
    """Load the reaction dictionary from a two-column text file.

    Each line is split on whitespace; the first field is the key and the
    second field the value. Lines with fewer than two fields (for example
    blank lines) are skipped instead of raising IndexError.

    Args:
        filename: Path of the reaction dictionary file. Defaults to the
            location the original code hard-coded.

    Returns:
        A dict mapping each line's first field to its second field.
    """
    reactionDict = {}
    # "with" guarantees the file handle is closed, even if reading fails.
    with open(filename, 'r') as handle:
        for line in handle:
            fields = line.split()
            if len(fields) < 2:
                continue
            reactionDict[fields[0]] = fields[1]
    return reactionDict
def termToIdentifier(self,belnamespace,term):
if "GO" in belnamespace:
term=namespaceTranslator.getGoIDfromTerm(term)
elif "CHEM" in belnamespace:
term=namespaceTranslator.getChemicalIDfromTerm(term)
# else:
# idAndElement=getIDandNamespacefromAbundance("UNKNOWN",term)
# term=idAndElement[0]
return term
#Get GOID from reflect
def getGoIDfromTerm(self, term):
    """Look *term* up with the Reflect client, restricted to GO entity types.

    Returns the identifier of the first entity of the first match, or
    *term* itself when the client returns no matches.
    """
    client = reflectclient.ReflectClient()
    go_types = [
        EntityType.biological_process,
        EntityType.cellular_component,
        EntityType.molecular_function,
    ]
    matches = client.get_entities(term, go_types)
    if len(matches) > 0:
        return matches[0]['entities'][0]['identifier']
    return term
def getIDandNamespacefromAbundance(self, term, elementName):
    """Resolve an abundance term to an identifier and an element name.

    The term is first looked up with the Reflect client (GO entity types).
    On a hit, the first entity's identifier is used and the element name is
    set from its type code: "-21" or "-23" give "PHENOTYPE", "-22" gives
    "COMPLEX" (presumably the cellular-component code -- confirm against the
    Reflect documentation); any other code leaves *elementName* as passed.
    Without a GO hit, the term is looked up with self.getMeshIDfromTerm; a
    MeSH hit sets the identifier and the element name "PHENOTYPE".

    Args:
        term: The term to resolve.
        elementName: Element name to return when no lookup overrides it.

    Returns:
        A two-item list ``[identifier, elementName]``; the identifier is
        *term* itself when neither lookup finds anything.
    """
    termId = term
    reflect_client = reflectclient.ReflectClient()
    GOresult = reflect_client.get_entities(
        term,
        [EntityType.cellular_component,
         EntityType.biological_process,
         EntityType.molecular_function])
    # Track the GO hit explicitly instead of comparing strings with "is",
    # which tests object identity rather than equality.
    foundInGO = len(GOresult) > 0
    if foundInGO:
        entity = GOresult[0]['entities'][0]
        termId = entity['identifier']
        entityType = entity['type']
        if entityType == "-21" or entityType == "-23":
            elementName = "PHENOTYPE"
        elif entityType == "-22":
            elementName = "COMPLEX"
    else:
        # Not found in GO, so check MeSH.
        print(termId + " : here")
        meshID = self.getMeshIDfromTerm(term)
        # getMeshIDfromTerm hands the term back unchanged on a miss.
        if meshID != term:
            termId = meshID
            elementName = "PHENOTYPE"
    return [termId, elementName]
#Get pubchemID from reflect
def getChemicalIDfromTerm(self, term):
    """Look *term* up with the Reflect client, restricted to chemicals.

    Returns the identifier of the first entity of the first match, or
    *term* itself when the client returns no matches.
    """
    client = reflectclient.ReflectClient()
    matches = client.get_entities(term, [EntityType.chemicals])
    if len(matches) > 0:
        return matches[0]['entities'][0]['identifier']
    return term
#Get MeSH ID from the MeSH dictionary loaded in __init__ (not from reflect)
def getMeshIDfromTerm(self,term):
mesh_id=term
lowerTerm=term.lower()
mDict = self.meshDict #namespaceTranslator.getMeSHDict()
mesh_id = mDict[lowerTerm] if mDict.has_key(lowerTerm) is True else term
return mesh_id
def getMeSHDict(self, filename="inputs/2017MeshTree.txt"):
    """Build a lower-cased term -> MeSH ID dictionary from the MeSH tree file.

    If *filename* does not exist it is first downloaded from the NLM site.
    The file is then parsed in either case, so a freshly downloaded file is
    used right away instead of yielding an empty dictionary on the first run.
    NUL characters are removed from each line, the line is stripped and
    split on tabs, and only lines with exactly three fields are used: field
    two is the MeSH ID and field three (lower-cased) is the term. No
    instance state is read or written.

    Args:
        filename: Path of the MeSH tree file. Defaults to the location the
            original code hard-coded.

    Returns:
        A dict mapping lower-cased terms to MeSH IDs.
    """
    meshDict = {}
    try:
        # input file fetched from: https://www.nlm.nih.gov/mesh/2017/download/2017MeshTree.txt
        if not os.path.exists(filename):
            urllib.urlretrieve("https://www.nlm.nih.gov/mesh/2017/download/2017MeshTree.txt", filename)
        # "with" guarantees the file handle is closed after parsing.
        with open(filename, 'r') as handle:
            for line in handle:
                fields = line.replace('\x00', '').strip().split("\t")
                if len(fields) == 3:
                    meshDict[fields[2].lower()] = fields[1]
    except IndexError:
        # Kept from the original; the length check above already guards
        # the field indexing.
        print("MeSH Dictionary error")
    return meshDict
#getMeshIDfromTerm("Drug-Induced Liver Injury")
# getGoIDfromTerm("leukotriene production involved in inflammatory response")
#getGoIDfromTerm("Stress, Physiological")
# getChemicalIDfromTerm("lipopigment")
#getIDandNamespacefromAbundance("synaptic transmission","UNKNOWN")
#print getIDandNamespacefromAbundance("gamma Secretase Complex","UNKNOWN")
#print getIDandNamespacefromAbundance("Insomnia","PHENOTYPE")
# getIDandNamespacefromAbundance("Drug-Induced Liver Injury","UNKNOWN")
# getIDandNamespacefromAbundance("Polymorphism, Single Nucleotide","UNKNOWN")
# Module-level smoke test: constructing the translator loads the MeSH
# dictionary, then the lookup result for "Oligodendroglia" is printed
# (the MeSH ID if the term is known, otherwise the term itself).
NT= namespaceTranslator()
print NT.getMeshIDfromTerm("Oligodendroglia")
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment