GitLab is now using https://gitlab.lcsb.uni.lu as its primary address. Please update your bookmarks. FAQ.

Commit b0f8c801 authored by Aishwarya Alex
Browse files

SPARQL addition

parent 6199cfca
......@@ -13,9 +13,42 @@
# Import the modules needed to run the script.
import MySQLdb, reflectclient, os,urllib
from reflectclient.config import EntityType
#LOAD NAMESPACE DICTIONARY
import rdflib
from SPARQLWrapper import SPARQLWrapper, JSON, XML, N3, RDF
#LOAD NAMESPACE DICTIONARY
def queryMeSHSPARQL():
    """Query the NLM MeSH SPARQL endpoint for descriptors labelled "insomnia".

    Sends a SELECT query to http://id.nlm.nih.gov/mesh/sparql asking for every
    ``meshv:Descriptor`` that has a concept with a term whose ``rdfs:label``
    matches the regex ``^insomnia$`` case-insensitively. The converted
    response is printed and also returned so callers can use it.

    Returns:
        The value produced by ``sparql.query().convert()`` with the return
        format set to JSON.
    """
    sparql = SPARQLWrapper("http://id.nlm.nih.gov/mesh/sparql")
    # The query text is kept flush-left inside the literal so the string sent
    # to the endpoint is unchanged.
    sparql.setQuery("""
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX meshv: <http://id.nlm.nih.gov/mesh/vocab#>
PREFIX mesh: <http://id.nlm.nih.gov/mesh/>
PREFIX mesh2015: <http://id.nlm.nih.gov/mesh/2015/>
PREFIX mesh2016: <http://id.nlm.nih.gov/mesh/2016/>
PREFIX mesh2017: <http://id.nlm.nih.gov/mesh/2017/>
SELECT ?d
FROM <http://id.nlm.nih.gov/mesh>
WHERE {
?d a meshv:Descriptor .
?d meshv:concept ?c .
?c meshv:term ?term .
?term rdfs:label ?label .
FILTER (REGEX(?label,"^insomnia$","i"))
}
""")
    sparql.setReturnFormat(JSON)
    result = sparql.query().convert()
    # Parenthesised single-argument print emits the same text under Python 2
    # and is also valid Python 3.
    print(result)
    # Previously the result was only printed and then lost; return it too.
    return result
def getDict(filename):
dict={}
......@@ -38,8 +71,9 @@ def termToIdentifier(belnamespace,term):
term=getGoIDfromTerm(term)
elif "CHEM" in belnamespace:
term=getChemicalIDfromTerm(term)
elif belnamespace is "":
# else:
# idAndElement=getIDandNamespacefromAbundance("UNKNOWN",term)
# term=idAndElement[0]
return term
#Get GOID from reflect
......@@ -51,18 +85,35 @@ def getGoIDfromTerm(term):
if len(GOresult) >0:
goID=GOresult[0]['entities'][0]['identifier']
#print goID
return goID
def getIDandNamespacefromAbundance(term):
def getIDandNamespacefromAbundance(term,elementName):
termId=term
#elementName="UNKNOWN"
idAndelement=[]
#check GO
reflect_client = reflectclient.ReflectClient()
GOresult = reflect_client.get_entities(term, [EntityType.biological_process, EntityType.cellular_component,
EntityType.molecular_function])
# check for GO cellular component ->Complex
GOresult = reflect_client.get_entities(term, [EntityType.cellular_component,EntityType.biological_process, EntityType.molecular_function])
if len(GOresult) > 0 :
termId = GOresult[0]['entities'][0]['identifier']
entityType=GOresult[0]['entities'][0]['type']
if entityType == "-21" or entityType == "-23":
elementName = "PHENOTYPE"
elif entityType == "-22":
elementName = "COMPLEX"
#not found in GO, so check in mesh
if termId is term:
print termId +" : here"
meshID=getMeshIDfromTerm(term)
if meshID is not term:
termId=meshID
elementName="PHENOTYPE"
idAndelement=[termId,elementName]
#print idAndelement
return idAndelement
if len(GOresult) > 0:
goID = GOresult[0]['entities'][0]['identifier']
# print goID
#Get pubchemID from reflect
def getChemicalIDfromTerm(term):
......@@ -75,12 +126,10 @@ def getChemicalIDfromTerm(term):
#Get meshID from the MeSH dictionary (case-insensitive lookup; falls back to the term itself)
def getMeshIDfromTerm(term):
mesh_id= term
reflect_client = reflectclient.ReflectClient()
meshResult=reflect_client.get_entities(term, [EntityType.diseases])
print meshResult
if len(meshResult) >0:
mesh_id=meshResult[0]['entities'][0]['identifier']
mesh_id=term
lowerTerm=term.lower()
mDict = getMeSHDict()
mesh_id = mDict[lowerTerm] if mDict.has_key(lowerTerm) is True else term
return mesh_id
......@@ -100,14 +149,20 @@ def getMeSHDict():
nspace=line.split("\t")
if len(nspace)==3:
meshID=nspace[1]
term=nspace[2]
term=nspace[2].lower()
dict[term]=meshID
except IndexError as dicterror:
print str("here")
print str("MeSH Dictionary error")
return dict
#getMeshIDfromTerm("Drug-Induced Liver Injury")
# getGoIDfromTerm("leukotriene production involved in inflammatory response")
#getGoIDfromTerm("inflammatory response")
#getGoIDfromTerm("Stress, Physiological")
# getChemicalIDfromTerm("lipopigment")
#getIDandNamespacefromAbundance("synaptic transmission","UNKNOWN")
#print getIDandNamespacefromAbundance("gamma Secretase Complex","UNKNOWN")
#print getIDandNamespacefromAbundance("Insomnia","PHENOTYPE")
# getIDandNamespacefromAbundance("Drug-Induced Liver Injury","UNKNOWN")
# getIDandNamespacefromAbundance("Polymorphism, Single Nucleotide","UNKNOWN")
# Top-level call: the MeSH SPARQL query runs whenever this module is executed or imported.
queryMeSHSPARQL()
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment