GitLab is now using https://gitlab.lcsb.uni.lu as its primary address. Please update your bookmarks. FAQ.

Commit 9b42c347 authored by Aishwarya Alex's avatar Aishwarya Alex
Browse files

Updated mapping of location and tissues and online source fixed

parent deb2f298
......@@ -12,7 +12,7 @@ SH-SY5Y CELL
Synapse COMPARTMENT
Synapses Synapse
Centrosome COMPARTMENT
App transgenic TISSUE
App transgenic CELL
Tissues
Beta cell CELL
primary cortical neuron CELL
......@@ -49,7 +49,7 @@ Endosome COMPARTMENT
trans-Golgi Network COMPARTMENT
Extracellular Space COMPARTMENT
Neurite COMPARTMENT
Neurites Neurites
Neurites Neurite
Blood Blood Cells
Blood Cells CELL
Bone Marrow Cells CELL
......@@ -57,3 +57,61 @@ Cell Membrane Structures Cell Membrane
Cerebrospinal Fluid TISSUE
Lysosome COMPARTMENT
Lysosomes Lysosome
Blood-Brain Barrier TISSUE
NERVOUSSYSTEM:Blood-Brain Barrier Blood-Brain Barrier
Cholinergic Neurons CELL
NERVOUSSYSTEM:Cholinergic Neurons Cholinergic Neurons
Dopaminergic Neurons CELL
NERVOUSSYSTEM:Dopaminergic Neurons Dopaminergic Neurons
NERVOUSSYSTEM:Nervous System TISSUE Nervous System
NERVOUSSYSTEM:Neurofibrillary Tangles COMPARTMENT Neurofibrillary Tangles
NERVOUSSYSTEM:Prefrontal Cortex TISSUE Prefrontal Cortex
NERVOUSSYSTEM:Prosencephalon TISSUE Prosencephalon
PATIENT:AD T2DM -ve ORGANISM PATIENT:AD T2DM -ve
USERDEFINEDCELLLINE:NT2N cells CELL NT2N cells
FDASTATUS:Phase 3
NERVOUSSYSTEM:CA1 Region, Hippocampal TISSUE CA1 Region, Hippocampal
NERVOUSSYSTEM:Cerebellum TISSUE Cerebellum
NERVOUSSYSTEM:CA2 Region, Hippocampal TISSUE CA2 Region, Hippocampal
NERVOUSSYSTEM:Neurites Neurite
NERVOUSSYSTEM:Spinal Cord TISSUE Spinal Cord
USERDEFINEDCELL:Astrocyte Astrocytes
USERDEFINEDCELLLINE:CHOAPPsw CHOAPPsw
NERVOUSSYSTEM:Interneurons CELL Interneurons
FLUIDANDSECRETION:Bodily Secretions TISSUE Bodily Secretions
FLUIDANDSECRETION:Serum TISSUE Serum
NERVOUSSYSTEM:Astrocytes Astrocytes
NERVOUSSYSTEM:Central Nervous System TISSUE Central Nervous System
USERDEFINEDCELLLINE:Neuroblastoma cell Neuroblastoma cell
USERDEFINEDCELLLINE:primary neuron primary neuron
NERVOUSSYSTEM:Autonomic Fibers, Postganglionic TISSUE Autonomic Fibers, Postganglionic
NERVOUSSYSTEM:Hypothalamus TISSUE Hypothalamus
CARDIOVASCULARSYSTEM:Blood Vessels TISSUE Blood vessels
CARDIOVASCULARSYSTEM:Microvessels TISSUE Microvessels
FDASTATUS:Phase 2
USERDEFINEDCELLLINE:N2a695 cell CELL N2a695 cell
NERVOUSSYSTEM:Microglia CELL Microglia
DISEASESTATE:Mild AD
PATIENT:AD T2DM +ve ORGANISM PATIENT:AD T2DM +ve
PATIENT:APOE e4 +ve ORGANISM PATIENT:APOE e4 +ve
NERVOUSSYSTEM:Cerebral Cortex TISSUE Cerebral Cortex
NERVOUSSYSTEM:Motor Neurons CELL Motor Neurons
USERDEFINEDCELLLINE:App transgenic App transgenic
DIGESTIVESYSTEM:Islets of Langerhans TISSUE Islets of Langerhans
USERDEFINEDCELLLINE:primary cortical neuron primary cortical neuron
USERDEFINEDSPECIES:C elegans ORGANISM C elegans
NERVOUSSYSTEM:Cerebrum TISSUE Cerebrum
USERDEFINEDCELLLINE:INS-1 cells INS-1 cells
FLUIDANDSECRETION:Cerebrospinal Fluid Cerebrospinal Fluid
NERVOUSSYSTEM:Hippocampus TISSUE Hippocampus
FLUIDANDSECRETION:Plasma TISSUE Plasma
USERDEFINEDCELLLINE:IDE APP transgenic CELL IDE APP transgenic
KNOCKOUTMICE:IDE KO mice ORGANISM IDE KO mice
USERDEFINEDSPECIES:drosophila ORGANISM drosophila
USERDEFINEDGENDER:Female
FLUIDANDSECRETION:Blood Blood Cells
DIGESTIVESYSTEM:Liver TISSUE Liver
USERDEFINEDCELL:Beta cell Beta cell
NERVOUSSYSTEM:Neurons Neurons
PATIENT:APOE e4 -ve ORGANISM PATIENT:APOE e4 -ve
NERVOUSSYSTEM:Brain Brain
HGNC urn:miriam:hgnc.symbol
RGD urn:miriam:rgd.symbol
MGI urn:miriam:mgi.symbol
PubMed urn:miriam:pubmed
RGD urn:miriam:rgd
MGI urn:miriam:mgd:MGI
GO urn:miriam:obo.go
MESH urn:miriam:mesh.2012
CHEBIID urn:miriam:chebi:CHEBI
CHEBI urn:miriam:chebi:CHEBI
GOCCTERM urn:miriam:obo.go
ENSMBL urn:miriam:ensembl
\ No newline at end of file
ENSMBL urn:miriam:ensembl
PubMed urn:miriam:pubmed
\ No newline at end of file
......@@ -12,30 +12,52 @@
# Import the modules needed to run the script.
from future.standard_library import install_aliases
install_aliases()
import reflectclient, os, urllib, sqlite3, requests,logging,re,codecs
import reflectclient, os, urllib, sqlite3, requests,logging,re,json
import httplib2 as http
from io import open
from reflectclient.config import EntityType
from SPARQLWrapper import SPARQLWrapper, JSON
import xml.etree.ElementTree as ET
import libchebipy as CB
import xml.etree.ElementTree as et
import libchebipy as cb
class namespaceTranslator:
# LOAD NAMESPACE DICTIONARY
def __init__(self):
    """Initialise the translator: MeSH database cursor and lookup cache.

    Attributes set here:
        meshCursor: the value returned by ``self.createMeSHDB()``, or
            ``None`` when that call raises ``IndexError``.
        from_abund_dict: dict used elsewhere in this class as a cache that
            maps a term to its resolved ``[termId, elementName]`` pair.
    """
    # Define both attributes before doing any work, so a failure inside
    # createMeSHDB() can never leave the instance without them (which would
    # surface later as an unrelated AttributeError).
    self.meshCursor = None
    self.from_abund_dict = {}
    try:
        self.meshCursor = self.createMeSHDB()
    except IndexError:
        logging.info("MeSH Dictionary error")
# self.meshDict = dict
def get_reference(self, referencetype, reference):
    """Build a ``<namespace>:<identifier>`` reference string.

    ``referencetype`` is compared case-insensitively:

    * ``"pubmed"`` is mapped to ``urn:miriam:pubmed``.
    * ``"online resource"`` is mapped according to the shape of
      ``reference``: ``DB<digits>`` -> drugbank, ``REACT_<digits>`` ->
      reactome, ``hsa<digits>`` -> kegg.pathway, ``ATC Code: N...`` -> atc
      and ``ATC Code: DB<digits>`` -> drugbank. For the two ``ATC Code: ``
      forms the literal prefix is removed from the identifier.
    * Anything else is passed through unchanged.

    Args:
        referencetype: Reference type label, e.g. ``"PubMed"`` or
            ``"Online Resource"``.
        reference: The identifier text that accompanies the label.

    Returns:
        ``referencetype + ":" + reference`` after the mapping above.
    """
    atc_prefix = "ATC Code: "
    kind = referencetype.lower()

    if kind == "pubmed":
        referencetype = "urn:miriam:pubmed"
    elif kind == "online resource":
        if re.match(r"^DB[0-9]+$", reference):
            referencetype = "urn:miriam:drugbank"
        elif re.match(r"^REACT_[0-9]+\.?", reference):
            referencetype = "urn:miriam:reactome"
        elif re.match(r"^hsa[0-9]+", reference):
            referencetype = "urn:miriam:kegg.pathway"
        elif re.match(r"^ATC Code: N[0-9A-Z]+$", reference):
            referencetype = "urn:miriam:atc"
            # Slice the prefix off. str.strip() treats its argument as a
            # set of characters and would also eat a trailing A/T/C from
            # the code itself (e.g. "N06A" -> "N06").
            reference = reference[len(atc_prefix):]
        elif re.match(r"^ATC Code: DB[0-9]+$", reference):
            reference = reference[len(atc_prefix):]
            referencetype = "urn:miriam:drugbank"

    return referencetype + ":" + reference
def queryMeSHSPARQL(self, term):
sparql = SPARQLWrapper("http://id.nlm.nih.gov/mesh/sparql", )
preparedQuery = """
......@@ -74,6 +96,47 @@ class namespaceTranslator:
meshResult = result['results']['bindings'][0]['d']['value'].split("/")[-1]
meshID = meshResult if meshResult is not None else None
return meshID
def get_mgi_rgd_id_from_hgnc_symbol(self, hgncsymbol, id_type):
    """Look up an ortholog database ID for an HGNC symbol via genenames.org.

    Sends ``GET http://rest.genenames.org/fetch/symbol/<hgncsymbol>`` asking
    for JSON, and reads ``response.docs[0][id_type][0]`` from the reply. The
    part after the first ``":"`` of that value is returned.

    Args:
        hgncsymbol: Gene symbol to look up.
        id_type: Name of the field to read from the returned document; the
            usage examples in this file pass ``"mgd_id"`` and ``"rgd_id"``.

    Returns:
        The extracted ID as a string. ``hgncsymbol`` itself is returned as a
        fallback when the HTTP status is not ``200``, when the service does
        not report exactly one match, or when any exception occurs.
    """
    try:
        from urllib.parse import quote, urlparse

        headers = {
            'Accept': 'application/json',
        }
        uri = 'http://rest.genenames.org'
        # Percent-encode the symbol so characters such as "/", "?" or spaces
        # cannot change the request path.
        path = '/fetch/symbol/' + quote(str(hgncsymbol), safe='')
        target = urlparse(uri + path)
        response, content = http.Http().request(
            target.geturl(), 'GET', '', headers)

        if response['status'] != '200':
            logging.info('Error while fetching MGI or RGD ID detected: ' + response['status'])
            return hgncsymbol

        # The service was asked for JSON; decode and parse the reply.
        data = json.loads(content.decode())
        if data['response']['numFound'] != 1:
            # No match or an ambiguous match: keep the symbol as-is.
            return hgncsymbol
        return str(data['response']['docs'][0][id_type][0].split(":")[1])
    except Exception as otherex:
        # Best-effort lookup: any failure (network, JSON, missing field)
        # falls back to the symbol, with the cause recorded in the log.
        logging.info('Error while fetching MGI or RGD ID detected,returning hgnc symbol ')
        logging.info("%r", otherex.args)
        return hgncsymbol
def get_locationMap_from_mapping_file(self,filename):
mergeDict={}
......@@ -81,13 +144,20 @@ class namespaceTranslator:
locFile=open(filename,'r')
for line in locFile:
locList=line.strip("\n").split("\t")
#print (locList)
term=locList[0]
locationType=locList[1]
mergeTerm=locList[2]
if len(locList)<3:
print (locList)
locationType=locList[1] if len(locList)==3 else ""
mergeTerm=locList[2] if len(locList)==3 else ""
#print (term +" --> "+mergeTerm+" # "+locationType)
if mergeTerm!="":
mergeDict[term]=mergeTerm
else:
if locationType!="":
locationMap[mergeTerm] = locationType
elif locationType !="" and locList[2]=="":
locationMap[term]=locationType
#print(term + " --> " + mergeTerm + " # " + locationType+"\n\n")
locationMapList=[mergeDict,locationMap]
return locationMapList
......@@ -101,7 +171,7 @@ class namespaceTranslator:
# LOAD reaction DICTIONARY
def getReactionDict(self):
reactionDict = {}
for line in open("inputs/reactionDictFile.txt", 'r'):
for line in open("inputs/reactionMapping.txt", 'r'):
nspace = line.split()
reactionDict[nspace[0]] = nspace[1]
return reactionDict
......@@ -143,42 +213,48 @@ class namespaceTranslator:
try:
termId = term
idAndelement = []
reflect_client = reflectclient.ReflectClient()
# check if protein : eg: Amyloid beta peptides- abundance/ protein abundance w/o namespace
if elementName == "PROTEIN":
logging.info("Connecting to reflectclient . . .")
POresult = reflect_client.get_entities(term, [EntityType.proteins])
if len(POresult) > 0:
logging.info(POresult)
termId = POresult[0]['entities'][0]['identifier']
entityType = POresult[0]['entities'][0]['type']
if entityType == '9606':
elementName = "PROTEIN"
# check for GO cellular component ->Complex, biological process, moelcular function -> phenotype
if term in self.from_abund_dict.keys():
idAndelement=self.from_abund_dict[term]
else:
logging.info("Checking in reflect client, protein and go")
GOresult = reflect_client.get_entities(term,
[EntityType.cellular_component, EntityType.biological_process,
EntityType.molecular_function])
if len(GOresult) > 0:
logging.info("Returns in GO term")
termId = GOresult[0]['entities'][0]['identifier']
entityType = GOresult[0]['entities'][0]['type']
if entityType == "-21" or entityType == "-23":
elementName = "PHENOTYPE"
elif entityType == "-22":
elementName = "COMPLEX"
elif entityType == "9606":
elementName = "PROTEIN"
# if element is not GO term or Protein , check in MeSH
reflect_client = reflectclient.ReflectClient()
# check if protein : eg: Amyloid beta peptides- abundance/ protein abundance w/o namespace
if elementName == "PROTEIN":
logging.info("Connecting to reflectclient . . .")
POresult = reflect_client.get_entities(term, [EntityType.proteins])
if len(POresult) > 0:
logging.info(POresult)
termId = POresult[0]['entities'][0]['identifier']
entityType = POresult[0]['entities'][0]['type']
if entityType == '9606':
elementName = "PROTEIN"
# check for GO cellular component ->Complex, biological process, moelcular function -> phenotype
else:
# meshID=self.getMeshIDfromTermDict(termId)
meshID = self.getMeshIDfromdb(self.meshCursor, termId)
if meshID is not None:
termId = meshID
elementName = "PHENOTYPE"
logging.info("Checking in reflect client, protein and go")
GOresult = reflect_client.get_entities(term,
[EntityType.cellular_component, EntityType.biological_process,
EntityType.molecular_function])
if len(GOresult) > 0:
logging.info("Returns in GO term")
termId = GOresult[0]['entities'][0]['identifier']
entityType = GOresult[0]['entities'][0]['type']
if entityType == "-21" or entityType == "-23":
elementName = "PHENOTYPE"
elif entityType == "-22":
elementName = "COMPLEX"
elif entityType == "9606":
elementName = "PROTEIN"
# if element is not GO term or Protein , check in MeSH
else:
# meshID=self.getMeshIDfromTermDict(termId)
meshID = self.getMeshIDfromdb(self.meshCursor, termId)
if meshID is not None:
termId = meshID
elementName = "PHENOTYPE"
idAndelement = [termId, elementName]
self.from_abund_dict[term]=idAndelement
idAndelement = [termId, elementName]
return idAndelement
except requests.ConnectionError as err:
logging.info("ReflectCLient connection error: " + err)
......@@ -203,12 +279,12 @@ class namespaceTranslator:
try:
chebiid=chebiid.replace(" ","")
if re.match("^CHEBIID:[0-9]+$", chebiid) is not None:
cb=CB.ChebiEntity(chebiid)
c=cb.ChebiEntity(chebiid)
elif re.match("^[0-9]+$",chebiid):
chebiid="CHEBI:"+chebiid
cb = CB.ChebiEntity(chebiid)
return cb.get_name()
c = cb.ChebiEntity(chebiid)
return c.get_name()
except Exception as otherEx:
logging.info("CEHBI library error.. ignoring entity")
return None
......@@ -278,7 +354,7 @@ class namespaceTranslator:
logging.info (
"Creating text file, to store extracted terms and uniqueID from xml, to be used in future runs ...")
txtFile = open(meshtextFilename, 'w')
tree = ET.parse(xmlfilename)
tree = et.parse(xmlfilename)
root = tree.getroot()
for descriptorRec in root.findall("DescriptorRecord"):
meshID = descriptorRec.find("DescriptorUI").text
......@@ -347,7 +423,7 @@ class namespaceTranslator:
logging.info (
"Creating text file, to store extracted terms and uniqueID from xml, to be used in future runs ...")
txtFile = open(meshtextFilename, 'w')
tree = ET.parse(xmlfilename)
tree = et.parse(xmlfilename)
root = tree.getroot()
for descriptorRec in root.findall("DescriptorRecord"):
meshID = descriptorRec.find("DescriptorUI").text
......@@ -398,7 +474,6 @@ class namespaceTranslator:
logging.info(ex, ex.args)
#NT= namespaceTranslator()
# NT.getChemicalTermfromCHEBIID("26523")
# logging.info (NT.getMeshIDfromdb(NT.meshCursor,"insomnia"))
# NT=namespaceTranslator()
# NT.get_mgi_rgd_id_from_hgnc_symbol("RETN","mgd_id")
# NT.get_mgi_rgd_id_from_hgnc_symbol("RETN","rgd_id")
\ No newline at end of file
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment