Skip to content
Snippets Groups Projects
Commit c60b8acd authored by Valentin Groues's avatar Valentin Groues :eyes:
Browse files

add integration example

parent cdec42d6
No related branches found
No related tags found
No related merge requests found
import logging
from typing import List
from utils import uri_to_entity_code, standarise_underscored_entity_code
from SPARQLWrapper import SPARQLWrapper, JSON, POSTDIRECTLY
from SPARQLWrapper.SPARQLExceptions import QueryBadFormed
from interface.TextMiningService import TextMiningService
from models.publication import Publication
from models.coocurrence import CoOccurrence
from SPARQLWrapper import SPARQLWrapper, JSON, POSTDIRECTLY
from SPARQLWrapper.SPARQLExceptions import EndPointNotFound, EndPointInternalError, QueryBadFormed
from models.publication import Publication
from .utils import uri_to_entity_code, standarise_underscored_entity_code
import logging
logger = logging.getLogger(__name__)
......@@ -18,7 +20,7 @@ class MalformedQueryException(BioKBClientException):
pass
class BioKBservice(TextMiningService):
class BioKBService(TextMiningService):
def __init__(self, sparql_url="http://10.240.6.71:8890/sparql"):
self.sparql = SPARQLWrapper(sparql_url)
self.sparql.setRequestMethod(POSTDIRECTLY)
......@@ -99,7 +101,8 @@ class BioKBservice(TextMiningService):
GROUP BY ?other_entity
} ORDER BY DESC(?count) LIMIT %LIMIT%
""".replace('%ENTITY%', entity).replace('%LIMIT%', str(limit)).replace('%ENTITY_TYPE_FILTER%', entity_types_filter)
""".replace('%ENTITY%', entity).replace('%LIMIT%', str(limit)).replace('%ENTITY_TYPE_FILTER%',
entity_types_filter)
results = self._run_sparql_query(query)
values = []
values = []
......@@ -112,9 +115,9 @@ class BioKBservice(TextMiningService):
if __name__ == "__main__":
bkb = BioKBservice()
bkb = BioKBService()
print(bkb.get_co_occurrences('DOID:2841', types=[
'http://lcsb.uni.lu/biokb#Disease']))
'http://lcsb.uni.lu/biokb#Disease']))
print('')
print(bkb.get_co_occurrences('DOID:2841'))
print('')
......
import logging
logger = logging.getLogger(__name__)
\ No newline at end of file
import logging
from biokb.biokb import BioKBService
from jensenLabService import JensenLabService
logger = logging.getLogger(__name__)
if __name__ == '__main__':
    # Run the same two-entity mention query against every available backend
    # and print whatever publications each one reports.
    services = (JensenLabService(), BioKBService())
    for service in services:
        print("Using service {}".format(service.name))
        found = service.get_mentions(["DOID:10652", "DOID:10935"], limit=100)
        print(", ".join(map(str, found)))
......@@ -10,6 +10,7 @@ logger = logging.getLogger(__name__)
class JensenLabService(TextMiningService):
LIMIT_PER_ENTITY = 50000
BASE_URL = "https://api.jensenlab.org"
MENTION_URL = BASE_URL + "/Mentions?type={}&id={}&limit={}&format=json"
IDS_MAPPING = {
......@@ -24,10 +25,14 @@ class JensenLabService(TextMiningService):
def get_mentions(self, entities: List, limit: int = 20) -> List[Publication]:
    """Return publications that mention every one of the given entities.

    Each entity is looked up separately through ``get_mentions_for_entity``
    and the per-entity results are intersected.

    :param entities: entity identifiers to look up
    :param limit: maximum number of publications to return
    :return: at most ``limit`` publications common to all entities, or an
        empty list when there is nothing to look up
    """
    entities_and_types = self.guess_types_for_entities(entities)
    if len(entities) == 1:
        limit_per_entity = limit
    else:
        # With several entities the per-entity query uses the class-wide cap
        # instead of ``limit``; the final cut to ``limit`` happens after the
        # intersection (presumably so truncated per-entity results do not
        # hide common publications -- TODO confirm).
        limit_per_entity = JensenLabService.LIMIT_PER_ENTITY
    # One lookup per entity (the duplicated append queried each entity twice).
    publications_ids = [
        self.get_mentions_for_entity(entity, entity_type, limit=limit_per_entity)
        for (entity, entity_type) in entities_and_types
    ]
    if not publications_ids:
        # set.intersection() raises TypeError when called without arguments.
        return []
    # assumes get_mentions_for_entity returns a set of ids -- TODO confirm
    publications_ids_intersection = set.intersection(*publications_ids)
    # NOTE(review): set iteration order is arbitrary, so which publications
    # survive the cut to ``limit`` is not stable between runs.
    return [Publication(pm_id=pid) for pid in publications_ids_intersection][0:limit]
def get_co_occurrences(self, entity: str, limit: int = 20) -> List[str]:
pass
......@@ -47,7 +52,8 @@ class JensenLabService(TextMiningService):
return entity_type
return -1
def get_mentions_for_entity(self, entity, entity_type, limit):
@staticmethod
def get_mentions_for_entity(entity, entity_type, limit):
url_mentions = JensenLabService.MENTION_URL.format(entity_type, entity, limit)
results = requests.get(url_mentions)
assert results.ok
......@@ -59,5 +65,5 @@ class JensenLabService(TextMiningService):
if __name__ == '__main__':
    # Manual smoke test: fetch the publications mentioning both entities and
    # print their pm_id values. The query runs once; the earlier duplicate
    # call had its result overwritten before it was ever used.
    text_mining_service = JensenLabService()
    print("Using service {}".format(text_mining_service.name))
    publications = text_mining_service.get_mentions(["DOID:10652", "DOID:10935"], limit=1000000)
    print(", ".join([p.pm_id for p in publications]))
class Publication:
    """A publication described by one or more external identifiers."""

    def __init__(self, pmc_id=None, pm_id=None, doi=None, preprint_id=None, other_id=None):
        """Store the identifiers; each of them may be left as ``None``.

        :param pmc_id: stored as given
        :param pm_id: stored as a string when provided, ``None`` otherwise
        :param doi: stored as given
        :param preprint_id: stored as given
        :param other_id: stored as given
        """
        self.pmc_id = pmc_id
        # str(None) would produce the string "None", which the
        # ``is not None`` filter in __repr__ could never drop.
        self.pm_id = None if pm_id is None else str(pm_id)
        self.doi = doi
        self.preprint_id = preprint_id
        self.other_id = other_id

    def __repr__(self):
        """Return the (label, value) pairs of all identifiers that are set, separated by spaces."""
        labelled_ids = [("PMC_ID", self.pmc_id), ("PUBMED_ID", self.pm_id), ("DOI", self.doi),
                        ("PREPRINT ID", self.preprint_id), ("OTHER ID", self.other_id)]
        return ' '.join(str(pair) for pair in labelled_ids if pair[1] is not None)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment