Newer
Older
import json
import logging
from typing import List
import requests
from TextMiningService import TextMiningService, Publication
logger = logging.getLogger(__name__)
class JensenLabService(TextMiningService):
BASE_URL = "https://api.jensenlab.org"
MENTION_URL = BASE_URL + "/Mentions?type={}&id={}&limit={}&format=json"
IDS_MAPPING = {
"CID": -1,
"BTO": -25,
"DOID": -26,
}
def __init__(self):
super().__init__("JensenLabService", "Text-Mining api available at api.jensenlab.org")
def get_mentions(self, entities: List, limit: int = 20) -> List[Publication]:
entities_and_types = self.guess_types_for_entities(entities)
publications_ids = []
if len(entities) == 1:
limit_per_entity = limit
else:
limit_per_entity = JensenLabService.LIMIT_PER_ENTITY
for (entity, entity_type) in entities_and_types:
publications_ids.append(self.get_mentions_for_entity(entity, entity_type, limit=limit_per_entity))
publications_ids_intersection = set.intersection(*publications_ids)
return [Publication(pm_id=pid) for pid in publications_ids_intersection][0:limit]
def get_co_occurrences(self, entity: str, limit: int = 20) -> List[str]:
pass
@staticmethod
def guess_types_for_entities(entities):
results = []
for entity in entities:
entity_type = JensenLabService.guess_type_for_entity(entity)
results.append((entity, entity_type))
return results
@staticmethod
def guess_type_for_entity(entity):
for prefix, entity_type in JensenLabService.IDS_MAPPING.items():
if entity.startswith(prefix):
return entity_type
return -1
@staticmethod
def get_mentions_for_entity(entity, entity_type, limit):
url_mentions = JensenLabService.MENTION_URL.format(entity_type, entity, limit)
results = requests.get(url_mentions)
assert results.ok
publications_string = results.content.decode().strip().replace('True', 'true').replace('False', 'false')
publications_list, has_more = json.loads(publications_string)
return set(publications_list)
if __name__ == '__main__':
text_mining_service = JensenLabService()
print("Using service {}".format(text_mining_service.name))
publications = text_mining_service.get_mentions(["DOID:10652", "DOID:10935"], limit=1000000)
print(", ".join([p.pm_id for p in publications]))