diff --git a/dummy-service/dummyService.py b/dummy-service/dummyService.py
index 7459b537f608ccc005e8c5dd2330b1210d04f5a3..842baec2fd56a27d60f2a6020448a923e1902980 100644
--- a/dummy-service/dummyService.py
+++ b/dummy-service/dummyService.py
@@ -2,6 +2,7 @@ import logging
 from typing import List
 
 from interface.TextMiningService import TextMiningService, Publication
+from models.coocurrence import CoOccurrence
 
 logger = logging.getLogger(__name__)
 
@@ -13,7 +14,7 @@ class DummyService(TextMiningService):
     def get_mentions(self, entities: List, limit: int = 20) -> List[Publication]:
         return [Publication(pm_id="00000" + str(i)) for i in range(20)]
 
-    def get_co_occurrences(self, entity: str) -> List[str]:
+    def get_co_occurrences(self, entity: str, limit: int = 20) -> List[CoOccurrence]:
         pass
 
 
diff --git a/interface/TextMiningService.py b/interface/TextMiningService.py
index 123fc5cfef6ae776c45de499cf0ea1813b66d9f3..8d90c300067b24a763f411b7a51ed6da04b19c81 100644
--- a/interface/TextMiningService.py
+++ b/interface/TextMiningService.py
@@ -1,7 +1,8 @@
 from abc import ABCMeta, abstractmethod
 from typing import List
-from models.publication import Publication
+
 from models.coocurrence import CoOccurrence
+from models.publication import Publication
 
 
 class TextMiningService(metaclass=ABCMeta):
diff --git a/jensenlab-service/__init__.py b/jensenlab-service/__init__.py
new file mode 100644
--- /dev/null
+++ b/jensenlab-service/__init__.py
@@ -0,0 +1,3 @@
+import logging
+
+logger = logging.getLogger(__name__)
diff --git a/jensenlab-service/jensenLabService.py b/jensenlab-service/jensenLabService.py
new file mode 100644
--- /dev/null
+++ b/jensenlab-service/jensenLabService.py
@@ -0,0 +1,84 @@
+import json
+import logging
+from typing import List
+
+import requests
+
+from TextMiningService import TextMiningService, Publication
+
+logger = logging.getLogger(__name__)
+
+
+class JensenLabService(TextMiningService):
+    """Text-mining service backed by the HTTP API at api.jensenlab.org."""
+
+    BASE_URL = "https://api.jensenlab.org"
+    MENTION_URL = BASE_URL + "/Mentions?type={}&id={}&limit={}&format=json"
+    # Identifier prefix -> numeric code sent as the ``type`` query parameter.
+    IDS_MAPPING = {
+        "CID": -1,
+        "BTO": -25,
+        "DOID": -26,
+    }
+
+    def __init__(self):
+        super().__init__("JensenLabService", "Text-Mining api available at api.jensenlab.org")
+
+    def get_mentions(self, entities: List, limit: int = 20) -> List[Publication]:
+        """Return the publications mentioned together with every given entity.
+
+        Each entity is queried separately (``limit`` is forwarded to every
+        query) and only the publication ids common to all answers are kept.
+        An empty ``entities`` list yields an empty result.
+        """
+        entities_and_types = self.guess_types_for_entities(entities)
+        if not entities_and_types:
+            # set.intersection() raises TypeError when called without any set.
+            return []
+        publications_ids = [
+            self.get_mentions_for_entity(entity, entity_type, limit)
+            for entity, entity_type in entities_and_types
+        ]
+        publications_ids_intersection = set.intersection(*publications_ids)
+        return [Publication(pm_id=pid) for pid in publications_ids_intersection]
+
+    def get_co_occurrences(self, entity: str, limit: int = 20) -> List[str]:
+        """Placeholder: co-occurrence lookup is not implemented and returns None."""
+        pass
+
+    @staticmethod
+    def guess_types_for_entities(entities):
+        """Pair each entity with its guessed type code, preserving input order."""
+        return [(entity, JensenLabService.guess_type_for_entity(entity)) for entity in entities]
+
+    @staticmethod
+    def guess_type_for_entity(entity):
+        """Return the type code of the first IDS_MAPPING prefix matching ``entity``, else -1."""
+        for prefix, entity_type in JensenLabService.IDS_MAPPING.items():
+            if entity.startswith(prefix):
+                return entity_type
+        return -1
+
+    def get_mentions_for_entity(self, entity, entity_type, limit):
+        """Query the Mentions endpoint and return the publication ids as a set.
+
+        Raises:
+            requests.HTTPError: if the API answers with a 4xx/5xx status.
+        """
+        url_mentions = JensenLabService.MENTION_URL.format(entity_type, entity, limit)
+        results = requests.get(url_mentions)
+        # Explicit check instead of ``assert``, which is stripped under ``python -O``.
+        results.raise_for_status()
+        # The body apparently uses Python-style True/False; lower-case them so
+        # that it parses as JSON.
+        publications_string = results.content.decode().strip().replace('True', 'true').replace('False', 'false')
+        publications_list, _has_more = json.loads(publications_string)
+        return set(publications_list)
+
+
+if __name__ == '__main__':
+    text_mining_service = JensenLabService()
+    print("Using service {}".format(text_mining_service.name))
+    publications = text_mining_service.get_mentions(["DOID:10652", "DOID:10654"], limit=1000000)
+    # str() keeps the join working if the ids come back from JSON as numbers.
+    print(", ".join(str(p.pm_id) for p in publications))