Skip to content
Snippets Groups Projects
Commit 49deb144 authored by Carlos Vega
Browse files

Merge branch '11-text-mining-sw' of...

Merge branch '11-text-mining-sw' of github.com:elixir-europe/BioHackathon-projects-2019 into 11-text-mining-sw
parents ee0897ed 86307759
No related branches found
No related tags found
No related merge requests found
...@@ -2,6 +2,7 @@ import logging ...@@ -2,6 +2,7 @@ import logging
from typing import List from typing import List
from interface.TextMiningService import TextMiningService, Publication from interface.TextMiningService import TextMiningService, Publication
from models.coocurrence import CoOccurrence
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -13,7 +14,7 @@ class DummyService(TextMiningService): ...@@ -13,7 +14,7 @@ class DummyService(TextMiningService):
def get_mentions(self, entities: List, limit: int = 20) -> List[Publication]: def get_mentions(self, entities: List, limit: int = 20) -> List[Publication]:
return [Publication(pm_id="00000" + str(i)) for i in range(20)] return [Publication(pm_id="00000" + str(i)) for i in range(20)]
def get_co_occurrences(self, entity: str) -> List[str]: def get_co_occurrences(self, entity: str, limit: int = 20) -> List[CoOccurrence]:
pass pass
......
from abc import ABCMeta, abstractmethod from abc import ABCMeta, abstractmethod
from typing import List from typing import List
from models.publication import Publication
from models.coocurrence import CoOccurrence from models.coocurrence import CoOccurrence
from models.publication import Publication
class TextMiningService(metaclass=ABCMeta): class TextMiningService(metaclass=ABCMeta):
......
import logging
logger = logging.getLogger(__name__)
\ No newline at end of file
import json
import logging
from typing import List
import requests
from TextMiningService import TextMiningService, Publication
logger = logging.getLogger(__name__)
class JensenLabService(TextMiningService):
    """Text-mining service backed by the HTTP API at api.jensenlab.org.

    Publications are looked up per entity through the ``/Mentions`` endpoint
    and then intersected, so only publications returned for *every* requested
    entity are reported.
    """

    BASE_URL = "https://api.jensenlab.org"
    # Format slots, in order: entity type code, entity identifier, result limit.
    MENTION_URL = BASE_URL + "/Mentions?type={}&id={}&limit={}&format=json"
    # Identifier prefix -> numeric code sent as the ``type`` query parameter.
    IDS_MAPPING = {
        "CID": -1,
        "BTO": -25,
        "DOID": -26,
    }

    def __init__(self):
        """Register the service name and description with the base class."""
        super().__init__("JensenLabService", "Text-Mining api available at api.jensenlab.org")

    def get_mentions(self, entities: List, limit: int = 20) -> List[Publication]:
        """Return the publications returned for all of the given entities.

        :param entities: entity identifiers such as ``"DOID:10652"``.
        :param limit: maximum number of results requested *per entity*; the
            intersection may therefore contain fewer items.
        :return: one ``Publication`` per identifier shared by every entity,
            in no particular order; an empty list when ``entities`` is empty.
        """
        entities_and_types = self.guess_types_for_entities(entities)
        if not entities_and_types:
            # set.intersection() needs at least one set; nothing was asked for.
            return []
        publication_id_sets = [
            self.get_mentions_for_entity(entity, entity_type, limit)
            for entity, entity_type in entities_and_types
        ]
        shared_ids = set.intersection(*publication_id_sets)
        return [Publication(pm_id=pid) for pid in shared_ids]

    def get_co_occurrences(self, entity: str, limit: int = 20) -> List[str]:
        """Placeholder: not implemented yet, currently returns ``None``."""
        pass

    @staticmethod
    def guess_types_for_entities(entities):
        """Pair every entity with its guessed type code.

        :return: list of ``(entity, entity_type)`` tuples in input order.
        """
        return [
            (entity, JensenLabService.guess_type_for_entity(entity))
            for entity in entities
        ]

    @staticmethod
    def guess_type_for_entity(entity):
        """Guess the type code of ``entity`` from its identifier prefix.

        The first prefix in ``IDS_MAPPING`` that ``entity`` starts with wins.
        Unknown prefixes fall back to ``-1``, the same code as ``"CID"``.
        """
        for prefix, entity_type in JensenLabService.IDS_MAPPING.items():
            if entity.startswith(prefix):
                return entity_type
        return -1

    def get_mentions_for_entity(self, entity, entity_type, limit):
        """Fetch the set of publication identifiers returned for one entity.

        :param entity: entity identifier inserted into the request URL as-is.
        :param entity_type: numeric type code (see ``IDS_MAPPING``).
        :param limit: maximum number of results requested from the API.
        :return: set of the identifiers found in the response.
        :raises requests.HTTPError: if the API answers with an error status.
        """
        url_mentions = JensenLabService.MENTION_URL.format(entity_type, entity, limit)
        response = requests.get(url_mentions)
        # Raise explicitly instead of asserting: assertions are stripped under
        # ``python -O``, which would let error responses reach the parser.
        response.raise_for_status()
        # Rewrite Python-style ``True``/``False`` to JSON booleans so that
        # json.loads accepts the body.
        publications_string = response.content.decode().strip().replace('True', 'true').replace('False', 'false')
        # The body is a two-element array; the second element is not used here.
        publications_list, _has_more = json.loads(publications_string)
        return set(publications_list)
if __name__ == '__main__':
    # Manual smoke test: performs live HTTP requests against the service and
    # prints the publications shared by the two example entities.
    text_mining_service = JensenLabService()
    print("Using service {}".format(text_mining_service.name))
    publications = text_mining_service.get_mentions(["DOID:10652", "DOID:10654"], limit=1000000)
    # str() keeps the join working when the ids are not strings (for example
    # integers decoded from the JSON response); it is a no-op for str ids.
    print(", ".join(str(p.pm_id) for p in publications))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment