Skip to content
Snippets Groups Projects
Commit 49deb144 authored by Carlos Vega's avatar Carlos Vega
Browse files

Merge branch '11-text-mining-sw' of...

Merge branch '11-text-mining-sw' of github.com:elixir-europe/BioHackathon-projects-2019 into 11-text-mining-sw
parents ee0897ed 86307759
No related branches found
No related tags found
No related merge requests found
......@@ -2,6 +2,7 @@ import logging
from typing import List
from interface.TextMiningService import TextMiningService, Publication
from models.coocurrence import CoOccurrence
logger = logging.getLogger(__name__)
......@@ -13,7 +14,7 @@ class DummyService(TextMiningService):
def get_mentions(self, entities: List, limit: int = 20) -> List[Publication]:
return [Publication(pm_id="00000" + str(i)) for i in range(20)]
def get_co_occurrences(self, entity: str) -> List[str]:
def get_co_occurrences(self, entity: str, limit: int = 20) -> List[CoOccurrence]:
pass
......
from abc import ABCMeta, abstractmethod
from typing import List
from models.publication import Publication
from models.coocurrence import CoOccurrence
from models.publication import Publication
class TextMiningService(metaclass=ABCMeta):
......
import logging
logger = logging.getLogger(__name__)
\ No newline at end of file
import json
import logging
from typing import List
import requests
from TextMiningService import TextMiningService, Publication
logger = logging.getLogger(__name__)
class JensenLabService(TextMiningService):
BASE_URL = "https://api.jensenlab.org"
MENTION_URL = BASE_URL + "/Mentions?type={}&id={}&limit={}&format=json"
IDS_MAPPING = {
"CID": -1,
"BTO": -25,
"DOID": -26,
}
def __init__(self):
super().__init__("JensenLabService", "Text-Mining api available at api.jensenlab.org")
def get_mentions(self, entities: List, limit: int = 20) -> List[Publication]:
entities_and_types = self.guess_types_for_entities(entities)
publications_ids = []
for (entity, entity_type) in entities_and_types:
publications_ids.append(self.get_mentions_for_entity(entity, entity_type, limit))
publications_ids_intersection = set.intersection(*publications_ids)
return [Publication(pm_id=pid) for pid in publications_ids_intersection]
def get_co_occurrences(self, entity: str, limit: int = 20) -> List[str]:
pass
@staticmethod
def guess_types_for_entities(entities):
results = []
for entity in entities:
entity_type = JensenLabService.guess_type_for_entity(entity)
results.append((entity, entity_type))
return results
@staticmethod
def guess_type_for_entity(entity):
for prefix, entity_type in JensenLabService.IDS_MAPPING.items():
if entity.startswith(prefix):
return entity_type
return -1
def get_mentions_for_entity(self, entity, entity_type, limit):
url_mentions = JensenLabService.MENTION_URL.format(entity_type, entity, limit)
results = requests.get(url_mentions)
assert results.ok
publications_string = results.content.decode().strip().replace('True', 'true').replace('False', 'false')
publications_list, has_more = json.loads(publications_string)
return set(publications_list)
if __name__ == '__main__':
text_mining_service = JensenLabService()
print("Using service {}".format(text_mining_service.name))
publications = text_mining_service.get_mentions(["DOID:10652", "DOID:10654"], limit=1000000)
print(", ".join([p.pm_id for p in publications]))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment