Commit a19f4651 authored by Sascha Herzinger
Browse files

Added more tests, including tests for the tranSMART numerical ETL

parent bea8f971
Pipeline #2236 passed with stage
in 4 minutes and 25 seconds
"""This module provides the ETL class""" """This module provides the ETL class"""
import os
import abc import abc
import json import json
import logging import logging
import os
from celery import Task from celery import Task
from pandas import DataFrame from pandas import DataFrame
from fractalis import app, redis from fractalis import app, redis
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -41,6 +40,9 @@ class ETL(Task, metaclass=abc.ABCMeta): ...@@ -41,6 +40,9 @@ class ETL(Task, metaclass=abc.ABCMeta):
def can_handle(handler: str, descriptor: dict) -> bool: def can_handle(handler: str, descriptor: dict) -> bool:
"""Check if the current implementation of ETL can handle given handler """Check if the current implementation of ETL can handle given handler
and data type. and data type.
WARNING: You should never raise an Exception here and expect it to be
propagated further up. It will be caught and assumed that the
current ETL cannot handle the given arguments.
:param handler: Describes the handler. E.g.: transmart, ada :param handler: Describes the handler. E.g.: transmart, ada
:param descriptor: Describes the data that we want to download. :param descriptor: Describes the data that we want to download.
:return: True if implementation can handle given parameters. :return: True if implementation can handle given parameters.
...@@ -58,11 +60,21 @@ class ETL(Task, metaclass=abc.ABCMeta): ...@@ -58,11 +60,21 @@ class ETL(Task, metaclass=abc.ABCMeta):
""" """
from . import ETL_REGISTRY from . import ETL_REGISTRY
for ETL_TASK in ETL_REGISTRY: for ETL_TASK in ETL_REGISTRY:
if ETL_TASK.can_handle(handler, descriptor): # noinspection PyBroadException
return ETL_TASK() try:
if ETL_TASK.can_handle(handler, descriptor):
return ETL_TASK()
except Exception as e:
logger.warning("Caught exception and assumed that ETL '{}' "
"cannot handle handler '{}' and descriptor: '{}'"
" Exception:'{}'".format(type(ETL_TASK).__name__,
handler,
str(descriptor), e))
continue
raise NotImplementedError( raise NotImplementedError(
"No ETL implementation found for handler '{}' and descriptor '{}'" "No ETL implementation found for handler '{}' and descriptor '{}'"
.format(handler, descriptor)) .format(handler, descriptor))
@abc.abstractmethod @abc.abstractmethod
def extract(self, server: str, token: str, descriptor: dict) -> object: def extract(self, server: str, token: str, descriptor: dict) -> object:
......
"""Provides numerical concept ETL for tranSMART.""" """Provides numerical concept ETL for tranSMART."""
import logging
import requests import requests
from pandas import DataFrame from pandas import DataFrame
from fractalis.data.etl import ETL from fractalis.data.etl import ETL
logger = logging.getLogger(__name__)
class NumericalETL(ETL): class NumericalETL(ETL):
"""NumericalETL implements support for tranSMARTs 'numerical' type.""" """NumericalETL implements support for tranSMARTs 'numerical' type."""
...@@ -19,7 +24,7 @@ class NumericalETL(ETL): ...@@ -19,7 +24,7 @@ class NumericalETL(ETL):
def extract(self, server: str, token: str, descriptor: dict) -> dict: def extract(self, server: str, token: str, descriptor: dict) -> dict:
r = requests.get(url='{}/v2/observations'.format(server), r = requests.get(url='{}/v2/observations'.format(server),
params={ params={
'constraint': '{{"type": "concept", "path": "{}"}}'.format(descriptor["path"]), 'constraint': '{{"type": "concept","path": "{}"}}'.format(descriptor["path"]),
'type': 'clinical' 'type': 'clinical'
}, },
headers={ headers={
...@@ -30,7 +35,12 @@ class NumericalETL(ETL): ...@@ -30,7 +35,12 @@ class NumericalETL(ETL):
raise ValueError( raise ValueError(
"Data extraction failed. Target server responded with " "Data extraction failed. Target server responded with "
"status code {}.".format(r.status_code)) "status code {}.".format(r.status_code))
return r.json() try:
return r.json()
except Exception:
error = "Data extraction failed. Got unexpected data format."
logger.error(error)
raise ValueError(error)
def transform(self, raw_data: dict, descriptor: dict) -> DataFrame: def transform(self, raw_data: dict, descriptor: dict) -> DataFrame:
rows = [] rows = []
......
...@@ -51,17 +51,16 @@ class TransmartHandler(ETLHandler): ...@@ -51,17 +51,16 @@ class TransmartHandler(ETLHandler):
'password': passwd 'password': passwd
}, },
headers={'Accept': 'application/json'}) headers={'Accept': 'application/json'})
auth_error = ''
if r.status_code != 200: if r.status_code != 200:
auth_error = "Could not authenticate. " \ error = "Could not authenticate. " \
"Reason: [{}]: {}".format(r.status_code, r.text) "Reason: [{}]: {}".format(r.status_code, r.text)
logger.error(auth_error) logger.error(error)
raise ValueError(auth_error) raise ValueError(error)
try: try:
response = r.json() response = r.json()
return response['access_token'] return response['access_token']
except ValueError: except Exception:
auth_error = "Could not authenticate. " \ error = "Could not authenticate. " \
"Got unexpected response: '{}'".format(r.text) "Got unexpected response: '{}'".format(r.text)
logger.error(auth_error) logger.error(error)
raise ValueError(auth_error) raise ValueError(error)
\ No newline at end of file
"""This module provides test for the numerical data ETL for tranSMART"""
import pytest
import responses
from fractalis.data.etls.transmart.etl_numerical import NumericalETL
# noinspection PyMissingOrEmptyDocstring,PyMissingTypeHints
class TestNumericalETL:
    """Tests for NumericalETL: handler matching and the extract() step.

    HTTP traffic is mocked with ``responses.RequestsMock`` so that no real
    server is contacted.
    """

    # Single instance shared by all tests in this class.
    etl = NumericalETL()

    def test_correct_handler(self):
        """Accept transmart/numerical; reject the 'ada' handler variants."""
        assert self.etl.can_handle(handler='transmart',
                                   descriptor={'data_type': 'numerical'})
        assert not self.etl.can_handle(handler='ada',
                                       descriptor={'data_type': 'numerical'})
        assert not self.etl.can_handle(handler='ada',
                                       descriptor={'data_type': 'categorical'})
        assert not self.etl.can_handle(handler='ada',
                                       descriptor={'foo': 'bar'})

    def test_extract_raises_readable_if_not_200(self):
        """A non-200 response raises ValueError naming the status code."""
        with responses.RequestsMock() as response:
            response.add(response.GET, 'http://foo.bar/v2/observations',
                         body='{}',
                         status=400,
                         content_type='application/json')
            with pytest.raises(ValueError) as e:
                self.etl.extract(server='http://foo.bar',
                                 token='', descriptor={'path': ''})
        # The message check must run after the ``raises`` block: statements
        # placed behind the raising call inside the block are never executed.
        # ``e`` is an ExceptionInfo wrapper, so inspect ``e.value``.
        assert 'status code 400' in str(e.value)

    def test_extract_raises_readable_if_not_json(self):
        """A 200 response with an unparsable body raises ValueError."""
        with responses.RequestsMock() as response:
            response.add(response.GET, 'http://foo.bar/v2/observations',
                         body='123{//}',
                         status=200,
                         content_type='application/json')
            with pytest.raises(ValueError) as e:
                self.etl.extract(server='http://foo.bar',
                                 token='', descriptor={'path': ''})
        # Same reasoning as above: assert on the exception message itself,
        # outside of the ``raises`` context.
        assert 'unexpected data' in str(e.value)

    def test_extract_works_for_valid_input(self):
        """A 200 response with a JSON object body is returned as a dict."""
        with responses.RequestsMock() as response:
            response.add(response.GET, 'http://foo.bar/v2/observations',
                         body='{}',
                         status=200,
                         content_type='application/json')
            raw_data = self.etl.extract(server='http://foo.bar',
                                        token='', descriptor={'path': ''})
            assert isinstance(raw_data, dict)
...@@ -2,7 +2,6 @@ ...@@ -2,7 +2,6 @@
import pytest import pytest
import responses import responses
import requests
from fractalis.data.etls.transmart.handler_transmart import TransmartHandler from fractalis.data.etls.transmart.handler_transmart import TransmartHandler
...@@ -54,7 +53,6 @@ class TestTransmartHandler: ...@@ -54,7 +53,6 @@ class TestTransmartHandler:
auth={'user': 'foo', 'passwd': 'bar'}) auth={'user': 'foo', 'passwd': 'bar'})
assert 'unexpected response' in e assert 'unexpected response' in e
def test_auth_raises_exception_for_non_200_return(self): def test_auth_raises_exception_for_non_200_return(self):
with responses.RequestsMock() as response: with responses.RequestsMock() as response:
response.add(response.POST, 'http://foo.bar/oauth/token', response.add(response.POST, 'http://foo.bar/oauth/token',
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment