Commit a19f4651 authored by Sascha Herzinger
Browse files

Added some more tests including transmart numerical etl

parent bea8f971
Pipeline #2236 passed with stage
in 4 minutes and 25 seconds
"""This module provides the ETL class"""
import os
import abc
import json
import logging
import os
from celery import Task
from pandas import DataFrame
from fractalis import app, redis
logger = logging.getLogger(__name__)
......@@ -41,6 +40,9 @@ class ETL(Task, metaclass=abc.ABCMeta):
def can_handle(handler: str, descriptor: dict) -> bool:
"""Check if the current implementation of ETL can handle given handler
and data type.
WARNING: You should never raise an Exception here and expect it to be
propagated further up. It will be caught and assumed that the
current ETL cannot handle the given arguments.
:param handler: Describes the handler. E.g.: transmart, ada
:param descriptor: Describes the data that we want to download.
:return: True if implementation can handle given parameters.
......@@ -58,8 +60,18 @@ class ETL(Task, metaclass=abc.ABCMeta):
"""
from . import ETL_REGISTRY
for ETL_TASK in ETL_REGISTRY:
# noinspection PyBroadException
try:
if ETL_TASK.can_handle(handler, descriptor):
return ETL_TASK()
except Exception as e:
logger.warning("Caught exception and assumed that ETL '{}' "
"cannot handle handler '{}' and descriptor: '{}'"
" Exception:'{}'".format(type(ETL_TASK).__name__,
handler,
str(descriptor), e))
continue
raise NotImplementedError(
"No ETL implementation found for handler '{}' and descriptor '{}'"
.format(handler, descriptor))
......
"""Provides numerical concept ETL for tranSMART."""
import logging
import requests
from pandas import DataFrame
from fractalis.data.etl import ETL
logger = logging.getLogger(__name__)
class NumericalETL(ETL):
"""NumericalETL implements support for tranSMARTs 'numerical' type."""
......@@ -19,7 +24,7 @@ class NumericalETL(ETL):
def extract(self, server: str, token: str, descriptor: dict) -> dict:
r = requests.get(url='{}/v2/observations'.format(server),
params={
'constraint': '{{"type": "concept", "path": "{}"}}'.format(descriptor["path"]),
'constraint': '{{"type": "concept","path": "{}"}}'.format(descriptor["path"]),
'type': 'clinical'
},
headers={
......@@ -30,7 +35,12 @@ class NumericalETL(ETL):
raise ValueError(
"Data extraction failed. Target server responded with "
"status code {}.".format(r.status_code))
try:
return r.json()
except Exception:
error = "Data extraction failed. Got unexpected data format."
logger.error(error)
raise ValueError(error)
def transform(self, raw_data: dict, descriptor: dict) -> DataFrame:
rows = []
......
......@@ -51,17 +51,16 @@ class TransmartHandler(ETLHandler):
'password': passwd
},
headers={'Accept': 'application/json'})
auth_error = ''
if r.status_code != 200:
auth_error = "Could not authenticate. " \
error = "Could not authenticate. " \
"Reason: [{}]: {}".format(r.status_code, r.text)
logger.error(auth_error)
raise ValueError(auth_error)
logger.error(error)
raise ValueError(error)
try:
response = r.json()
return response['access_token']
except ValueError:
auth_error = "Could not authenticate. " \
except Exception:
error = "Could not authenticate. " \
"Got unexpected response: '{}'".format(r.text)
logger.error(auth_error)
raise ValueError(auth_error)
\ No newline at end of file
logger.error(error)
raise ValueError(error)
"""This module provides tests for the numerical data ETL for tranSMART."""
import pytest
import responses
from fractalis.data.etls.transmart.etl_numerical import NumericalETL
# noinspection PyMissingOrEmptyDocstring,PyMissingTypeHints
class TestNumericalETL:
    """Tests for the tranSMART ``NumericalETL``.

    HTTP traffic is intercepted with ``responses.RequestsMock`` so no real
    server is contacted.
    """

    # One ETL instance is shared by all tests in this class.
    etl = NumericalETL()

    def test_correct_handler(self):
        """Only the 'transmart' handler with 'numerical' data is accepted."""
        assert self.etl.can_handle(handler='transmart',
                                   descriptor={'data_type': 'numerical'})
        assert not self.etl.can_handle(handler='ada',
                                       descriptor={'data_type': 'numerical'})
        assert not self.etl.can_handle(handler='ada',
                                       descriptor={'data_type': 'categorical'})
        assert not self.etl.can_handle(handler='ada',
                                       descriptor={'foo': 'bar'})

    def test_extract_raises_readable_if_not_200(self):
        """A non-200 response raises a ValueError naming the status code."""
        with responses.RequestsMock() as response:
            response.add(response.GET, 'http://foo.bar/v2/observations',
                         body='{}',
                         status=400,
                         content_type='application/json')
            with pytest.raises(ValueError) as exc_info:
                self.etl.extract(server='http://foo.bar',
                                 token='', descriptor={'path': ''})
            # The check must live outside the ``raises`` block (statements
            # after the raising call never run) and must inspect the message
            # of the raised exception, not the ExceptionInfo wrapper.
            assert '400' in str(exc_info.value)

    def test_extract_raises_readable_if_not_json(self):
        """A 200 response with a non-JSON body raises a readable ValueError."""
        with responses.RequestsMock() as response:
            response.add(response.GET, 'http://foo.bar/v2/observations',
                         body='123{//}',
                         status=200,
                         content_type='application/json')
            with pytest.raises(ValueError) as exc_info:
                self.etl.extract(server='http://foo.bar',
                                 token='', descriptor={'path': ''})
            # See the note in the non-200 test: assert on the message text
            # after the ``raises`` block has exited.
            assert 'unexpected data' in str(exc_info.value)

    def test_extract_works_for_valid_input(self):
        """A 200 response with a JSON object body is returned as a dict."""
        with responses.RequestsMock() as response:
            response.add(response.GET, 'http://foo.bar/v2/observations',
                         body='{}',
                         status=200,
                         content_type='application/json')
            raw_data = self.etl.extract(server='http://foo.bar',
                                        token='', descriptor={'path': ''})
            assert isinstance(raw_data, dict)
......@@ -2,7 +2,6 @@
import pytest
import responses
import requests
from fractalis.data.etls.transmart.handler_transmart import TransmartHandler
......@@ -54,7 +53,6 @@ class TestTransmartHandler:
auth={'user': 'foo', 'passwd': 'bar'})
assert 'unexpected response' in e
def test_auth_raises_exception_for_non_200_return(self):
with responses.RequestsMock() as response:
response.add(response.POST, 'http://foo.bar/oauth/token',
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment