Commit 9480390a authored by Sascha Herzinger
Browse files

More tests, logging and better timeouts

parent a19f4651
Pipeline #2237 passed with stage
in 7 minutes and 5 seconds
......@@ -116,7 +116,7 @@ class AnalyticTask(Task, metaclass=abc.ABCMeta):
raise ValueError(error)
result = json.dumps(result)
except TypeError as e:
logging.exception(e)
logger.exception(e)
raise
# NaN is invalid JSON and JS can't parse it. null on the other hand...
result = re.sub(r': NaN', ': null', result)
......
......@@ -124,12 +124,24 @@ class ETL(Task, metaclass=abc.ABCMeta):
"""
logger.info("Starting ETL process ...")
logger.info("(E)xtracting data from server '{}'.".format(server))
raw_data = self.extract(server, token, descriptor)
try:
raw_data = self.extract(server, token, descriptor)
except Exception as e:
logger.exception(e)
raise RuntimeError("Data extraction failed.")
logger.info("(T)ransforming data to Fractalis format.")
data_frame = self.transform(raw_data, descriptor)
try:
data_frame = self.transform(raw_data, descriptor)
except Exception as e:
logger.exception(e)
raise RuntimeError("Data transformation failed.")
if not isinstance(data_frame, DataFrame):
error = "transform() must return 'pandas.DataFrame', " \
"but returned '{}' instead.".format(type(data_frame))
logging.error(error, exc_info=1)
raise TypeError(error)
self.load(data_frame, file_path)
try:
self.load(data_frame, file_path)
except Exception as e:
logger.exception(e)
raise RuntimeError("Data loading failed.")
......@@ -56,7 +56,11 @@ class ETLHandler(metaclass=abc.ABCMeta):
except KeyError:
logger.info('No token has been provided. '
'Attempting to authenticate with the API.')
self._token = self._get_token_for_credentials(server, auth)
try:
self._token = self._get_token_for_credentials(server, auth)
except Exception as e:
logger.exception(e)
raise ValueError("Could not authenticate with API.")
@staticmethod
@abc.abstractmethod
......
"""This module contains code that is shared between the different ETLs."""
import logging
from typing import List
import requests
logger = logging.getLogger(__name__)
def make_cookie(token: str) -> dict:
    """Build the cookie mapping that carries the session token.

    :param token: Session token to send along with a request.
    :return: Dict with the single key 'PLAY2AUTH_SESS_ID' mapped to `token`.
    """
    return {'PLAY2AUTH_SESS_ID': token}
......@@ -12,19 +16,23 @@ def make_cookie(token: str) -> dict:
def get_field(server: str, data_set: str,
              cookie: dict, projection: str) -> List[dict]:
    """Fetch one field of a data set, skipping records where it is empty.

    Sends a GET request to '<server>/studies/records/findCustom' asking for
    the '_id' and `projection` columns of `data_set`, filtered to records
    whose `projection` value is not the empty string.

    :param server: Base URL of the target server.
    :param data_set: Value sent as the 'dataSet' query parameter.
    :param cookie: Cookies sent with the request.
    :param projection: Name of the field to retrieve and to filter on.
    :return: The decoded JSON body of the response.
    :raises ValueError: If the server answers with a status other than 200.
    :raises TypeError: If the response body cannot be decoded as JSON.
    """
    # Only records with a non-empty value for the requested field are wanted.
    non_empty_filter = \
        '[{{"fieldName":"{}","conditionType":"!=","value":""}}]'.format(
            projection)
    r = requests.get(url='{}/studies/records/findCustom'.format(server),
                     headers={'Accept': 'application/json'},
                     params={
                         'dataSet': data_set,
                         'projection': ['_id', projection],
                         'filterOrId': non_empty_filter
                     },
                     cookies=cookie,
                     # Bound the wait so an unresponsive server cannot hang
                     # the caller.
                     timeout=20)
    if r.status_code != 200:
        error = "Data extraction failed. Target server responded with " \
                "status code {}.".format(r.status_code)
        logger.error(error)
        raise ValueError(error)
    try:
        field_data = r.json()
    except Exception as e:
        logger.exception(e)
        # Keep the decoding failure attached as the explicit cause.
        raise TypeError("Data extraction failed. Target server did not return "
                        "expected data. Possible authentication error.") from e
    return field_data
......
......@@ -36,17 +36,19 @@ class AdaHandler(ETLHandler):
passwd = auth['passwd']
if len(user) == 0 or len(passwd) == 0:
raise KeyError
except KeyError:
error = "The authentication object must contain the non-empty " \
"fields 'user' and 'passwd'."
logger.error(error)
raise ValueError(error)
except KeyError as e:
logger.exception(e)
raise ValueError("The authentication object must contain the "
"non-empty fields 'user' and 'passwd'.")
r = requests.post(url='{}/login'.format(server),
headers={'Accept': 'application/json'},
data={'id': user, 'password': passwd})
data={'id': user, 'password': passwd},
timeout=10)
if r.status_code != 200:
raise ValueError("Could not authenticate. Reason: [{}]: {}"
.format(r.status_code, r.text))
error = "Could not authenticate. " \
"Reason: [{}]: {}".format(r.status_code, r.text)
logger.error(error)
raise ValueError(error)
cookie = r.headers['Set-Cookie']
token = [s for s in cookie.split(';')
if s.startswith('PLAY2AUTH_SESS_ID')][0]
......
......@@ -30,17 +30,19 @@ class NumericalETL(ETL):
headers={
'Accept': 'application/json',
'Authorization': 'Bearer {}'.format(token)
})
},
timeout=60)
if r.status_code != 200:
raise ValueError(
"Data extraction failed. Target server responded with "
"status code {}.".format(r.status_code))
try:
return r.json()
except Exception:
error = "Data extraction failed. Got unexpected data format."
error = "Data extraction failed. Target server responded with " \
"status code {}.".format(r.status_code)
logger.error(error)
raise ValueError(error)
try:
return r.json()
except Exception as e:
logger.exception(e)
raise ValueError("Data extraction failed. "
"Got unexpected data format.")
def transform(self, raw_data: dict, descriptor: dict) -> DataFrame:
rows = []
......
......@@ -50,7 +50,8 @@ class TransmartHandler(ETLHandler):
'username': user,
'password': passwd
},
headers={'Accept': 'application/json'})
headers={'Accept': 'application/json'},
timeout=10)
if r.status_code != 200:
error = "Could not authenticate. " \
"Reason: [{}]: {}".format(r.status_code, r.text)
......
"""This module provides test for the numerical data ETL for tranSMART"""
import json
import pytest
import responses
......@@ -52,3 +54,20 @@ class TestNumericalETL:
raw_data = self.etl.extract(server='http://foo.bar',
token='', descriptor={'path': ''})
assert isinstance(raw_data, dict)
def test_transform_valid_input_correct_output(self):
    """Check that transform() turns one mocked observation into one row.

    The mocked '/v2/observations' response holds a single cell with numeric
    value 52.0 that points at a single patient whose 'inTrialId' is '3052'.
    The resulting frame must have the columns 'id' and 'value' and exactly
    that one row.
    """
    body = {
        "cells": [{
            "inlineDimensions": ["292278994-08-16T23:00:00Z", None, "@"],
            "dimensionIndexes": [0, 0, 0, None, 0, None, None],
            "numericValue": 52.0
        }],
        "dimensionElements": {
            "patient": [{
                "id": 1000421548,
                "deathDate": None,
                "birthDate": None,
                "race": None,
                "maritalStatus": None,
                "inTrialId": "3052",
                "age": 52,
                "trial": "GSE4382",
                "sexCd": None,
                "sex": "unknown",
                "religion": None
            }]
        }
    }
    with responses.RequestsMock() as response:
        response.add(response.GET, 'http://foo.bar/v2/observations',
                     body=json.dumps(body),
                     status=200,
                     content_type='application/json')
        raw_data = self.etl.extract(server='http://foo.bar',
                                    token='', descriptor={'path': ''})
    df = self.etl.transform(raw_data=raw_data, descriptor={'path': ''})
    assert df.shape == (1, 2)
    assert df.values.tolist() == [['3052', 52.0]]
    assert list(df) == ['id', 'value']
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment