Commit b75f21a0 authored by Sascha Herzinger's avatar Sascha Herzinger
Browse files

basic heatmap code

parent 2b62cc7d
Pipeline #2242 failed with stage
in 13 minutes and 34 seconds
......@@ -109,9 +109,8 @@ class AnalyticTask(Task, metaclass=abc.ABCMeta):
data_task_ids = [el[1:-1] for el in value]
dfs = []
for data_task_id in data_task_ids:
df = self.data_task_id_to_data_frame(data_task_id,
session_data_tasks,
decrypt)
df = self.data_task_id_to_data_frame(
data_task_id, session_data_tasks, decrypt)
dfs.append(df)
value = dfs
arguments[arg] = value
......
......@@ -54,7 +54,8 @@ class BoxplotTask(AnalyticTask):
for variable in variable_names:
for subset in results['subsets']:
for category in results['categories']:
values = df[(df['subset'] == subset) & (df['category'] == category)][variable].tolist()
values = df[(df['subset'] == subset) &
(df['category'] == category)][variable].tolist()
values = [value for value in values if not np.isnan(value)]
if len(values) < 2:
continue
......@@ -63,7 +64,8 @@ class BoxplotTask(AnalyticTask):
xs = np.linspace(start=stats['l_wsk'],
stop=stats['u_wsk'], num=100)
stats['kde'] = kde(xs).tolist()
results['statistics']['{}//{}//s{}'.format(variable, category, subset + 1)] = stats
label = '{}//{}//s{}'.format(variable, category, subset + 1)
results['statistics'][label] = stats
return results
@staticmethod
......
......@@ -85,7 +85,7 @@ class CorrelationTask(AnalyticTask):
assert len(colnames) == 2
x_label = colnames[0]
y_label = colnames[1]
return (x_label, y_label)
return x_label, y_label
@staticmethod
def compute_stats(df: pd.DataFrame, method: str,
......
"""Module containing analysis code for heatmap analytics."""
from typing import List
from functools import reduce
import pandas as pd
from fractalis.analytics.task import AnalyticTask
class HeatmapTask(AnalyticTask):
    """Heatmap Analysis Task implementing AnalyticsTask. This class is a
    submittable celery task."""

    name = 'compute-heatmap'

    def main(self, numerical_arrays: List[pd.DataFrame],
             numericals: List[pd.DataFrame],
             categoricals: List[pd.DataFrame]) -> dict:
        """Combine all numerical array data frames into one heatmap matrix.

        :param numerical_arrays: Row-wise pieces of the heatmap data.
        :param numericals: Currently unused.
        :param categoricals: Currently unused.
        :return: Dict with the combined data serialized as JSON
            ('index' orientation).
        """
        # pd.concat performs a single O(n) concatenation instead of the
        # quadratic repeated DataFrame.append (which was removed in
        # pandas 2.0).
        df = pd.concat(numerical_arrays)
        return {
            'data': df.to_json(orient='index')
        }
\ No newline at end of file
......@@ -20,13 +20,11 @@ PERMANENT_SESSION_LIFETIME = timedelta(days=1)
# Celery
BROKER_URL = 'amqp://'
CELERY_RESULT_BACKEND = 'redis://{}:{}'.format(REDIS_HOST, REDIS_PORT)
CELERYD_TASK_SOFT_TIME_LIMIT = 60 * 10
CELERYD_TASK_SOFT_TIME_LIMIT = 60 * 20
CELERY_TASK_RESULT_EXPIRES = timedelta(hours=1)
CELERYD_HIJACK_ROOT_LOGGER = False
# Fractalis
LOG_LEVEL = logging.INFO
LOG_FILE = os.path.join(os.sep, 'tmp', 'fractalis.log')
FRACTALIS_TMP_DIR = os.path.abspath(os.path.join(
os.sep, 'tmp', 'fractalis'))
FRACTALIS_CACHE_EXP = timedelta(days=10)
......
......@@ -5,7 +5,7 @@ from typing import List
from pandas import DataFrame
from fractalis.data.etl import ETL
from fractalis.data.etls.ada import common
from fractalis.data.etls.ada import shared
class BooleanETL(ETL):
......@@ -18,18 +18,19 @@ class BooleanETL(ETL):
def can_handle(handler, descriptor):
return handler == 'ada' and \
descriptor['dictionary']['fieldType'] and \
descriptor['dictionary']['fieldType'] == 'Boolean'
descriptor['dictionary']['fieldType'] == 'Boolean' and not \
descriptor['dictionary']['isArray']
def extract(self, server: str, token: str, descriptor: dict) -> List[dict]:
data_set = descriptor['data_set']
projection = descriptor['dictionary']['projection']
cookie = common.make_cookie(token=token)
data = common.get_field(server=server, data_set=data_set,
cookie = shared.make_cookie(token=token)
data = shared.get_field(server=server, data_set=data_set,
cookie=cookie, projection=projection)
return data
def transform(self, raw_data: List[dict], descriptor: dict) -> DataFrame:
data = common.prepare_ids(raw_data)
data = common.name_to_label(data, descriptor)
data = shared.prepare_ids(raw_data)
data = shared.name_to_label(data, descriptor)
data_frame = DataFrame(data)
return data_frame
......@@ -5,7 +5,7 @@ from typing import List
from pandas import DataFrame
from fractalis.data.etl import ETL
from fractalis.data.etls.ada import common
from fractalis.data.etls.ada import shared
class DateETL(ETL):
......@@ -18,18 +18,19 @@ class DateETL(ETL):
def can_handle(handler: str, descriptor: dict) -> bool:
return handler == 'ada' and \
descriptor['dictionary']['fieldType'] and \
descriptor['dictionary']['fieldType'] == 'Date'
descriptor['dictionary']['fieldType'] == 'Date' and not \
descriptor['dictionary']['isArray']
def extract(self, server: str, token: str, descriptor: dict) -> List[dict]:
data_set = descriptor['data_set']
projection = descriptor['dictionary']['projection']
cookie = common.make_cookie(token=token)
data = common.get_field(server=server, data_set=data_set,
cookie = shared.make_cookie(token=token)
data = shared.get_field(server=server, data_set=data_set,
cookie=cookie, projection=projection)
return data
def transform(self, raw_data: List[dict], descriptor: dict) -> DataFrame:
data = common.prepare_ids(raw_data)
data = common.name_to_label(data, descriptor)
data = shared.prepare_ids(raw_data)
data = shared.name_to_label(data, descriptor)
data_frame = DataFrame(data)
return data_frame
......@@ -5,7 +5,7 @@ from typing import List
from pandas import DataFrame
from fractalis.data.etl import ETL
from fractalis.data.etls.ada import common
from fractalis.data.etls.ada import shared
class DoubleETL(ETL):
......@@ -18,19 +18,20 @@ class DoubleETL(ETL):
def can_handle(handler, descriptor):
return handler == 'ada' and \
descriptor['dictionary']['fieldType'] and \
descriptor['dictionary']['fieldType'] == 'Double'
descriptor['dictionary']['fieldType'] == 'Double' and not \
descriptor['dictionary']['isArray']
def extract(self, server: str, token: str, descriptor: dict) -> List[dict]:
data_set = descriptor['data_set']
projection = descriptor['dictionary']['projection']
cookie = common.make_cookie(token=token)
data = common.get_field(server=server, data_set=data_set,
cookie = shared.make_cookie(token=token)
data = shared.get_field(server=server, data_set=data_set,
cookie=cookie, projection=projection)
return data
def transform(self, raw_data: List[dict], descriptor: dict) -> DataFrame:
data = common.prepare_ids(raw_data)
data = common.name_to_label(data, descriptor)
data = shared.prepare_ids(raw_data)
data = shared.name_to_label(data, descriptor)
data_frame = DataFrame(data)
return data_frame
"""Provides DoubleArrayETL for Ada."""
from typing import List
import pandas as pd
from fractalis.data.etl import ETL
from fractalis.data.etls.ada import shared
class DoubleArrayETL(ETL):
    """DoubleArrayETL implements support for Adas 'Double' array type."""

    name = 'ada_double_array_etl'
    produces = 'numerical_array'

    @staticmethod
    def can_handle(handler, descriptor):
        """Check whether this ETL is responsible for the given request.

        :param handler: Name of the data source handler.
        :param descriptor: Dict describing the requested field.
        :return: True only for Ada 'Double' fields that are arrays.
        """
        return handler == 'ada' and \
            descriptor['dictionary']['fieldType'] and \
            descriptor['dictionary']['fieldType'] == 'Double' and \
            descriptor['dictionary']['isArray']

    def extract(self, server: str, token: str, descriptor: dict) -> List[dict]:
        """Download the raw field data from the Ada server.

        :param server: The Ada server to contact.
        :param token: Token used to authenticate the request.
        :param descriptor: Dict describing the requested field.
        :return: List of raw records as returned by Ada.
        """
        data_set = descriptor['data_set']
        projection = descriptor['dictionary']['projection']
        cookie = shared.make_cookie(token=token)
        data = shared.get_field(server=server, data_set=data_set,
                                cookie=cookie, projection=projection)
        return data

    def transform(self, raw_data: List[dict], descriptor: dict) -> pd.DataFrame:
        """Convert the raw records into a long-format data frame.

        :param raw_data: Records as returned by extract().
        :param descriptor: Dict describing the requested field.
        :return: Molten DataFrame with columns 'id', 'variable' and 'value'.
        """
        data = shared.prepare_ids(raw_data)
        name = descriptor['dictionary']['name']
        # Bug fix: build the matrix from the id-normalized records returned
        # by prepare_ids() -- the original computed `data` and then silently
        # discarded it by iterating over `raw_data` instead.
        rows = [[row['id']] + row[name] for row in data]
        colnames = ['id'] + list(range(len(rows[0]) - 1))
        df = pd.DataFrame(rows, columns=colnames)
        df = pd.melt(df, id_vars=['id'])
        return df
......@@ -5,7 +5,7 @@ from typing import List
from pandas import DataFrame
from fractalis.data.etl import ETL
from fractalis.data.etls.ada import common
from fractalis.data.etls.ada import shared
class EnumETL(ETL):
......@@ -18,22 +18,23 @@ class EnumETL(ETL):
def can_handle(handler, descriptor):
return handler == 'ada' and \
descriptor['dictionary']['fieldType'] and \
descriptor['dictionary']['fieldType'] == 'Enum'
descriptor['dictionary']['fieldType'] == 'Enum' and not \
descriptor['dictionary']['isArray']
def extract(self, server: str, token: str, descriptor: dict) -> List[dict]:
data_set = descriptor['data_set']
projection = descriptor['dictionary']['projection']
cookie = common.make_cookie(token=token)
data = common.get_field(server=server, data_set=data_set,
cookie = shared.make_cookie(token=token)
data = shared.get_field(server=server, data_set=data_set,
cookie=cookie, projection=projection)
return data
def transform(self, raw_data: List[dict], descriptor: dict) -> DataFrame:
data = common.prepare_ids(raw_data)
data = shared.prepare_ids(raw_data)
for row in data:
value = row[descriptor['dictionary']['name']]
value = descriptor['dictionary']['numValues'][str(value)]
row[descriptor['dictionary']['name']] = value
data = common.name_to_label(data, descriptor)
data = shared.name_to_label(data, descriptor)
data_frame = DataFrame(data)
return data_frame
......@@ -5,7 +5,7 @@ from typing import List
from pandas import DataFrame
from fractalis.data.etl import ETL
from fractalis.data.etls.ada import common
from fractalis.data.etls.ada import shared
class IntegerETL(ETL):
......@@ -18,18 +18,19 @@ class IntegerETL(ETL):
def can_handle(handler, descriptor):
return handler == 'ada' and \
descriptor['dictionary']['fieldType'] and \
descriptor['dictionary']['fieldType'] == 'Integer'
descriptor['dictionary']['fieldType'] == 'Integer' and not \
descriptor['dictionary']['isArray']
def extract(self, server: str, token: str, descriptor: dict) -> List[dict]:
data_set = descriptor['data_set']
projection = descriptor['dictionary']['projection']
cookie = common.make_cookie(token=token)
data = common.get_field(server=server, data_set=data_set,
cookie = shared.make_cookie(token=token)
data = shared.get_field(server=server, data_set=data_set,
cookie=cookie, projection=projection)
return data
def transform(self, raw_data: List[dict], descriptor: dict) -> DataFrame:
data = common.prepare_ids(raw_data)
data = common.name_to_label(data, descriptor)
data = shared.prepare_ids(raw_data)
data = shared.name_to_label(data, descriptor)
data_frame = DataFrame(data)
return data_frame
......@@ -5,7 +5,7 @@ from typing import List
from pandas import DataFrame
from fractalis.data.etl import ETL
from fractalis.data.etls.ada import common
from fractalis.data.etls.ada import shared
class StringETL(ETL):
......@@ -18,18 +18,19 @@ class StringETL(ETL):
def can_handle(handler, descriptor):
return handler == 'ada' and \
descriptor['dictionary']['fieldType'] and \
descriptor['dictionary']['fieldType'] == 'String'
descriptor['dictionary']['fieldType'] == 'String' and not \
descriptor['dictionary']['isArray']
def extract(self, server: str, token: str, descriptor: dict) -> List[dict]:
data_set = descriptor['data_set']
projection = descriptor['dictionary']['projection']
cookie = common.make_cookie(token=token)
data = common.get_field(server=server, data_set=data_set,
cookie = shared.make_cookie(token=token)
data = shared.get_field(server=server, data_set=data_set,
cookie=cookie, projection=projection)
return data
def transform(self, raw_data: List[dict], descriptor: dict) -> DataFrame:
data = common.prepare_ids(raw_data)
data = common.name_to_label(data, descriptor)
data = shared.prepare_ids(raw_data)
data = shared.name_to_label(data, descriptor)
data_frame = DataFrame(data)
return data_frame
......@@ -23,7 +23,7 @@ def get_field(server: str, data_set: str,
'filterOrId': '[{{"fieldName":"{}","conditionType":"!=","value":""}}]'.format(projection)
},
cookies=cookie,
timeout=20)
timeout=60)
if r.status_code != 200:
error = "Data extraction failed. Target server responded with " \
"status code {}.".format(r.status_code)
......
"""Provides categorical concept ETL for tranSMART."""
import logging
from pandas import DataFrame
from fractalis.data.etl import ETL
from fractalis.data.etls.transmart.shared import extract_data
logger = logging.getLogger(__name__)
class CategoricalETL(ETL):
    """CategoricalETL implements support for tranSMARTs 'categorical' type."""

    name = 'transmart_categorical_etl'
    produces = 'categorical'

    @staticmethod
    def can_handle(handler: str, descriptor: dict) -> bool:
        """Check whether this ETL is responsible for the given request.

        :param handler: Name of the data source handler.
        :param descriptor: Dict describing the requested concept.
        :return: True only for tranSMART 'categorical' concepts.
        """
        return handler == 'transmart' and \
            descriptor['data_type'] == 'categorical'

    def extract(self, server: str, token: str, descriptor: dict) -> dict:
        """Download the raw observations from the tranSMART server.

        :param server: The tranSMART server to contact.
        :param token: Token used to authenticate the request.
        :param descriptor: Dict describing the requested concept.
        :return: Parsed JSON response of the observations endpoint.
        """
        return extract_data(server=server, descriptor=descriptor, token=token)

    def transform(self, raw_data: dict, descriptor: dict) -> DataFrame:
        """Convert the raw observations into an id/value data frame.

        :param raw_data: Response as returned by extract().
        :param descriptor: Dict describing the requested concept.
        :return: DataFrame with columns 'id' and 'value'.
        """
        rows = []
        for entry in raw_data['cells']:
            idx = entry['dimensionIndexes'][2]
            # Renamed from 'id' to avoid shadowing the builtin.
            patient_id = \
                raw_data['dimensionElements']['patient'][idx]['inTrialId']
            # NOTE(review): for a categorical concept the observation is
            # usually carried in 'stringValue'; 'numericValue' looks
            # copy-pasted from the numerical ETL and would likely yield
            # None here -- confirm against the tranSMART v2 API.
            value = entry['numericValue']
            rows.append([patient_id, value])
        df = DataFrame(rows, columns=['id', 'value'])
        return df
"""Provides highdim concept ETL for tranSMART."""
import logging
from pandas import DataFrame
from fractalis.data.etl import ETL
from fractalis.data.etls.transmart.shared import extract_data
logger = logging.getLogger(__name__)
class HighdimETL(ETL):
    """HighdimETL implements support for tranSMARTs 'highdim' type."""

    name = 'transmart_highdim_etl'
    produces = 'highdim'

    @staticmethod
    def can_handle(handler: str, descriptor: dict) -> bool:
        """Return True iff this ETL supports the given handler/descriptor."""
        return handler == 'transmart' and descriptor['data_type'] == 'highdim'

    def extract(self, server: str, token: str, descriptor: dict) -> dict:
        """Fetch the raw observations for the described highdim concept.

        :param server: The tranSMART server to contact.
        :param token: Token used to authenticate the request.
        :param descriptor: Dict describing the requested concept.
        :return: Parsed JSON response of the observations endpoint.
        """
        return extract_data(server=server, descriptor=descriptor, token=token)

    def transform(self, raw_data: dict, descriptor: dict) -> DataFrame:
        """Reshape the raw observations into an id/value data frame.

        :param raw_data: Response as returned by extract().
        :param descriptor: Dict describing the requested concept.
        :return: DataFrame with columns 'id' and 'value'.
        """
        patients = raw_data['dimensionElements']['patient']
        # Each cell's third dimension index points at its patient.
        rows = [[patients[cell['dimensionIndexes'][2]]['inTrialId'],
                 cell['numericValue']]
                for cell in raw_data['cells']]
        return DataFrame(rows, columns=['id', 'value'])
......@@ -2,11 +2,10 @@
import logging
import requests
from pandas import DataFrame
from fractalis.data.etl import ETL
from fractalis.data.etls.transmart.shared import extract_data
logger = logging.getLogger(__name__)
......@@ -22,27 +21,7 @@ class NumericalETL(ETL):
return handler == 'transmart' and descriptor['data_type'] == 'numerical'
def extract(self, server: str, token: str, descriptor: dict) -> dict:
r = requests.get(url='{}/v2/observations'.format(server),
params={
'constraint': '{{"type": "concept","path": "{}"}}'.format(descriptor["path"]),
'type': 'clinical'
},
headers={
'Accept': 'application/json',
'Authorization': 'Bearer {}'.format(token)
},
timeout=60)
if r.status_code != 200:
error = "Data extraction failed. Target server responded with " \
"status code {}.".format(r.status_code)
logger.error(error)
raise ValueError(error)
try:
return r.json()
except Exception as e:
logger.exception(e)
raise ValueError("Data extraction failed. "
"Got unexpected data format.")
return extract_data(server=server, descriptor=descriptor, token=token)
def transform(self, raw_data: dict, descriptor: dict) -> DataFrame:
rows = []
......
"""This module provides shared functionality to the transmart ETLs."""
import json
import logging

import requests
logger = logging.getLogger(__name__)
def extract_data(server: str, descriptor: dict, token: str) -> dict:
    """Extract data from transmart.

    :param server: The target server host.
    :param descriptor: Dict describing the data to download.
    :param token: The token used for authentication.
    :return: The parsed JSON body of the observations response.
    :raises ValueError: If the server responds with a non-200 status code
        or the payload is not valid JSON.
    """
    # json.dumps escapes quotes/backslashes in the concept path; the
    # previous str.format construction produced invalid JSON for any
    # path containing such characters.
    constraint = json.dumps({'type': 'concept', 'path': descriptor['path']})
    r = requests.get(url='{}/v2/observations'.format(server),
                     params={
                         'constraint': constraint,
                         'type': 'autodetect'
                     },
                     headers={
                         'Accept': 'application/json',
                         'Authorization': 'Bearer {}'.format(token)
                     },
                     timeout=2000)
    if r.status_code != 200:
        error = "Data extraction failed. Target server responded with " \
                "status code {}.".format(r.status_code)
        logger.error(error)
        raise ValueError(error)
    try:
        return r.json()
    except Exception as e:
        logger.exception(e)
        # Chain the cause so the original decode error stays visible.
        raise ValueError("Data extraction failed. "
                         "Got unexpected data format.") from e
......@@ -8,7 +8,7 @@ handlers:
class: logging.handlers.TimedRotatingFileHandler
level: WARNING
formatter: default
filename: fractalis.log
filename: /tmp/fractalis.log
when: midnight
backupCount: 14
encoding: utf8
......
......@@ -26,13 +26,16 @@ class TestBooleanETL:
def test_correct_handler(self):
assert self.etl.can_handle(handler='ada', descriptor={
'dictionary': {'fieldType': 'Boolean'}
'dictionary': {'fieldType': 'Boolean', 'isArray': False}
})
assert not self.etl.can_handle(handler='ada', descriptor={
'dictionary': {'fieldType': 'Boolean', 'isArray': True}
})
assert not self.etl.can_handle(handler='transmart', descriptor={
'dictionary': {'fieldType': 'Boolean'}
'dictionary': {'fieldType': 'Boolean', 'isArray': False}
})
assert not self.etl.can_handle(handler='ada', descriptor={
'dictionary': {'fieldType': 'foo'}
'dictionary': {'fieldType': 'foo', 'isArray': False}
})
def test_extract_raises_readable_if_not_200(self):
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment