Commit 4147ecbb authored by Sascha Herzinger
Browse files

heatmap zscores

parent b75f21a0
Pipeline #2243 failed with stage
in 9 minutes and 31 seconds
"""Module containing analysis code for heatmap analytics."""
from typing import List
from typing import List, TypeVar
from functools import reduce
import pandas as pd
from scipy.stats import zscore
from fractalis.analytics.task import AnalyticTask
T = TypeVar('T')
class HeatmapTask(AnalyticTask):
"""Heatmap Analysis Task implementing AnalyticsTask. This class is a
submittable celery task."""
......@@ -16,8 +20,29 @@ class HeatmapTask(AnalyticTask):
def main(self, numerical_arrays: List[pd.DataFrame],
         numericals: List[pd.DataFrame],
         categoricals: List[pd.DataFrame],
         subsets: List[List[T]]) -> dict:
    """Compute row-wise z-scores for the given arrays and return them,
    together with the raw values, in long format.

    :param numerical_arrays: Data frames that each contain a 'variable'
        column; every other column is treated as the values of one id
        (presumably one sample -- confirm against the ETL output).
    :param numericals: Not used by the current implementation.
    :param categoricals: Not used by the current implementation.
    :param subsets: Not used by the current implementation.
    :return: Dict whose 'data' entry is a JSON string (orient='index')
        of records with the fields 'id', 'variable', 'value', 'zscore'.
    """
    # A single concat replaces the reduce/DataFrame.append chain: append
    # was removed in pandas 2.0 and re-copied the growing frame each step.
    df = pd.concat(numerical_arrays)
    variables = df['variable']
    df = df.drop('variable', axis=1)

    # Standardise every row on the raw value matrix. Going through
    # df.apply(zscore, axis=1) yields a Series of arrays on pandas >= 0.23
    # (zscore returns an ndarray), which breaks the reshaping below.
    zscores = pd.DataFrame(zscore(df.values.astype(float), axis=1),
                           index=df.index, columns=df.columns)

    def to_long(frame: pd.DataFrame) -> pd.DataFrame:
        """Reshape a variables-by-ids frame into id/variable/value rows."""
        frame = frame.transpose()
        frame.columns = variables
        frame.index.name = 'id'
        return pd.melt(frame.reset_index(), id_vars='id')

    # prepare output for front-end
    df = pd.merge(to_long(df), to_long(zscores), on=['id', 'variable'])
    df.columns = ['id', 'variable', 'value', 'zscore']
    return {
        'data': df.to_json(orient='index')
    }
\ No newline at end of file
......@@ -32,9 +32,15 @@ class DoubleArrayETL(ETL):
def transform(self, raw_data: List[dict], descriptor: dict) -> pd.DataFrame:
    """Build a wide data frame with one column per id from the raw rows.

    :param raw_data: Rows that each hold an 'id' entry and, under the
        descriptor's dictionary name, the values belonging to that id.
    :param descriptor: Descriptor whose ['dictionary']['name'] entry is
        the key used to look up each row's values.
    :return: Data frame with a leading 'variable' column (0..n-1) followed
        by one column per id, in the order of raw_data.
    """
    # NOTE(review): the return value is never read below -- the rows are
    # taken from raw_data. Presumably prepare_ids works in place or its
    # result should be used instead; confirm against shared.prepare_ids.
    data = shared.prepare_ids(raw_data)
    name = descriptor['dictionary']['name']
    ids = [row['id'] for row in raw_data]
    values = [row[name] for row in raw_data]
    # One row per id first, then flip so that every id becomes a column.
    df = pd.DataFrame(values).transpose()
    df.columns = ids
    df.insert(0, 'variable', pd.Series(range(df.shape[0])))
    return df
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment