Commit d6219fd3 authored by Sascha Herzinger
Browse files

replaced variance with std

parent 5a33dbf2
Pipeline #5944 passed with stages
in 37 minutes and 32 seconds
...@@ -25,7 +25,7 @@ class HistogramTask(AnalyticTask): ...@@ -25,7 +25,7 @@ class HistogramTask(AnalyticTask):
subsets: List[List[str]], subsets: List[List[str]],
data: pd.DataFrame, data: pd.DataFrame,
categories: List[pd.DataFrame]) -> dict: categories: List[pd.DataFrame]) -> dict:
"""Compute several basic statistics such as bin size and variance. """Compute several basic statistics such as bin size and kde.
:param id_filter: If specified use only given ids during the analysis. :param id_filter: If specified use only given ids during the analysis.
:param subsets: List of lists of subset ids. :param subsets: List of lists of subset ids.
:param data: Numerical values to create histogram of. :param data: Numerical values to create histogram of.
...@@ -54,7 +54,7 @@ class HistogramTask(AnalyticTask): ...@@ -54,7 +54,7 @@ class HistogramTask(AnalyticTask):
bin_edges = bin_edges.tolist() bin_edges = bin_edges.tolist()
mean = np.mean(values) mean = np.mean(values)
median = np.median(values) median = np.median(values)
variance = np.var(values) std = np.std(values)
if not stats.get(category): if not stats.get(category):
stats[category] = {} stats[category] = {}
stats[category][subset] = { stats[category][subset] = {
...@@ -62,7 +62,7 @@ class HistogramTask(AnalyticTask): ...@@ -62,7 +62,7 @@ class HistogramTask(AnalyticTask):
'bin_edges': bin_edges, 'bin_edges': bin_edges,
'mean': mean, 'mean': mean,
'median': median, 'median': median,
'variance': variance 'std': std
} }
return { return {
'stats': stats, 'stats': stats,
......
...@@ -43,7 +43,7 @@ class TestHistogramTask: ...@@ -43,7 +43,7 @@ class TestHistogramTask:
assert 'B' in result['stats'] assert 'B' in result['stats']
assert 0 in result['stats']['A'] assert 0 in result['stats']['A']
assert all([stat in result['stats']['A'][0] for stat in assert all([stat in result['stats']['A'][0] for stat in
['hist', 'bin_edges', 'mean', 'median', 'variance']]) ['hist', 'bin_edges', 'mean', 'median', 'std']])
def test_can_handle_nas(self): def test_can_handle_nas(self):
df = pd.DataFrame([[100, 'foo', float('nan')], df = pd.DataFrame([[100, 'foo', float('nan')],
...@@ -63,7 +63,6 @@ class TestHistogramTask: ...@@ -63,7 +63,6 @@ class TestHistogramTask:
categories=[]) categories=[])
assert result['stats'][''][0]['median'] == 6 assert result['stats'][''][0]['median'] == 6
assert result['stats'][''][0]['mean'] == 6 assert result['stats'][''][0]['mean'] == 6
assert result['stats'][''][0]['variance'] == 8
def test_can_handle_negatives(self): def test_can_handle_negatives(self):
df = pd.DataFrame([[100, 'foo', -2], df = pd.DataFrame([[100, 'foo', -2],
...@@ -113,7 +112,7 @@ class TestHistogramTask: ...@@ -113,7 +112,7 @@ class TestHistogramTask:
categories=[cat_df]) categories=[cat_df])
assert result['stats']['A'][0]['median'] == 1 assert result['stats']['A'][0]['median'] == 1
assert result['stats']['A'][0]['mean'] == 1 assert result['stats']['A'][0]['mean'] == 1
assert result['stats']['A'][0]['variance'] == 0 assert result['stats']['A'][0]['std'] == 0
def test_skips_empty_groups(self): def test_skips_empty_groups(self):
df = pd.DataFrame([[100, 'foo', float('nan')], df = pd.DataFrame([[100, 'foo', float('nan')],
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment