Commit d5fe6635 authored by Sascha Herzinger
Browse files

Added outlier check to boxplots

parent b6d37044
......@@ -45,7 +45,6 @@ class BoxplotTask(AnalyticTask):
df = apply_subsets(df=df, subsets=subsets)
df = apply_categories(df=df, categories=categories)
results = {
'data': df.to_json(orient='records'),
'statistics': {},
'features': df['feature'].unique().tolist(),
'categories': df['category'].unique().tolist(),
......@@ -69,6 +68,11 @@ class BoxplotTask(AnalyticTask):
stop=stats['u_wsk'], num=100)
stats['kde'] = kde(xs).tolist()
results['statistics'][label] = stats
u_outliers = df['value'] > results['statistics']['foo////s1']['u_wsk']
l_outliers = df['value'] < results['statistics']['foo////s1']['l_wsk']
outliers = np.bitwise_or(u_outliers, l_outliers)
df['outlier'] = outliers
results['data'] = df.to_json(orient='records')
f_value, p_value = scipy.stats.f_oneway(*group_values)
results['anova'] = {
'p_value': p_value,
......
......@@ -40,3 +40,15 @@ class TestBoxplotAnalytics:
assert not np.isnan(stats['u_qrt'])
assert not np.isnan(stats['l_wsk'])
assert not np.isnan(stats['u_wsk'])
def test_marks_outliers(self):
    """Verify the task output carries a per-row ``outlier`` flag.

    Five rows of a single feature ``'foo'`` are fed to the task; the
    first (-50) and last (100) values are expected to come back flagged
    as outliers, the three values in between are not.
    """
    rows = [
        [100, 'foo', -50],
        [101, 'foo', 1],
        [102, 'foo', 2],
        [103, 'foo', 3],
        [104, 'foo', 100],
    ]
    frame = pd.DataFrame(rows, columns=['id', 'feature', 'value'])
    results = self.task.main(features=[frame], categories=[],
                             id_filter=[], subsets=[])
    # results['data'] is a JSON string; decode it back into a frame so
    # the 'outlier' column can be compared row by row.
    records = json.loads(results['data'])
    parsed = pd.DataFrame.from_dict(records)
    expected = [True, False, False, False, True]
    assert np.all(parsed['outlier'] == expected)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment