Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Fractalis
fractalis
Commits
79d3eb62
Commit
79d3eb62
authored
Jun 05, 2018
by
Sascha Herzinger
Browse files
Boxplot should be able to handle NAs
parent
9cb56527
Changes
2
Hide whitespace changes
Inline
Side-by-side
fractalis/analytics/tasks/boxplot/main.py
View file @
79d3eb62
...
...
@@ -40,6 +40,7 @@ class BoxplotTask(AnalyticTask):
"non empty numerical feature."
)
# merge dfs into single one
df
=
reduce
(
lambda
l
,
r
:
l
.
append
(
r
),
features
)
df
.
dropna
(
inplace
=
True
)
if
id_filter
:
df
=
df
[
df
[
'id'
].
isin
(
id_filter
)]
df
=
apply_subsets
(
df
=
df
,
subsets
=
subsets
)
...
...
tests/unit/analytics/boxplot/test_boxplot.py
View file @
79d3eb62
...
...
@@ -43,11 +43,11 @@ class TestBoxplotAnalytics:
def
test_marks_outliers
(
self
):
df_1
=
pd
.
DataFrame
([[
100
,
'foo'
,
-
50
],
[
101
,
'foo'
,
1
],
[
102
,
'foo'
,
2
],
[
103
,
'foo'
,
3
],
[
104
,
'foo'
,
100
]],
columns
=
[
'id'
,
'feature'
,
'value'
])
[
101
,
'foo'
,
1
],
[
102
,
'foo'
,
2
],
[
103
,
'foo'
,
3
],
[
104
,
'foo'
,
100
]],
columns
=
[
'id'
,
'feature'
,
'value'
])
df_2
=
pd
.
DataFrame
([[
201
,
'bar'
,
1
],
[
202
,
'bar'
,
2
],
[
203
,
'bar'
,
3
],
...
...
@@ -58,3 +58,34 @@ class TestBoxplotAnalytics:
df
=
pd
.
DataFrame
.
from_dict
(
json
.
loads
(
results
[
'data'
]))
assert
np
.
all
(
df
[
'outlier'
]
==
[
True
,
False
,
False
,
False
,
True
,
False
,
False
,
False
,
True
])
def test_can_handle_nan(self):
    """Check that a NaN feature value does not break the statistics.

    One of the five 'foo' rows (id 102) has a missing value; the test
    expects the reported median for 'foo////s1' to be 2, i.e. the median
    of the remaining values -50, 1, 3 and 100.
    """
    rows = [
        [100, 'foo', -50],
        [101, 'foo', 1],
        [102, 'foo', float('nan')],  # the missing observation
        [103, 'foo', 3],
        [104, 'foo', 100],
    ]
    feature_df = pd.DataFrame(rows, columns=['id', 'feature', 'value'])
    results = self.task.main(
        features=[feature_df],
        categories=[],
        id_filter=[],
        subsets=[],
    )
    foo_stats = results['statistics']['foo////s1']
    assert foo_stats['median'] == 2
def test_can_handle_groups_with_only_nan(self):
    """Check that a category group holding only NaN values is left out.

    Both 'male' ids (102 and 105) have NaN feature values, while every
    'female' id has a number. The test expects statistics for the female
    group only.
    """
    column_names = ['id', 'feature', 'value']
    feature_rows = [
        [100, 'foo', -50],
        [101, 'foo', 1],
        [102, 'foo', float('nan')],
        [103, 'foo', 3],
        [104, 'foo', 100],
        [105, 'foo', float('nan')],
    ]
    # Gender assignment for ids 100..105, in order.
    genders = ['female', 'female', 'male', 'female', 'female', 'male']
    category_rows = [
        [sample_id, 'gender', gender]
        for sample_id, gender in zip(range(100, 106), genders)
    ]
    feature_df = pd.DataFrame(feature_rows, columns=column_names)
    category_df = pd.DataFrame(category_rows, columns=column_names)

    results = self.task.main(
        features=[feature_df],
        categories=[category_df],
        id_filter=[],
        subsets=[],
    )
    statistics = results['statistics']
    assert 'foo//female//s1' in statistics
    assert 'foo//male//s1' not in statistics
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment