Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Fractalis
fractalis
Commits
963f1a96
Commit
963f1a96
authored
Jul 25, 2018
by
Sascha Herzinger
Browse files
minor refactoring
parent
0c69e7ed
Pipeline
#5922
failed with stages
in 37 minutes and 47 seconds
Changes
7
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
fractalis/analytics/tasks/boxplot/main.py
View file @
963f1a96
...
...
@@ -43,8 +43,7 @@ class BoxplotTask(AnalyticTask):
df
=
reduce
(
lambda
l
,
r
:
l
.
append
(
r
),
features
)
df
=
utils
.
apply_transformation
(
df
=
df
,
transformation
=
transformation
)
df
.
dropna
(
inplace
=
True
)
if
id_filter
:
df
=
df
[
df
[
'id'
].
isin
(
id_filter
)]
df
=
utils
.
apply_id_filter
(
df
=
df
,
id_filter
=
id_filter
)
df
=
utils
.
apply_subsets
(
df
=
df
,
subsets
=
subsets
)
df
=
utils
.
apply_categories
(
df
=
df
,
categories
=
categories
)
df
[
'outlier'
]
=
None
...
...
fractalis/analytics/tasks/correlation/main.py
View file @
963f1a96
...
...
@@ -48,8 +48,7 @@ class CorrelationTask(AnalyticTask):
df
=
self
.
merge_x_y
(
x
,
y
)
x_label
=
list
(
df
[
'feature_x'
])[
0
]
y_label
=
list
(
df
[
'feature_y'
])[
0
]
if
id_filter
:
df
=
df
[
df
[
'id'
].
isin
(
id_filter
)]
df
=
utils
.
apply_id_filter
(
df
=
df
,
id_filter
=
id_filter
)
df
=
utils
.
apply_subsets
(
df
=
df
,
subsets
=
subsets
)
df
=
utils
.
apply_categories
(
df
=
df
,
categories
=
categories
)
global_stats
=
self
.
compute_stats
(
df
,
method
)
...
...
fractalis/analytics/tasks/heatmap/main.py
View file @
963f1a96
...
...
@@ -38,8 +38,7 @@ class HeatmapTask(AnalyticTask):
flattened_subsets
=
[
x
for
subset
in
subsets
for
x
in
subset
]
df
=
df
[
df
[
'id'
].
isin
(
flattened_subsets
)]
# apply id filter
if
id_filter
:
df
=
df
[
df
[
'id'
].
isin
(
id_filter
)]
df
=
utils
.
apply_id_filter
(
df
=
df
,
id_filter
=
id_filter
)
# drop subset ids that are not in the df
subsets
=
utils
.
drop_unused_subset_ids
(
df
=
df
,
subsets
=
subsets
)
# make sure the input data are still valid after the pre-processing
...
...
fractalis/analytics/tasks/pca/main.py
View file @
963f1a96
...
...
@@ -41,8 +41,7 @@ class PCATask(AnalyticTask):
feature_labels
=
list
(
df
)
# apply id filter
if
id_filter
:
df
=
df
[
df
.
index
.
isin
(
id_filter
)]
df
=
utils
.
apply_id_filter
(
df
=
df
,
id_filter
=
id_filter
)
# save ids so we can re-assign them after pca
ids
=
df
.
index
.
tolist
()
...
...
fractalis/analytics/tasks/shared/utils.py
View file @
963f1a96
...
...
@@ -10,11 +10,10 @@ import numpy as np
logger
=
logging
.
getLogger
(
__name__
)
T
=
TypeVar
(
'T'
)
def
apply_subsets
(
df
:
pd
.
DataFrame
,
subsets
:
List
[
List
[
T
]])
->
pd
.
DataFrame
:
subsets
:
List
[
List
[
str
]])
->
pd
.
DataFrame
:
"""Build a new DataFrame that contains a new column 'subset' defining
the subset the data point belongs to. If a data point belongs to
multiple subsets then the row is duplicated.
...
...
@@ -75,8 +74,18 @@ def apply_categories(df: pd.DataFrame,
return
df
def apply_id_filter(df: pd.DataFrame, id_filter: List[str]) -> pd.DataFrame:
    """Keep only rows whose id is in id_filter. If id_filter is empty keep all.

    The ids are read from the 'id' column when the frame has one. Frames
    without an 'id' column are filtered on their index instead, so callers
    that keep the ids in the index (as the PCA task did with
    ``df.index.isin(id_filter)``) get the same rows as before.

    :param df: Dataframe containing array data in the Fractalis format.
    :param id_filter: List of ids to keep.
    :return: The filtered DataFrame, or ``df`` itself if id_filter is empty.
    """
    # An empty (or missing) filter means "no restriction".
    if not id_filter:
        return df
    # Prefer the explicit column; fall back to the index for id-indexed frames.
    ids = df['id'] if 'id' in df.columns else df.index
    return df[ids.isin(id_filter)]
def
drop_unused_subset_ids
(
df
:
pd
.
DataFrame
,
subsets
:
List
[
List
[
T
]])
->
List
[
List
[
T
]]:
subsets
:
List
[
List
[
str
]])
->
List
[
List
[
str
]]:
"""Drop subset ids that are not present in the given data
:param df: Dataframe containing array data in the Fractalis format.
:param subsets: Subset groups specified by the user.
...
...
fractalis/analytics/tasks/survival/main.py
View file @
963f1a96
...
...
@@ -39,8 +39,7 @@ class SurvivalTask(AnalyticTask):
df
=
durations
[
0
]
df
.
dropna
(
inplace
=
True
)
if
id_filter
:
df
=
df
[
df
[
'id'
].
isin
(
id_filter
)]
df
=
utils
.
apply_id_filter
(
df
=
df
,
id_filter
=
id_filter
)
df
=
utils
.
apply_subsets
(
df
=
df
,
subsets
=
subsets
)
df
=
utils
.
apply_categories
(
df
=
df
,
categories
=
categories
)
...
...
fractalis/analytics/tasks/volcanoplot/main.py
View file @
963f1a96
...
...
@@ -35,8 +35,7 @@ class VolcanoTask(AnalyticTask):
flattened_subsets
=
[
x
for
subset
in
subsets
for
x
in
subset
]
df
=
df
[
df
[
'id'
].
isin
(
flattened_subsets
)]
# apply id filter
if
id_filter
:
df
=
df
[
df
[
'id'
].
isin
(
id_filter
)]
df
=
utils
.
apply_id_filter
(
df
=
df
,
id_filter
=
id_filter
)
# drop subset ids that are not in the df
subsets
=
utils
.
drop_unused_subset_ids
(
df
=
df
,
subsets
=
subsets
)
# make sure the input data are still valid after the pre-processing
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment