Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Fractalis
fractalis
Commits
72793e60
Commit
72793e60
authored
Aug 25, 2017
by
Sascha Herzinger
Browse files
added variant ratios to PCA output
parent
6cbd51bd
Pipeline
#2269
failed with stage
in 1 minute and 16 seconds
Changes
2
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
fractalis/analytics/tasks/pca/main.py
View file @
72793e60
...
@@ -5,6 +5,7 @@ from functools import reduce
...
@@ -5,6 +5,7 @@ from functools import reduce
import
logging
import
logging
import
pandas
as
pd
import
pandas
as
pd
import
numpy
as
np
from
sklearn.decomposition
import
PCA
from
sklearn.decomposition
import
PCA
from
sklearn.preprocessing
import
Imputer
from
sklearn.preprocessing
import
Imputer
...
@@ -38,6 +39,7 @@ class PCATask(AnalyticTask):
...
@@ -38,6 +39,7 @@ class PCATask(AnalyticTask):
# make matrix of data
# make matrix of data
df
=
df
.
pivot
(
index
=
'feature'
,
columns
=
'id'
,
values
=
'value'
)
df
=
df
.
pivot
(
index
=
'feature'
,
columns
=
'id'
,
values
=
'value'
)
df
=
df
.
T
df
=
df
.
T
feature_labels
=
list
(
df
)
# apply id filter
# apply id filter
if
id_filter
:
if
id_filter
:
...
@@ -56,6 +58,14 @@ class PCATask(AnalyticTask):
...
@@ -56,6 +58,14 @@ class PCATask(AnalyticTask):
pca
.
fit
(
df
)
pca
.
fit
(
df
)
reduced_df
=
pca
.
transform
(
df
)
reduced_df
=
pca
.
transform
(
df
)
# get explained variance ratios of components
variance_ratios
=
pca
.
explained_variance_ratio_
# get loadings
loadings
=
-
1
*
pca
.
components_
.
T
*
np
.
sqrt
(
pca
.
explained_variance_
)
loadings
=
pd
.
DataFrame
(
loadings
)
loadings
[
'feature'
]
=
feature_labels
# re-assign ids
# re-assign ids
reduced_df
=
pd
.
DataFrame
(
reduced_df
)
reduced_df
=
pd
.
DataFrame
(
reduced_df
)
reduced_df
[
'id'
]
=
ids
reduced_df
[
'id'
]
=
ids
...
@@ -66,5 +76,7 @@ class PCATask(AnalyticTask):
...
@@ -66,5 +76,7 @@ class PCATask(AnalyticTask):
categories
=
categories
)
categories
=
categories
)
return
{
return
{
'data'
:
reduced_df
.
to_json
(
orient
=
'records'
)
'data'
:
reduced_df
.
to_json
(
orient
=
'records'
),
'loadings'
:
loadings
.
to_json
(
orient
=
'records'
),
'variance_ratios'
:
variance_ratios
.
tolist
()
}
}
\ No newline at end of file
tests/pca/test_main.py
View file @
72793e60
...
@@ -62,3 +62,38 @@ class TestPCATask:
...
@@ -62,3 +62,38 @@ class TestPCATask:
subsets
=
[])
subsets
=
[])
data
=
pd
.
read_json
(
result
[
'data'
])
data
=
pd
.
read_json
(
result
[
'data'
])
assert
data
[
'id'
].
unique
().
tolist
()
==
[
101
,
104
]
assert
data
[
'id'
].
unique
().
tolist
()
==
[
101
,
104
]
def test_correct_loadings(self):
    """Check the symmetry of the loadings for two mirrored features.

    'foo' rises 5, 10, 15, 20 across ids 101..104 while 'bar' falls
    20, 15, 10, 5, so the two features are perfectly anti-correlated.
    The test expects the loadings of the two features to be opposite
    on component '0' and equal on component '1'.
    """
    features = [
        pd.DataFrame([[101, 'foo', 5], [101, 'bar', 20],
                      [102, 'foo', 10], [102, 'bar', 15],
                      [103, 'foo', 15], [103, 'bar', 10],
                      [104, 'foo', 20], [104, 'bar', 5]],
                     columns=['id', 'feature', 'value'])
    ]
    result = self.task.main(features=features,
                            categories=[],
                            n_components=2,
                            whiten=False,
                            id_filter=[],
                            subsets=[])
    loadings = pd.read_json(result['loadings'])
    first_component = loadings['0'].tolist()
    second_component = loadings['1'].tolist()
    # The loadings are floats that went through a JSON round trip, so
    # the two values being compared may differ in their last digits.
    # Compare with an absolute tolerance instead of exact equality.
    tolerance = 1e-9
    assert abs(first_component[0] + first_component[1]) < tolerance
    assert abs(second_component[0] - second_component[1]) < tolerance
def test_correct_variance_ratios(self):
    """Check the variance ratios when only one feature varies.

    'foo' takes the values 5, 10, 15, 20 across ids 101..104 while
    'bar' is constant at 5, so the test expects the first component to
    explain all of the variance and the second one none of it.
    """
    features = [
        pd.DataFrame([[101, 'foo', 5], [101, 'bar', 5],
                      [102, 'foo', 10], [102, 'bar', 5],
                      [103, 'foo', 15], [103, 'bar', 5],
                      [104, 'foo', 20], [104, 'bar', 5]],
                     columns=['id', 'feature', 'value'])
    ]
    result = self.task.main(features=features,
                            categories=[],
                            n_components=2,
                            whiten=False,
                            id_filter=[],
                            subsets=[])
    variance_ratios = result['variance_ratios']
    # The ratios are floats produced by a numerical decomposition, so
    # exact equality with [1, 0] is fragile (e.g. 0.9999999999999999 or
    # 1e-33). Check the length, then compare with a tolerance.
    tolerance = 1e-9
    assert len(variance_ratios) == 2
    assert abs(variance_ratios[0] - 1) < tolerance
    assert abs(variance_ratios[1]) < tolerance
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment