Fractalis / fractalis / Commits

Commit 8bd7667f, authored Aug 29, 2017 by Sascha Herzinger

Added filtering functionality to task module

Parent: cc4f17af
Pipeline #2273 failed with stage in 1 minute and 28 seconds
Changes: 5
fractalis/analytics/task.py
@@ -4,7 +4,7 @@ import abc
 import json
 import re
 import logging
-from typing import List
+from typing import List, Tuple
 from pandas import read_csv, DataFrame
 from celery import Task
@@ -47,7 +47,8 @@ class AnalyticTask(Task, metaclass=abc.ABCMeta):
         """
         pass

-    def secure_load(self, file_path: str) -> DataFrame:
+    @staticmethod
+    def secure_load(file_path: str) -> DataFrame:
         """Decrypt data so they can be loaded into a pandas data frame.
         :param file_path: The location of the encrypted file.
         :return: The decrypted file loaded into a pandas data frame.
@@ -62,14 +63,16 @@ class AnalyticTask(Task, metaclass=abc.ABCMeta):
         df = DataFrame.from_dict(data)
         return df

-    def data_task_id_to_data_frame(self, data_task_id,
-                                   session_data_tasks, decrypt):
+    def data_task_id_to_data_frame(self, data_task_id: str,
+                                   session_data_tasks: List[str],
+                                   decrypt: bool) -> DataFrame:
         """Attempts to load the data frame associated with the provided data id.
         :param data_task_id: The data id associated with the previously loaded
         data.
         :param session_data_tasks: A list of data tasks previously executed by
         this the requesting session. This is used for permission checks.
         :param decrypt: Specify whether the data have to be decrypted for usage.
         only part of the data, for instance some genes out of thousands.
         :return: A pandas data frame associated with the data id.
         """
         if data_task_id not in session_data_tasks:
@@ -97,6 +100,47 @@ class AnalyticTask(Task, metaclass=abc.ABCMeta):
         df = read_csv(file_path)
         return df

+    @staticmethod
+    def apply_filters(df: DataFrame, filters: dict) -> DataFrame:
+        """Apply filter to data frame and return it.
+        :param df: The data frame.
+        :param filters: The filters where each key is represents a column in the
+        data frame and the value a list of values to keep.
+        :return: Filtered data frame.
+        """
+        for key in filters:
+            df = df[df[key].isin(filters[key])]
+        return df
+
+    @staticmethod
+    def contains_data_task_id(value: str) -> bool:
+        """Check whether the given string represents
+        a special data id arguments.
+        :param value: The string to test.
+        :return: True if argument contains data_task_id.
+        """
+        return value.startswith('$') and value.endswith('$')
+
+    @staticmethod
+    def parse_value(value: str) -> Tuple[str, dict]:
+        """Extract data task id and filters from the string.
+        :param value: A string that contains a data task id.
+        :return: A tuple of id and filters to apply later.
+        """
+        value = value[1:-1]
+        # noinspection PyBroadException
+        try:
+            value = json.loads(value)
+            data_task_id = value['id']
+            filters = value.get('filters')
+        except Exception:
+            logger.warning("Failed to parse value. "
+                           "Fallback assumption is that it contains the id "
+                           "but nothing else.")
+            data_task_id = value
+            filters = None
+        return data_task_id, filters
+
     def prepare_args(self, session_data_tasks: List[str],
                      args: dict, decrypt: bool) -> dict:
         """Replace data task ids in the arguments with their associated
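As an aside (not part of the diff): a minimal sketch of how the two new helpers fit together, assuming a made-up data task id "abc123" and a made-up column "gene", neither of which appears in this commit:

    import json
    from pandas import DataFrame

    # An argument value as the front-end can now send it: the text between the
    # '$' delimiters is JSON carrying the id plus optional filters.
    value = '${"id": "abc123", "filters": {"gene": ["TP53", "BRCA1"]}}$'

    # parse_value() strips the delimiters and decodes the JSON payload.
    payload = json.loads(value[1:-1])
    data_task_id = payload['id']        # 'abc123'
    filters = payload.get('filters')    # {'gene': ['TP53', 'BRCA1']}

    # apply_filters() keeps only the rows whose values are whitelisted per column.
    df = DataFrame({'gene': ['TP53', 'EGFR', 'BRCA1'], 'value': [1, 2, 3]})
    for key in filters:
        df = df[df[key].isin(filters[key])]
    # df now holds only the TP53 and BRCA1 rows.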
@@ -107,30 +151,38 @@ class AnalyticTask(Task, metaclass=abc.ABCMeta):
         :param decrypt: Indicates whether cache must be decrypted to be used.
         :return: The new parsed arguments
         """
-        arguments = {}
+        parsed_args = {}
         for arg in args:
             value = args[arg]

             # value is data id
-            if (isinstance(value, str) and value
-                    and value.startswith('$') and value.endswith('$')):
-                data_task_id = value[1:-1]
-                value = self.data_task_id_to_data_frame(
-                    data_task_id, session_data_tasks, decrypt)
+            if isinstance(value, str) and self.contains_data_task_id(value):
+                data_task_id, filters = self.parse_value(value)
+                df = self.data_task_id_to_data_frame(
+                    data_task_id, session_data_tasks, decrypt)
+                if filters:
+                    df = self.apply_filters(df, filters)
+                value = df

             # value is list containing data ids
-            if (isinstance(value, list) and value
-                    and isinstance(value[0], str)
-                    and value[0].startswith('$') and value[0].endswith('$')):
-                data_task_ids = [el[1:-1] for el in value]
+            if (isinstance(value, list) and value
+                    and self.contains_data_task_id(value[0])):
                 dfs = []
-                for data_task_id in data_task_ids:
+                for el in value:
+                    data_task_id, filters = self.parse_value(el)
                     df = self.data_task_id_to_data_frame(
                         data_task_id, session_data_tasks, decrypt)
+                    if filters:
+                        df = self.apply_filters(df, filters)
                     dfs.append(df)
                 value = dfs

-            arguments[arg] = value
-        return arguments
+            parsed_args[arg] = value
+        return parsed_args

-    def task_result_to_json(self, result: dict) -> str:
+    @staticmethod
+    def task_result_to_json(result: dict) -> str:
         """Transform task result to JSON so we can send it as a response.
         :param result: The return value of main()
         :return: A string that can be parsed by the front-end for instance.
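To illustrate the rewritten prepare_args() loop, here is a sketch of the two argument shapes it now recognises; the task ids and column names are invented for the example:

    # Shape 1: a single data task id, with optional filters in the JSON payload.
    args_single = {
        'df': '${"id": "task-1", "filters": {"A": [1, 7]}}$'
    }

    # Shape 2: a list of data task ids; detection uses contains_data_task_id(value[0]).
    args_list = {
        'df_list': ['$task-1$', '$task-2$']
    }

    # For each such value, prepare_args() calls parse_value() to split id and
    # filters, loads the cached frame via data_task_id_to_data_frame(), applies
    # the filters if any were given, and stores the resulting DataFrame (or list
    # of DataFrames) under the same key in parsed_args.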
setup.cfg

@@ -3,7 +3,6 @@ test = pytest
 [tool:pytest]
 addopts =
     --cov=fractalis
     --capture=no
     --color=yes
     --verbose
setup.py

@@ -30,7 +30,6 @@ setup(
     ],
     tests_require=['pytest==3.0.3',
                    'pytest-cov',
                    'pytest-mock',
                    'responses']
tests/functional/test_analytics.py

@@ -300,7 +300,7 @@ class TestAnalytics:
         assert new_body['state'] == 'SUCCESS', new_body
         assert len(json.loads(json.loads(new_body['result'])['df'])) == 30

-    def test_can_handle_empty_df_list(self, test_client, small_data_post):
+    def test_can_handle_empty_df_list(self, test_client):
         rv = test_client.post('/analytics', data=flask.json.dumps(dict(
             task_name='merge_df_task',
             args={'df_list': []}
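For context, a request that exercises the new filtering path might look like the following sketch; the id "some-data-task-id" and the column "A" are placeholders, since the real functional tests obtain ids from a prior data submission:

    rv = test_client.post('/analytics', data=flask.json.dumps(dict(
        task_name='merge_df_task',
        args={'df_list': [
            '${"id": "some-data-task-id", "filters": {"A": [1, 7]}}$'
        ]}
    )))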
tests/unit/analytics/test_analytics_task.py  (new file, mode 100644)
"""This module provides tests for the AnalyticsTask class."""
import
pandas
as
pd
from
fractalis.analytics.task
import
AnalyticTask
# noinspection PyMissingOrEmptyDocstring,PyMissingTypeHints
class
MockTask
(
AnalyticTask
):
@
property
def
name
(
self
)
->
str
:
return
''
def
main
(
self
,
*
args
,
**
kwargs
)
->
dict
:
pass
# noinspection PyMissingOrEmptyDocstring,PyMissingTypeHints
class
TestAnalyticsTask
:
task
=
MockTask
()
def
test_apply_filter
(
self
):
df
=
pd
.
DataFrame
([[
1
,
2
,
3
],
[
4
,
5
,
6
],
[
7
,
8
,
9
]],
columns
=
[
'A'
,
'B'
,
'C'
])
filters
=
{
'A'
:
[
1
,
7
],
'B'
:
[
2
,
5
,
8
],
'C'
:
[
6
,
9
]}
df
=
self
.
task
.
apply_filters
(
df
=
df
,
filters
=
filters
)
assert
df
.
shape
==
(
1
,
3
)
assert
df
.
iloc
[
0
].
tolist
()
==
[
7
,
8
,
9
]
def
test_parse_value
(
self
):
arg1
=
'${"id": 123, "filters": {"foo": [1,2]}}$'
arg2
=
'$123$'
data_task_id
,
filters
=
self
.
task
.
parse_value
(
arg1
)
assert
data_task_id
==
123
assert
'foo'
in
filters
assert
filters
[
'foo'
]
==
[
1
,
2
]
data_task_id
,
filters
=
self
.
task
.
parse_value
(
arg2
)
assert
data_task_id
==
123
assert
not
filters
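One behaviour worth noting, sketched here with a made-up id and reusing MockTask from the file above: when the payload between the '$' delimiters is not valid JSON (or is JSON without an 'id' key), parse_value() falls back to treating the whole payload as the id and returns no filters:

    task = MockTask()
    data_task_id, filters = task.parse_value('$some-raw-id$')
    assert data_task_id == 'some-raw-id'
    assert filters is None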