Commit a7192199 authored by Sascha Herzinger
Browse files

some minor improvements

parent 0cd869c9
Pipeline #2224 failed with stage
in 7 minutes and 33 seconds
......@@ -45,7 +45,8 @@ class BoxplotTask(AnalyticTask):
df = apply_categories(df=df, categories=categories)
results = {
'data': df.to_json(orient='index'),
'statistics': {}
'statistics': {},
'variables': variable_names,
for variable in variable_names:
for subset in list(set(df['subset'].tolist())):
......@@ -26,14 +26,14 @@ class CorrelationTask(AnalyticTask):
id_filter: List[T],
method: str,
subsets: List[List[T]],
annotations: List[pd.DataFrame]) -> dict:
categories: List[pd.DataFrame]) -> dict:
"""Compute correlation statistics for the given parameters.
:param x: DataFrame containing x axis values.
:param y: DataFrame containing y axis values.
:param id_filter: If specified use only given ids during the analysis.
:param method: pearson, spearman or kendall.
:param subsets: List of lists of subset ids.
:param annotations: List of DataFrames that annotate the data points.
:param categories: List of DataFrames that categorise the data points.
:return: corr. coef., p-value and other useful values.
if x.shape[0] == 0 or y.shape[0] == 0:
......@@ -47,7 +47,7 @@ class CorrelationTask(AnalyticTask):
(x_label, y_label) = self.get_axis_labels(df)
df = apply_id_filter(df=df, id_filter=id_filter)
df = apply_subsets(df=df, subsets=subsets)
df = apply_categories(df=df, categories=annotations)
df = apply_categories(df=df, categories=categories)
global_stats = self.compute_stats(df, method, x_label, y_label)
subset_dfs = [df[df['subset'] == i] for i in range(len(subsets) or 1)]
subset_stats = [self.compute_stats(subset_df, method, x_label, y_label)
......@@ -54,7 +54,7 @@ def apply_categories(df: pd.DataFrame,
# replace everything that is not a category with ''
data = data.applymap(lambda el: el if isinstance(el, str) and el else '')
# join all columns with ' AND ' into a single one. Ignore '' entries.
data = data.apply(lambda row: '&&'.join(list(map(str, [el for el in row.tolist() if el]))), axis=1)
data = data.apply(lambda row: ' AND '.join(list(map(str, [el for el in row.tolist() if el]))), axis=1)
# cast Series to DataFrame
data = pd.DataFrame(data, columns=['category'])
# reassign ids to collapsed df
......@@ -24,7 +24,7 @@ def get_field(server: str, data_set: str,
"status code {}.".format(r.status_code))
field_data = r.json()
except TypeError:
except Exception:
raise TypeError("Data extraction failed. Target server did not return "
"expected data. Possible authentication error.")
return field_data
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment