Commit 2ca68ea6 authored by Sascha Herzinger

tests and bugfixes

parent 203a7bb8
Pipeline #5343 failed with stages
in 2 minutes and 48 seconds
......@@ -32,9 +32,7 @@ def apply_subsets(df: pd.DataFrame,
df_subset = df_subset.assign(subset=subset_col)
_df = _df.append(df_subset)
if _df.shape[0] == 0:
raise ValueError("No data match given subsets. Keep in mind that X "
"and Y are intersected before the subsets are "
"applied.")
raise ValueError("No data match given subsets.")
return _df
......
......@@ -38,17 +38,20 @@ class SurvivalTask(AnalyticTask):
raise ValueError(error)
df = durations[0]
df.dropna(inplace=True)
if id_filter:
df = df[df['id'].isin(id_filter)]
df = utils.apply_subsets(df=df, subsets=subsets)
df = utils.apply_categories(df=df, categories=categories)
stats = {}
categories = df['category'].unique().tolist()
subsets = df['subset'].unique().tolist()
# for every category and subset combination estimate the survival fun.
for category in df['category'].unique().tolist():
for category in categories:
if not stats.get(category):
stats[category] = {}
for subset in df['subset'].unique().tolist():
for subset in subsets:
sub_df = df[(df['category'] == category) &
(df['subset'] == subset)]
T = sub_df['value']
......@@ -90,5 +93,7 @@ class SurvivalTask(AnalyticTask):
}
return {
'categories': categories,
'subsets': subsets,
'stats': stats
}
This diff is collapsed.
......@@ -2,8 +2,7 @@
from lifelines.datasets import load_waltons
from fractalis.analytics.tasks.kaplan_meier_survival.main \
import SurvivalTask
from fractalis.analytics.tasks.survival.main import SurvivalTask
class TestSurvivalTask:
......@@ -50,4 +49,34 @@ class TestSurvivalTask:
assert results['stats']['miR-137'][0]['timeline']
assert results['stats']['miR-137'][0]['estimate']
assert results['stats']['miR-137'][0]['ci_lower']
assert results['stats']['miR-137'][0]['ci_upper']
\ No newline at end of file
assert results['stats']['miR-137'][0]['ci_upper']
def test_can_handle_nans(self):
    """Smoke test: the task must not raise when durations contain NaNs.

    Builds a duration frame from the Waltons dataset with the columns
    ``id``, ``feature`` and ``value``, blanks out every row with an even
    index label and runs the task. There is no assertion; the test passes
    as long as ``main`` completes without an exception.
    """
    df = load_waltons()
    df.insert(0, 'id', df.index)
    duration = df[['id', 'T']].copy()
    duration.insert(1, 'feature', 'duration')
    # Rename through the public API instead of writing into
    # ``columns.values``: an Index is immutable, and mutating its backing
    # array is unsupported (stale lookup caches, read-only arrays).
    duration = duration.rename(columns={'T': 'value'})
    duration.loc[duration.index % 2 == 0, 'value'] = float('nan')
    self.task.main(durations=[duration],
                   categories=[],
                   event_observed=[],
                   estimator='KaplanMeier',
                   id_filter=[],
                   subsets=[])
def test_can_handle_empty_groups(self):
    """Run the task when one group has no usable durations at all.

    Every ``T`` value of the ``miR-137`` group is replaced by NaN before
    the duration frame (``id``, ``feature``, ``value``) is built, then the
    task is run and the returned ``stats`` mapping is inspected.
    """
    df = load_waltons()
    df.insert(0, 'id', df.index)
    df.loc[df['group'] == 'miR-137', 'T'] = float('nan')
    duration = df[['id', 'T']].copy()
    duration.insert(1, 'feature', 'duration')
    # Rename through the public API instead of writing into
    # ``columns.values``: an Index is immutable, and mutating its backing
    # array is unsupported (stale lookup caches, read-only arrays).
    duration = duration.rename(columns={'T': 'value'})
    results = self.task.main(durations=[duration],
                             categories=[],
                             event_observed=[],
                             estimator='KaplanMeier',
                             id_filter=[],
                             subsets=[])
    # NOTE(review): no categories are passed, so the group labels are
    # presumably never used as keys of ``stats`` and this check may hold
    # trivially -- confirm the intended key and group.
    assert 'control' not in results['stats']
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment