Commit 9cb56527 authored by Sascha Herzinger

New TCGA - COAD dataset for demonstration

parent 53a7f32d
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
import os
import pandas as pd
from fractalis.data.etl import ETL
class CategoricalETL(ETL):
    """ETL for 'categorical' descriptors of the 'demo_tcga_coad' handler.

    Reads ``data/<field>.tsv`` from the directory containing this module and
    returns it with the ``id``, ``value`` and ``feature`` columns cast to
    ``str``.
    """

    # NOTE(review): 'tca' looks like a typo for 'tcga' (the handler is
    # 'demo_tcga_coad'). Left unchanged because the name may be referenced
    # outside this file -- confirm before renaming.
    name = 'demo_tca_coad_categorical-etl'
    produces = 'categorical'

    @staticmethod
    def can_handle(handler: str, descriptor: dict) -> bool:
        """Tell whether this ETL is responsible for the given request.

        :param handler: Handler identifier of the request.
        :param descriptor: Data descriptor; must contain the key 'dataType'.
        :return: True if handler is 'demo_tcga_coad' and the descriptor's
            'dataType' is 'categorical'.
        """
        return handler == 'demo_tcga_coad' and \
            descriptor['dataType'] == 'categorical'

    def extract(self, server: str, token: str,
                descriptor: dict) -> pd.DataFrame:
        """Load the tab-separated file selected by ``descriptor['field']``.

        :param server: Not used; the data is read from the local file system.
        :param token: Not used.
        :param descriptor: Must contain the key 'field', the file name
            (without the '.tsv' extension) inside the 'data' directory next
            to this module.
        :return: The parsed file content.
        """
        # NOTE(review): descriptor['field'] goes into the path unchecked; if
        # descriptors can originate from untrusted clients this should be
        # validated -- confirm against the caller.
        module_dir = os.path.dirname(os.path.abspath(__file__))
        path = '{}/data/{}.tsv'.format(module_dir, descriptor['field'])
        return pd.read_csv(path, sep='\t')

    def transform(self, raw_data: pd.DataFrame,
                  descriptor: dict) -> pd.DataFrame:
        """Cast the 'id', 'value' and 'feature' columns to ``str``.

        :param raw_data: Frame as returned by :meth:`extract`; must contain
            the columns 'id', 'value' and 'feature'.
        :param descriptor: Not used.
        :return: A new frame with the three columns converted. ``raw_data``
            itself is left untouched.
        """
        # astype() returns a new DataFrame, so the caller's frame is not
        # modified as a side effect of the transformation.
        return raw_data.astype({'id': str, 'value': str, 'feature': str})
import os
import pandas as pd
from fractalis.data.etl import ETL
class NumericalETL(ETL):
    """ETL for 'numerical' descriptors of the 'demo_tcga_coad' handler.

    Reads ``data/<field>.tsv`` from the directory containing this module and
    returns it with ``id``/``feature`` cast to ``str`` and ``value`` cast to
    ``float``.
    """

    name = 'demo_tcga_coad_numerical-etl'
    produces = 'numerical'

    @staticmethod
    def can_handle(handler: str, descriptor: dict) -> bool:
        """Tell whether this ETL is responsible for the given request.

        :param handler: Handler identifier of the request.
        :param descriptor: Data descriptor; must contain the key 'dataType'.
        :return: True if handler is 'demo_tcga_coad' and the descriptor's
            'dataType' is 'numerical'.
        """
        return handler == 'demo_tcga_coad' and \
            descriptor['dataType'] == 'numerical'

    def extract(self, server: str, token: str,
                descriptor: dict) -> pd.DataFrame:
        """Load the tab-separated file selected by ``descriptor['field']``.

        :param server: Not used; the data is read from the local file system.
        :param token: Not used.
        :param descriptor: Must contain the key 'field', the file name
            (without the '.tsv' extension) inside the 'data' directory next
            to this module.
        :return: The parsed file content.
        """
        # NOTE(review): descriptor['field'] goes into the path unchecked; if
        # descriptors can originate from untrusted clients this should be
        # validated -- confirm against the caller.
        module_dir = os.path.dirname(os.path.abspath(__file__))
        path = '{}/data/{}.tsv'.format(module_dir, descriptor['field'])
        return pd.read_csv(path, sep='\t')

    def transform(self, raw_data: pd.DataFrame,
                  descriptor: dict) -> pd.DataFrame:
        """Normalise the column types of the extracted frame.

        :param raw_data: Frame as returned by :meth:`extract`; must contain
            the columns 'id', 'value' and 'feature'.
        :param descriptor: Not used.
        :return: A new frame with 'id' and 'feature' as ``str`` and 'value'
            as ``float``. ``raw_data`` itself is left untouched.
        """
        # astype() returns a new DataFrame, so the assignment below does not
        # touch the caller's frame.
        df = raw_data.astype({'id': str, 'feature': str})
        # errors='coerce' turns entries that cannot be parsed as numbers
        # into NaN instead of raising.
        df['value'] = pd.to_numeric(df['value'], errors='coerce').astype(float)
        return df
import os
import pandas as pd
from fractalis.data.etl import ETL
class NumericalETL(ETL):
    """ETL for 'numerical_array' descriptors of the 'demo_tcga_coad' handler.

    Reads ``data/<field>.tsv`` from the directory containing this module and
    returns it with ``id``/``feature`` cast to ``str`` and ``value`` cast to
    ``float``.
    """

    # NOTE(review): 'tca' looks like a typo for 'tcga' (the handler is
    # 'demo_tcga_coad'). Left unchanged because the name may be referenced
    # outside this file; confirm the corrected name would still be unique
    # among the registered ETLs before renaming.
    name = 'demo_tca_coad_numerical-etl'
    produces = 'numerical_array'

    @staticmethod
    def can_handle(handler: str, descriptor: dict) -> bool:
        """Tell whether this ETL is responsible for the given request.

        :param handler: Handler identifier of the request.
        :param descriptor: Data descriptor; must contain the key 'dataType'.
        :return: True if handler is 'demo_tcga_coad' and the descriptor's
            'dataType' is 'numerical_array'.
        """
        return handler == 'demo_tcga_coad' and \
            descriptor['dataType'] == 'numerical_array'

    def extract(self, server: str, token: str,
                descriptor: dict) -> pd.DataFrame:
        """Load the tab-separated file selected by ``descriptor['field']``.

        :param server: Not used; the data is read from the local file system.
        :param token: Not used.
        :param descriptor: Must contain the key 'field', the file name
            (without the '.tsv' extension) inside the 'data' directory next
            to this module.
        :return: The parsed file content.
        """
        # NOTE(review): descriptor['field'] goes into the path unchecked; if
        # descriptors can originate from untrusted clients this should be
        # validated -- confirm against the caller.
        module_dir = os.path.dirname(os.path.abspath(__file__))
        path = '{}/data/{}.tsv'.format(module_dir, descriptor['field'])
        return pd.read_csv(path, sep='\t')

    def transform(self, raw_data: pd.DataFrame,
                  descriptor: dict) -> pd.DataFrame:
        """Normalise the column types of the extracted frame.

        :param raw_data: Frame as returned by :meth:`extract`; must contain
            the columns 'id', 'value' and 'feature'.
        :param descriptor: Not used.
        :return: A new frame with 'id' and 'feature' as ``str`` and 'value'
            as ``float``. ``raw_data`` itself is left untouched.
        :raises ValueError: If a 'value' entry cannot be converted to float.
        """
        # astype() returns a new DataFrame, so the caller's frame is not
        # modified as a side effect of the transformation.
        return raw_data.astype({'id': str, 'value': float, 'feature': str})
from fractalis.data.etlhandler import ETLHandler
class TCGADemoHandler(ETLHandler):
    """ETLHandler registered under the identifier 'demo_tcga_coad'."""

    _handler = 'demo_tcga_coad'

    @staticmethod
    def make_label(descriptor):
        """Use the descriptor's 'field' entry as label.

        :param descriptor: Mapping describing the requested data.
        :return: The value stored under 'field', or None if the key is absent.
        """
        label = descriptor.get('field')
        return label

    def _get_token_for_credentials(self, server: str, auth: dict) -> str:
        """Return a fixed placeholder token; ``server`` and ``auth`` are ignored."""
        return 'foo'

    def _heartbeat(self):
        """Do nothing."""
        return None
"""Provides CategoricalETL for wine quality demo data set."""
"""Provides CategoricalETL for wine quality demo_wine_quality data set."""
import os
......@@ -11,12 +11,13 @@ class CategoricalETL(ETL):
"""CategoricalETL implements support for the
categorical data of the wine quality data set. """
name = 'demo-wine-quality-categorical-etl'
name = 'demo_wine_quality_categorical-etl'
produces = 'categorical'
@staticmethod
def can_handle(handler: str, descriptor: dict):
return handler == 'demo' and descriptor['dataType'] == 'categorical'
return handler == 'demo_wine_quality' \
and descriptor['dataType'] == 'categorical'
def extract(self, server: str, token: str, descriptor: dict):
path = os.path.dirname(os.path.abspath(__file__)) + '/wine_quality.csv'
......
"""Provides NumericalETL for wine quality demo data set."""
"""Provides NumericalETL for wine quality data set."""
import os
......@@ -11,12 +11,13 @@ class NumericalETL(ETL):
"""NumericalETL implements support for the
numerical data of the wine quality data set. """
name = 'demo-wine-quality-numerical-etl'
name = 'demo_wine_quality_numerical-etl'
produces = 'numerical'
@staticmethod
def can_handle(handler: str, descriptor: dict):
return handler == 'demo' and descriptor['dataType'] == 'numerical'
return handler == 'demo_wine_quality' and \
descriptor['dataType'] == 'numerical'
def extract(self, server: str, token: str, descriptor: dict):
path = os.path.dirname(os.path.abspath(__file__)) + '/wine_quality.csv'
......
......@@ -3,7 +3,7 @@ from fractalis.data.etlhandler import ETLHandler
class DemoHandler(ETLHandler):
_handler = 'demo'
_handler = 'demo_wine_quality'
@staticmethod
def make_label(descriptor):
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment