#' @title Preprocess an expression dataset.
#'
#' @description This function preprocesses a dataset by delegating to the
#' platform dedicated function, which is given the output file name(s) in which
#' the results should be saved. In addition, it returns whatever that function
#' returns (the ESET object(s)).
#'
#' The function assumes that a folder containing the raw data exists (with cel
#' files for arrays). It currently dispatches to handlers for Affymetrix,
#' Illumina and Agilent arrays, as well as RNAseq data. This function is
#' just a handler over the platform dedicated functions.
#'
#' Note: the function does not check for the existence of folders or files.
#' When the platform (or the Affymetrix method) is not recognised, a log
#' message is emitted, no preprocessing is done and NULL is returned.
#'
#' @param input_data_dir A string representing the folder that contains the
#' input data.
#' @param output_data_files An array of strings representing the files that
#' should contain the preprocessed data. At least one value, maximum two if
#' batch_correction is "BOTH".
#' @param platform A string representing the platform among "Affymetrix",
#' "Illumina", "Agilent" and "RNAseq". Default to "Affymetrix".
#' @param method A string representing the preprocessing method to use (for
#' Affymetrix arrays where multiple methods are supported: "SCAN", "GCRMA" and
#' "APT-GCRMA"). Default to "SCAN". Ignored for the other platforms.
#' @param exprs_raw A matrix corresponding to the pre-processed data (in the
#' case of APT-GCRMA, the only handler that receives it). Default to NULL since
#' in most cases, we do not have pre-processed data already.
#' @param compressed A boolean representing whether the cel files are
#' compressed. This is FALSE by default. It is not forwarded to the APT-GCRMA
#' and RNAseq handlers.
#' @param expression_filename A string indicating where the expression matrix
#' can be found, default to 'ExpData.tsv'. Only forwarded to the RNAseq handler.
#' @param batch_correction A string indicating whether batch correction should
#' be performed. Options are "TRUE", "FALSE", "BOTH", default to "FALSE".
#' @param batch_filename A string indicating where the batch information can be
#' found, default to 'Batch.tsv'.
#' @param clean_samples A boolean indicating whether the dataset should be
#' cleaned by removing the samples that do not have clinical data. Default to
#' FALSE.
#' @param verbose A boolean representing whether the function should display
#' log information. This is FALSE by default.
#' @return The expression data as ESET objects. Potentially only one object
#' (therefore unlisted). NULL when no preprocessing was done.
preprocess_data <- function(input_data_dir, output_data_files,
                            platform = "Affymetrix",
                            method = "SCAN",
                            exprs_raw = NULL,
                            compressed = FALSE,
                            expression_filename = "ExpData.tsv",
                            batch_correction = "FALSE",
                            batch_filename = "Batch.tsv",
                            clean_samples = FALSE,
                            verbose = FALSE) {

  # We launch the correct function depending on the array platform and
  # desired method. The result stays NULL if nothing matches.
  esets <- NULL
  if (platform == "Affymetrix") {
    if (method == "SCAN") {
      esets <- preprocess_data_affymetrix_scan(input_data_dir,
                                               output_data_files,
                                               compressed = compressed,
                                               batch_correction = batch_correction,
                                               batch_filename = batch_filename,
                                               clean_samples = clean_samples,
                                               verbose = verbose)
    } else if (method == "GCRMA") {
      esets <- preprocess_data_affymetrix_gcrma(input_data_dir,
                                                output_data_files,
                                                compressed = compressed,
                                                batch_correction = batch_correction,
                                                batch_filename = batch_filename,
                                                clean_samples = clean_samples,
                                                verbose = verbose)
    } else if (method == "APT-GCRMA") {
      # This handler takes the already pre-processed matrix as first argument
      # and does not take the 'compressed' flag.
      esets <- preprocess_data_affymetrix_aptgcrma(exprs_raw,
                                                   input_data_dir,
                                                   output_data_files,
                                                   batch_correction = batch_correction,
                                                   batch_filename = batch_filename,
                                                   clean_samples = clean_samples,
                                                   verbose = verbose)
    } else {
      # Mirror the unsupported platform case instead of silently returning NULL.
      message(paste0("[", Sys.time(), "] [WARNING] Method ", method,
                     " not yet supported for platform ", platform,
                     " (no preprocessing done)."))
    }
  } else if (platform == "Agilent") {
    esets <- preprocess_data_agilent_limma(input_data_dir,
                                           output_data_files,
                                           compressed = compressed,
                                           batch_correction = batch_correction,
                                           batch_filename = batch_filename,
                                           clean_samples = clean_samples,
                                           verbose = verbose)
  } else if (platform == "Illumina") {
    esets <- preprocess_data_illumina_beadarray(input_data_dir,
                                                output_data_files,
                                                compressed = compressed,
                                                batch_correction = batch_correction,
                                                batch_filename = batch_filename,
                                                clean_samples = clean_samples,
                                                verbose = verbose)
  } else if (platform == "RNAseq") {
    # The RNAseq handler reads an expression matrix file rather than raw
    # array files, hence 'expression_filename' instead of 'compressed'.
    esets <- preprocess_data_rnaseq(input_data_dir,
                                    output_data_files,
                                    expression_filename = expression_filename,
                                    batch_correction = batch_correction,
                                    batch_filename = batch_filename,
                                    clean_samples = clean_samples,
                                    verbose = verbose)
  } else {
    message(paste0("[", Sys.time(), "] [WARNING] Platform ", platform,
                   " not yet supported (no preprocessing done)."))
  }

  # We return the created ESET(s).
  return(esets)
}