#' @title Preprocess an Agilent dataset with limma. #' #' @description This function preprocess an Agilent dataset using limma and saves the #' results in a given TSV file. In addition, it returns the ESET object. #' #' The function assumes that a folder containing the raw data exists (as cel files). #' #' Note: the function does not check for the existence of folders or files. The bacth effect #' correction is not yet supported. #' #' @param input_data_dir A string representing the folder that contains the input data. #' @param output_data_file A string representing the file that should contain the #' preprocessed data. #' @param compressed A boolean representing whether the cel files are compressed. This #' is FALSE by default. Note: only for compatibility reasons (has no effect on analysis). #' @param batch_correction A boolean indicating whether batch correction should #' be performed, default to FALSE. Note: not yet suported. #' @param batch_filename A string indicating where the batch information can be found, #' default to 'Batch.tsv'. Note: not yet suported. #' @param clean_samples A boolean indicating whether the dataset should be cleaned by removing #' the samples that do not have clinical data. Default to FALSE. #' @param verbose A boolean representing whether the function should display log information. This #' is TRUE by default. #' @return The expression data as an ESET object. preprocess_data_agilent_limma <- function(input_data_dir, output_data_file, compressed = FALSE, batch_correction = FALSE, batch_filename = "Batch.tsv", clean_samples = FALSE, verbose = TRUE) { # We define the I/Os. raw_data_input_dir <- paste0(input_data_dir, "RAW/") # We read the data (aka agilent MA images). raw_file_list <- list.files(path = raw_data_input_dir) batch <- limma::read.maimages(files = raw_file_list, source = "agilent", path = raw_data_input_dir, green.only = TRUE, verbose = TRUE) batch_data = log2(batch$E) remove(raw_data_input_dir, batch) # We run the LIMMA pre-processing method on the data. # First, background correcttion. batch_data_bg <- limma::backgroundCorrect(batch_data, method = "normexp", normexp.method = "mle", verbose = TRUE) # Second, perform quantile normalization. batch_data_norm <- limma::normalizeBetweenArrays(batch_data_bg, method = "quantile") # If necessary, we remove the samples that do not have clinical data. if (clean_samples) { # We load the clinical data as to get the samples to keep. samples <- rownames(pData(ArrayUtils::load_clinical_data(input_data_dir, verbose = FALSE))) # We only keep the samples with clinical data. batch_data_norm <- batch_data_norm[, samples] } # We save the eset data as TSV file. eset <- methods::new("ExpressionSet", exprs = as.matrix(batch_data_norm)) rm(batch_data_bg) utils::write.table(batch_data_norm, file = output_data_file, sep = "\t", quote = FALSE) rm(batch_data_norm) # We clean up and log information. if (verbose == TRUE) { message(paste0("[", Sys.time(), "] Expression data pre-processed with LIMMA.")) } # We return the created ESET. return(eset) }