#' @title Run a limma analysis. #' #' @description This function prepares and runs a limma analysis up until the fitting procedure. #' The Limma analysis can be simple (A vs B) or more complex ((A vs B) vs (C vs D)). One single #' cofactor can also be taken into account in the limma. It returns the final fit that can then #' be used to detect differentially expressed genes. #' #' @param pheno_data The data-frame that contains the phenotypic data (aka clinical data). #' @param eset The ExpressionSet object that contains the pre-processed data. Alternatively, #' this can be an EList object (limma package), especially for RNA-seq data. #' @param limma_parameters A list of useful Limma parameters that include (1) the main clinical #' factor used to create sample categories (i.e., Disease status) and (2) the Limma coefficients #' provided as formulas (similar to the limma::makeContrasts function). Additional parameters are #' ignored for the time being. #' @param correct_for_age A boolean indicating whether we should correct for age in the limma #' design matrix. This expects that a clinical factor termed Age exists. Default to FALSE. #' @param correct_for_batch A boolean indicating whether we should correct for a potential batch #' effect in the limma design matrix. This expects that the actual batch information exists in #' the object pheno_data (as field Batch). Default to FALSE. #' @param correct_for_gender A boolean indicating whether we should correct for sample gender #' in the limma design matrix. This expects that a clinical factor termed Gender exists. Default #' to FALSE. #' @param cofactor_name A string indicating a potential cofactor (included in the phenotypic data). #' The default is NULL, and no cofactor is considered. This will replace the age correction if #' both were deemed necessary. #' @param block_key A string indicating which column in the phenotyypic data should be used to #' build blocks if there is a cofactor to take into account. For instance, if there are several #' replicates per patient, the block_key might be "Patient" and the cofactor might be "Replicate". #' This will replace the age correction if both were deemed necessary. #' @param run_topconfects A boolean indicating whether the analysis should be complemented with a #' TopConfects analysis that bases the decision on confident fold changes and effect size. Default #' to FALSE. #' @param output_data_dir A string representing the folder in which the output files #' are going to be stored (for TopConfects only). Default to "". #' @param file_prefix A string used to prefix the file names (for TopConfects only). Default to "". #' @param file_suffix A string used as a suffix for the file names (for TopConfects only). Default #' to "". #' @param verbose A boolean representing whether the function should display log information. This #' is FALSE by default. #' @return The limma fit. run_limma <- function(pheno_data, eset, limma_parameters, correct_for_age = FALSE, correct_for_batch = FALSE, correct_for_gender = FALSE, cofactor_name = NULL, block_key = NULL, run_topconfects = FALSE, output_data_dir = "", file_prefix = "", file_suffix = "", verbose = FALSE) { # We extract the limma analysis parameters coefficients <- limma_parameters[[2]] has_block <- !is.null(cofactor_name) # We refine the prefix / suffic for the file names. if (file_prefix != "") { file_prefix <- paste0(file_prefix, "_") } if (file_suffix != "") { file_suffix <- paste0("_", file_suffix) } # We create the design matrix (regular case - no block). # Depending on the co-factors, we have several possibilities. # We sill decide based on (i) a block based on a co-factor (paired samples), # (ii) the age clincal descriptor and (iii) the experimental batches. # If we need to add a block based on a co-factor. design <- create_design(pheno_data = pheno_data, limma_parameters = limma_parameters, correct_for_age = correct_for_age, correct_for_batch = correct_for_batch, correct_for_gender = correct_for_gender, cofactor_name = cofactor_name, verbose = verbose) # We build the contrast (only one so far). contrast <- eval(substitute(limma::makeContrasts(coeff1 = c1, levels = design), list(c1 = coefficients))) if (run_topconfects) { design_topconfects <- limma::contrastAsCoef(design, contrast)$design } # We log information. rm(coefficients) if (verbose == TRUE) { message(paste0("[", Sys.time(), "] Contrast matrix created.")) } # We fit the data using the above models and the contrasts # to prepare the differential analyses. lmfit <- NULL lmfit_topconfects <- NULL # We create a block if necessary. if (has_block) { corfit <- limma::duplicateCorrelation(eset, design, block = pheno_data[[block_key]]) lmfit <- limma::lmFit(eset, design, block = pheno_data[[block_key]], correlation = corfit$consensus) if (run_topconfects) { lmfit_topconfects <- limma::lmFit(eset, design_topconfects, block = pheno_data[[block_key]], correlation = corfit$consensus) } # We log information. rm(corfit) if (verbose == TRUE) { message(paste0("[", Sys.time(), "] Linear fit performed (with blocks).")) } } else { lmfit <- limma::lmFit(eset, design) if (run_topconfects) { lmfit_topconfects <- limma::lmFit(eset, design_topconfects) } # We log information. if (verbose == TRUE) { message(paste0("[", Sys.time(), "] Linear fit performed.")) } } fit <- limma::contrasts.fit(lmfit, contrast) rm(has_block, design, contrast, lmfit) fit <- limma::eBayes(fit) # We now finish the TopConfects analysis if necesary. if (run_topconfects) { # We first run TopConfects on the dedicated lmfit object. confects <- topconfects::limma_confects(lmfit_topconfects, coef = 1, fdr = 0.05) # We need to check whether we have any confect at all (if no effect is > 0 at FDR = 0.05, # then all confects are set to NA values). confects_genes <- confects$table %>% filter(!is.na(confect)) #nolint if (nrow(confects_genes) > 0) { # Log info. if (verbose == TRUE) { message(paste0("[", Sys.time(), "] [TopConfects] ", nrow(confects_genes), " genes identified with TopConfects.")) } # We save the list of genes confects_table_fn <- paste0(output_data_dir, file_prefix, "topconfects_", limma_parameters$name, file_suffix, ".tsv") utils::write.table(confects_genes, file = confects_table_fn, sep = "\t", quote = FALSE, col.names = NA) # First plot, the topconfects plot. tcplot_fn <- paste0(output_data_dir, file_prefix, "topconfects_", limma_parameters$name, file_suffix, ".png") grDevices::png(tcplot_fn) local_n <- min(50, nrow(confects_genes)) p <- topconfects::confects_plot(confects, n = local_n) print(p) grDevices::dev.off() rm(p, tcplot_fn) # Second plot, the me plot (mean - effect). meplot_fn <- paste0(output_data_dir, file_prefix, "topconfects_meplot_", limma_parameters$name, file_suffix, ".png") grDevices::png(meplot_fn) p <- topconfects::confects_plot_me(confects) print(p) grDevices::dev.off() rm(p, meplot_fn) # Third plot, the comparison between topconfects and pure limma with # a rank rank plot. limma_top <- limma::topTable(fit, coef = 1, p.value = 0.05, n = Inf) if (nrow(limma_top) > 0) { # Log info. if (verbose == TRUE) { message(paste0("[", Sys.time(), "] [TopConfects] ", nrow(limma_top), " genes identified with Limma.")) } rrplot_fn <- paste0(output_data_dir, file_prefix, "topconfects_rrplot_limma_", limma_parameters$name, file_suffix, ".png") grDevices::png(rrplot_fn) p <- topconfects::rank_rank_plot(confects_genes$name, rownames(limma_top), "TopConfects", "Limma", n = local_n) print(p) grDevices::dev.off() rm(p, rrplot_fn) # Fourth plot, the MD plot with both TopConfect results and Limma results highlighted. mdplot_fn <- paste0(output_data_dir, file_prefix, "topconfects_mdplot_limma_", limma_parameters$name, file_suffix, ".png") grDevices::png(mdplot_fn) p <- limma::plotMD(fit, legend = "bottomleft", status = paste0( ifelse(rownames(fit) %in% rownames(limma_top), "Limma ", ""), ifelse(rownames(fit) %in% confects_genes$name, "TopConfects ", ""))) print(p) grDevices::dev.off() rm(p, mdplot_fn) } else { # Log info. if (verbose == TRUE) { message(paste0("[", Sys.time(), "] [TopConfects] no gene identified with Limma.")) } } # End of the plotting (if there are Limma genes). } else { # Log info. if (verbose == TRUE) { message(paste0("[", Sys.time(), "] [TopConfects] no gene identified with TopConfects.")) } } # End of the plotting (if there are TopConfect genes). } # End of the TopConfects section. # We return the fit. return(fit) }