extract_DEGs.R 4.26 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
#' @title Extract DEGs from a Limma fit.
#'
#' @description The function extracts the differentially expressed genes from an existing
#' limma fit and for a predefined analysis (aka a coefficient). It saves the data in a TSV file
#' and creates several plots in the process (Venn diagrams, MD plots).
#'
#' @param fit A fit as provided by a complete Limma analysis (for instance using
#' ArrayUtils::run_limma).
#' @param limma_coeffs A list of all limma coefficient names associated with that fit.
#' It will be used to name the output files, and to identify the results in the Venn
#' diagrams.
#' @param k The index of the current coefficient (from limma_coeffs).
#' @param output_data_dir A string representing the folder in which the output files
#' are going to be stored.
#' @param file_prefix A string used to prefix the file names. Default to "".
16
#' @param file_suffix A string used as a suffix for the file names. Default to "".
17 18
#' @param pval_adjust_method A string code indicating the multiple testing correction
#' method to use. Default to BH.
19 20
#' @param verbose A boolean representing whether the function should display log information. This
#' is FALSE by default.
21
#' @return NULL
22
extract_degs <- function(fit, limma_coeffs, k, output_data_dir,
23
                         file_prefix        = "",
24
                         file_suffix        = "",
25 26
                         pval_adjust_method = "BH",
                         verbose            = FALSE) {
27 28 29

  # We create the output file names.
  if (file_prefix != "") {
30 31 32 33
    file_prefix <- paste0(file_prefix, "_")
  }
  if (file_suffix != "") {
    file_suffix <- paste0("_", file_suffix)
34
  }
35 36 37 38 39 40
  venn_fn  <- paste0(output_data_dir, file_prefix, "venn_",
                     limma_coeffs[k], file_suffix, ".png")
  md_fn    <- paste0(output_data_dir, file_prefix, "MD_",
                     limma_coeffs[k], file_suffix, ".png")
  table_fn <- paste0(output_data_dir, file_prefix, "toptable_",
                     limma_coeffs[k], file_suffix, ".tsv")
41 42 43 44 45 46

  # We extract the DEGs using toptable.
  table <- limma::topTable(fit,
                           coef          = paste0("coeff", k),
                           n             = Inf,
                           adjust.method = pval_adjust_method,
47 48
                           confint       = TRUE,
                           sort.by       = "P")
49 50 51 52 53 54 55

  # Another way to extract the differentially expressed genes (in parallel to topTable).
  # This is only used to then create the Venn diagrams.
  results <- limma::decideTests(fit,
                                coefficient   = paste0("coeff", k),
                                adjust.method = pval_adjust_method)

56 57 58 59 60
  # We log information.
  if (verbose == TRUE) {
    message(paste0("[", Sys.time(), "] Differentially expressed genes extracted."))
  }

61 62
  # We plot Venn diagrams that summarize the results and show to which
  # extend the analyses overlap.
63
  grDevices::png(venn_fn)
64 65 66 67 68 69 70 71 72 73
  limma::vennDiagram(results,
                     names        = limma_coeffs,
                     include      = c("up", "down"),
                     circle.col   = c("darkorange", "darkorange2", "darkorange4"),
                     counts.col   = c("blue", "green"),
                     lwd          = 3,
                     cex          = c(1.0, 0.1, 0.1),
                     show.include = TRUE)
  grDevices::dev.off()

74 75 76 77 78 79
  # We clean and log information.
  rm(venn_fn)
  if (verbose == TRUE) {
    message(paste0("[", Sys.time(), "] Venn diagram created."))
  }

80 81 82 83 84 85 86 87 88 89 90
  # We then plot the Mean vs Deviation of the comparisons and highlight the significant genes.
  grDevices::png(md_fn)
  limma::plotMD(fit,
                coef   = paste0("coeff", k),
                status = results[, k],
                values = c(1, -1),
                hl.col = c("red", "blue"),
                hl.pch = c(16, 16),
                hl.cex = c(0.9, 0.9))
  grDevices::dev.off()

91 92 93 94 95 96
  # We clean and log information.
  rm(md_fn, results)
  if (verbose == TRUE) {
    message(paste0("[", Sys.time(), "] MD plot created."))
  }

97 98
  # Save the top tables to TSV files.
  utils::write.table(table, file = table_fn, sep = "\t", quote = FALSE, col.names = NA)
99 100 101 102 103 104

  # We clean and log information.
  rm(table, table_fn)
  if (verbose == TRUE) {
    message(paste0("[", Sys.time(), "] Differentially expressed gene list saved."))
  }
105
}