extract_DEGs.R 4.26 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#' @title Extract DEGs from a Limma fit.
#'
#' @description The function extracts the differentially expressed genes from an existing
#' limma fit and for a predefined analysis (aka a coefficient). It saves the data in a TSV file
#' and creates several plots in the process (Venn diagrams, MD plots).
#'
#' @param fit A fit as provided by a complete Limma analysis (for instance using
#' ArrayUtils::run_limma).
#' @param limma_coeffs A list of all limma coefficient names associated with that fit.
#' It will be used to name the output files, and to identify the results in the Venn
#' diagrams.
#' @param k The index of the current coefficient (from limma_coeffs).
#' @param output_data_dir A string representing the folder in which the output files
#' are going to be stored.
#' @param file_prefix A string used to prefix the file names. Default to "".
16
#' @param file_suffix A string used as a suffix for the file names. Default to "".
17
18
#' @param pval_adjust_method A string code indicating the multiple testing correction
#' method to use. Default to BH.
19
20
#' @param verbose A boolean representing whether the function should display log information. This
#' is FALSE by default.
21
#' @return NULL
22
extract_degs <- function(fit, limma_coeffs, k, output_data_dir,
23
                         file_prefix        = "",
24
                         file_suffix        = "",
25
26
                         pval_adjust_method = "BH",
                         verbose            = FALSE) {
27
28
29

  # We create the output file names.
  if (file_prefix != "") {
30
31
32
33
    file_prefix <- paste0(file_prefix, "_")
  }
  if (file_suffix != "") {
    file_suffix <- paste0("_", file_suffix)
34
  }
35
36
37
38
39
40
  venn_fn  <- paste0(output_data_dir, file_prefix, "venn_",
                     limma_coeffs[k], file_suffix, ".png")
  md_fn    <- paste0(output_data_dir, file_prefix, "MD_",
                     limma_coeffs[k], file_suffix, ".png")
  table_fn <- paste0(output_data_dir, file_prefix, "toptable_",
                     limma_coeffs[k], file_suffix, ".tsv")
41
42
43
44
45
46

  # We extract the DEGs using toptable.
  table <- limma::topTable(fit,
                           coef          = paste0("coeff", k),
                           n             = Inf,
                           adjust.method = pval_adjust_method,
47
48
                           confint       = TRUE,
                           sort.by       = "P")
49
50
51
52
53
54
55

  # Another way to extract the differentially expressed genes (in parallel to topTable).
  # This is only used to then create the Venn diagrams.
  results <- limma::decideTests(fit,
                                coefficient   = paste0("coeff", k),
                                adjust.method = pval_adjust_method)

56
57
58
59
60
  # We log information.
  if (verbose == TRUE) {
    message(paste0("[", Sys.time(), "] Differentially expressed genes extracted."))
  }

61
62
  # We plot Venn diagrams that summarize the results and show to which
  # extend the analyses overlap.
63
  grDevices::png(venn_fn)
64
65
66
67
68
69
70
71
72
73
  limma::vennDiagram(results,
                     names        = limma_coeffs,
                     include      = c("up", "down"),
                     circle.col   = c("darkorange", "darkorange2", "darkorange4"),
                     counts.col   = c("blue", "green"),
                     lwd          = 3,
                     cex          = c(1.0, 0.1, 0.1),
                     show.include = TRUE)
  grDevices::dev.off()

74
75
76
77
78
79
  # We clean and log information.
  rm(venn_fn)
  if (verbose == TRUE) {
    message(paste0("[", Sys.time(), "] Venn diagram created."))
  }

80
81
82
83
84
85
86
87
88
89
90
  # We then plot the Mean vs Deviation of the comparisons and highlight the significant genes.
  grDevices::png(md_fn)
  limma::plotMD(fit,
                coef   = paste0("coeff", k),
                status = results[, k],
                values = c(1, -1),
                hl.col = c("red", "blue"),
                hl.pch = c(16, 16),
                hl.cex = c(0.9, 0.9))
  grDevices::dev.off()

91
92
93
94
95
96
  # We clean and log information.
  rm(md_fn, results)
  if (verbose == TRUE) {
    message(paste0("[", Sys.time(), "] MD plot created."))
  }

97
98
  # Save the top tables to TSV files.
  utils::write.table(table, file = table_fn, sep = "\t", quote = FALSE, col.names = NA)
99
100
101
102
103
104

  # We clean and log information.
  rm(table, table_fn)
  if (verbose == TRUE) {
    message(paste0("[", Sys.time(), "] Differentially expressed gene list saved."))
  }
105
}