run_limma.R 3.41 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
#' @title Run a limma analysis.
#'
#' @description This function prepares and runs a limma analysis up until the fitting procedure.
#' The Limma analysis can be simple (A vs B) or more complex ((A vs B) vs (C vs D)). One single
#' cofactor can also be taken into account in the limma. It returns the final fit that can then
#' be used to detect differentially expressed genes.
#'
#' @param pheno_data The data-frame that contains the phenotypic data (aka clinical data).
#' @param eset The ExpressionSet object that contains the pre-processed data.
#' @param limma_parameters A list of useful Limma parameters that include (1) the main clinical
#' factor used to create sample categories (i.e., Disease status) and (2) the Limma coefficients
#' provided as formulas (similar to the limma::makeContrasts function). Additional parameters are
#' ignored for the time being.
#' @param cofactor_name A string indicating a potential cofactor (included in the phenotypic data).
#' The default is NULL, and no cofactor is considered.
#' @param block_key A string indicating which column in the phenotyypic data should be used to build
#' blocks if there is a cofactor to take into account. For instance, if there are several replicates
#' per patient, the block_key might be "Patient" and the cofactor might be "Replicate".
#' @return The limma fit.
run_limma <- function(pheno_data, eset, limma_parameters,
                      cofactor_name = NULL,
                      block_key     = NULL) {

  # We extract the limma analysis parameters
  clinical_factor    <- limma_parameters[[1]]
  coefficients       <- limma_parameters[[2]]
  has_block          <- !is.null(cofactor_name)

  # We create the design matrix (regular case - no block).
  clinical         <- factor(pheno_data[[clinical_factor]])
  design           <- stats::model.matrix(~ 0 + clinical)
  colnames(design) <- levels(clinical)
33

34
35
36
37
38
  # If necessary we add a block based on a co-factor.
  # For instance, if we have paired samples.
  if (has_block) {
    clinical_cofactor <- factor(pheno_data[[cofactor_name]])
    design            <- stats::model.matrix(~ 0 + clinical + clinical_cofactor)
39
    colnames(design)  <- c(levels(clinical), cofactor_name)
40
41
42
43
44
45
  }

  # We build the contrast (default only one).
  contrast <- eval(substitute(limma::makeContrasts(coeff1 = c1, levels = design),
                              list(c1 = coefficients[1])))
  # For larger lists of coefficients, we add them all.
46
  if (limma_parameters$factorial) {
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
    contrast <- eval(substitute(limma::makeContrasts(coeff1 = c1,
                                                     coeff2 = c2,
                                                     coeff3 = c3,
                                                     levels = design),
                                list(c1 = coefficients[1],
                                     c2 = coefficients[2],
                                     c3 = coefficients[3])))
  }

  # We fit the data using the above models and the contrats
  # to prepare the differential analyses.
  # Default to no block.
  lmfit <- limma::lmFit(eset, design)
  # We create a block if necessary.
  if (has_block) {
62
63
    corfit <- limma::duplicateCorrelation(eset, design, block = pheno_data[[block_key]])
    lmfit  <- limma::lmFit(eset, design, block = pheno_data[[block_key]],
64
65
66
67
68
69
                                 correlation = corfit$consensus)
  }
  fit <- limma::contrasts.fit(lmfit, contrast)
  fit <- limma::eBayes(fit)
  return(fit)
}