Commit ebc03074 authored by Leon-Charles Tranchevent's avatar Leon-Charles Tranchevent
Browse files

Added support for RNA-seq data analysis (differential expression / limma).

parent f107b6ac
......@@ -32,4 +32,4 @@ Imports:
methods,
illuminaHumanv3.db,
topconfects
RoxygenNote: 6.1.1
RoxygenNote: 7.0.2
......@@ -7,3 +7,4 @@ importMethodsFrom("AnnotationDbi", "mget")
importFrom("stats", "model.matrix")
importFrom("utils", "read.delim")
importFrom("stats", "var")
importFrom("stats", "filter")
#' @title Creates a design matrix for a limma analysis.
#'
#' @description This function creates a design matrix for a limma analysis (without actually
#' running the analysis. The function parameters define which factors are included and how.
#'
#' @param pheno_data The data-frame that contains the phenotypic data (aka clinical data).
#' @param limma_parameters A list of useful Limma parameters that include (1) the main clinical
#' factor used to create sample categories (i.e., Disease status) and (2) the Limma coefficients
#' provided as formulas (similar to the limma::makeContrasts function). Additional parameters are
#' ignored for the time being.
#' @param correct_for_age A boolean indicating whether we should correct for age in the limma
#' design matrix. This expects that a clinical factor termed Age exists. Default to FALSE.
#' @param correct_for_batch A boolean indicating whether we should correct for a poential batch
#' effect in the limma design matrix. This expects that the actual batch information exists in
#' the object pheno_data (as field Batch). Default to FALSE.
#' @param cofactor_name A string indicating a potential cofactor (included in the phenotypic data).
#' The default is NULL, and no cofactor is considered. This will replace the age correction if
#' both were deemed necessary.
#' @param verbose A boolean representing whether the function should display log information. This
#' is FALSE by default.
#' @return The limma fit.
create_design <- function(pheno_data, limma_parameters,
correct_for_age = FALSE,
correct_for_batch = FALSE,
cofactor_name = NULL,
verbose = FALSE) {
# We extract the limma analysis parameters
clinical_factor <- limma_parameters[[1]]
has_block <- !is.null(cofactor_name)
clinical <- factor(pheno_data[[clinical_factor]])
rm(clinical_factor)
# We create the design matrix (regular case - no block).
# Depending on the co-factors, we have several possibilities.
# We sill decide based on (i) a block based on a co-factor (paired samples),
# (ii) the age clincal descriptor and (iii) the experimental batches.
# If we need to add a block based on a co-factor.
design <- NULL
if (has_block == FALSE) {
if (correct_for_age == FALSE) {
if (correct_for_batch == FALSE) {
# We do not have a block, we do not include age, nor batch.
design <- stats::model.matrix(~ 0 + clinical)
colnames(design) <- levels(clinical)
# We log information.
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created."))
}
} else {
# Here we have correct_for_batch = TRUE
# We do not have a block, we do not include age but we do include batch.
batches <- factor(pheno_data[["Batch"]])
design <- stats::model.matrix(~ 0 + clinical + batches)
colnames(design) <- c(levels(clinical), levels(batches)[2:length(levels(batches))])
# We log information.
rm(batches)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (batches)."))
}
}
} else {
# Here we have correct_for_age = TRUE
if (correct_for_batch == FALSE) {
# We do not have a block, we do include age, but not batch.
ages <- as.numeric(pheno_data[["Age"]])
design <- stats::model.matrix(~ 0 + clinical + ages)
colnames(design) <- c(levels(clinical), "Age")
# We log information.
rm(ages)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (ages)."))
}
} else {
# Here we have correct_for_batch = TRUE
# We do not have a block, we do include age and batch.
ages <- as.numeric(pheno_data[["Age"]])
batches <- factor(pheno_data[["Batch"]])
design <- stats::model.matrix(~ 0 + clinical + ages + batches)
colnames(design) <- c(levels(clinical), "Age", levels(batches)[2:length(levels(batches))])
# We log information.
rm(ages, batches)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (ages, batches)."))
}
}
}
} else {
# Here we have has_block = TRUE
if (correct_for_age == FALSE) {
if (correct_for_batch == FALSE) {
# We do have a block, we do not include age, nor batch.
clinical_cofactor <- factor(pheno_data[[cofactor_name]]) # nolint
design <- stats::model.matrix(~ 0 + clinical + clinical_cofactor)
colnames(design) <- c(levels(clinical), cofactor_name)
# We log information.
rm(clinical_cofactor)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (pairs)."))
}
} else {
# Here we have correct_for_batch = TRUE
# We do have a block, we do not include age but we do include batch.
clinical_cofactor <- factor(pheno_data[[cofactor_name]]) # nolint
batches <- factor(pheno_data[["Batch"]])
design <- stats::model.matrix(~ 0 + clinical + batches + clinical_cofactor)
colnames(design) <- c(levels(clinical),
levels(batches)[2:length(levels(batches))],
cofactor_name)
# We log information.
rm(clinical_cofactor, batches)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (pairs, batches)."))
}
}
} else {
# Here we have correct_for_age = TRUE
if (correct_for_batch == FALSE) {
# We do have a block, we do include age, but not batch.
clinical_cofactor <- factor(pheno_data[[cofactor_name]]) # nolint
ages <- as.numeric(pheno_data[["Age"]])
design <- stats::model.matrix(~ 0 + clinical + ages + clinical_cofactor)
colnames(design) <- c(levels(clinical), "Age", cofactor_name)
# We log information.
rm(clinical_cofactor, ages)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (pairs, ages)."))
}
} else {
# Here we have correct_for_batch = TRUE
# We do have a block, we do include age and batch.
clinical_cofactor <- factor(pheno_data[[cofactor_name]]) # nolint
ages <- as.numeric(pheno_data[["Age"]])
batches <- factor(pheno_data[["Batch"]])
design <- stats::model.matrix(~ 0 + clinical + ages + batches +
clinical_cofactor)
colnames(design) <- c(levels(clinical),
"Age",
levels(batches)[2:length(levels(batches))],
cofactor_name)
# We log information.
rm(clinical_cofactor, ages, batches)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (pairs, ages, batches)."))
}
}
}
}
rm(clinical)
# We return the design.
return(design)
}
\ No newline at end of file
......@@ -44,7 +44,8 @@ extract_degs <- function(fit, limma_coeffs, k, output_data_dir,
coef = paste0("coeff", k),
n = Inf,
adjust.method = pval_adjust_method,
confint = TRUE)
confint = TRUE,
sort.by = "P")
# Another way to extract the differentially expressed genes (in parallel to topTable).
# This is only used to then create the Venn diagrams.
......
......@@ -6,7 +6,8 @@
#' be used to detect differentially expressed genes.
#'
#' @param pheno_data The data-frame that contains the phenotypic data (aka clinical data).
#' @param eset The ExpressionSet object that contains the pre-processed data.
#' @param eset The ExpressionSet object that contains the pre-processed data. Alternatively,
#' this can be an EList object (limma package), especially for RNA-seq data.
#' @param limma_parameters A list of useful Limma parameters that include (1) the main clinical
#' factor used to create sample categories (i.e., Disease status) and (2) the Limma coefficients
#' provided as formulas (similar to the limma::makeContrasts function). Additional parameters are
......@@ -65,125 +66,12 @@ run_limma <- function(pheno_data, eset, limma_parameters,
# We sill decide based on (i) a block based on a co-factor (paired samples),
# (ii) the age clincal descriptor and (iii) the experimental batches.
# If we need to add a block based on a co-factor.
design <- NULL
if (has_block == FALSE) {
if (correct_for_age == FALSE) {
if (correct_for_batch == FALSE) {
# We do not have a block, we do not include age, nor batch.
design <- stats::model.matrix(~ 0 + clinical)
colnames(design) <- levels(clinical)
# We log information.
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created."))
}
} else {
# Here we have correct_for_batch = TRUE
# We do not have a block, we do not include age but we do include batch.
batches <- factor(pheno_data[["Batch"]])
design <- stats::model.matrix(~ 0 + clinical + batches)
colnames(design) <- c(levels(clinical), levels(batches)[2:length(levels(batches))])
# We log information.
rm(batches)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (batches)."))
}
}
} else {
# Here we have correct_for_age = TRUE
if (correct_for_batch == FALSE) {
# We do not have a block, we do include age, but not batch.
ages <- as.numeric(pheno_data[["Age"]])
design <- stats::model.matrix(~ 0 + clinical + ages)
colnames(design) <- c(levels(clinical), "Age")
# We log information.
rm(ages)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (ages)."))
}
} else {
# Here we have correct_for_batch = TRUE
# We do not have a block, we do include age and batch.
ages <- as.numeric(pheno_data[["Age"]])
batches <- factor(pheno_data[["Batch"]])
design <- stats::model.matrix(~ 0 + clinical + ages + batches)
colnames(design) <- c(levels(clinical), "Age", levels(batches)[2:length(levels(batches))])
# We log information.
rm(ages, batches)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (ages, batches)."))
}
}
}
} else {
# Here we have has_block = TRUE
if (correct_for_age == FALSE) {
if (correct_for_batch == FALSE) {
# We do have a block, we do not include age, nor batch.
clinical_cofactor <- factor(pheno_data[[cofactor_name]]) # nolint
design <- stats::model.matrix(~ 0 + clinical + clinical_cofactor)
colnames(design) <- c(levels(clinical), cofactor_name)
# We log information.
rm(clinical_cofactor)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (pairs)."))
}
} else {
# Here we have correct_for_batch = TRUE
# We do have a block, we do not include age but we do include batch.
clinical_cofactor <- factor(pheno_data[[cofactor_name]]) # nolint
batches <- factor(pheno_data[["Batch"]])
design <- stats::model.matrix(~ 0 + clinical + batches + clinical_cofactor)
colnames(design) <- c(levels(clinical),
levels(batches)[2:length(levels(batches))],
cofactor_name)
# We log information.
rm(clinical_cofactor, batches)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (pairs, batches)."))
}
}
} else {
# Here we have correct_for_age = TRUE
if (correct_for_batch == FALSE) {
# We do have a block, we do include age, but not batch.
clinical_cofactor <- factor(pheno_data[[cofactor_name]]) # nolint
ages <- as.numeric(pheno_data[["Age"]])
design <- stats::model.matrix(~ 0 + clinical + ages + clinical_cofactor)
colnames(design) <- c(levels(clinical), "Age", cofactor_name)
# We log information.
rm(clinical_cofactor, ages)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (pairs, ages)."))
}
} else {
# Here we have correct_for_batch = TRUE
# We do have a block, we do include age and batch.
clinical_cofactor <- factor(pheno_data[[cofactor_name]]) # nolint
ages <- as.numeric(pheno_data[["Age"]])
batches <- factor(pheno_data[["Batch"]])
design <- stats::model.matrix(~ 0 + clinical + ages + batches +
clinical_cofactor)
colnames(design) <- c(levels(clinical),
"Age",
levels(batches)[2:length(levels(batches))],
cofactor_name)
# We log information.
rm(clinical_cofactor, ages, batches)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (pairs, ages, batches)."))
}
}
}
}
rm(clinical)
design <- create_design(pheno_data = pheno_data,
limma_parameters = limma_parameters,
correct_for_age = correct_for_age,
correct_for_batch = correct_for_batch,
cofactor_name = cofactor_name,
verbose = verbose)
# We build the contrast (only one so far).
contrast <- eval(substitute(limma::makeContrasts(coeff1 = c1, levels = design),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment