Commit 7581f7f8 authored by Leon-Charles Tranchevent's avatar Leon-Charles Tranchevent
Browse files

Improved limma analysis (more parameters / co-factors).

parent 673b161e
...@@ -12,7 +12,10 @@ ...@@ -12,7 +12,10 @@
#' provided as formulas (similar to the limma::makeContrasts function). Additional parameters are #' provided as formulas (similar to the limma::makeContrasts function). Additional parameters are
#' ignored for the time being. #' ignored for the time being.
#' @param correct_for_age A boolean indicating whether we should correct for age in the limma #' @param correct_for_age A boolean indicating whether we should correct for age in the limma
#' design matrix. This expects that a clinical factor termed Age exists. Default to TRUE. #' design matrix. This expects that a clinical factor termed Age exists. Default to FALSE.
#' @param correct_for_batch A boolean indicating whether we should correct for a poential batch
#' effect in the limma design matrix. This expects that the actual batch information exists in
#' the object pheno_data (as field Batch). Default to FALSE.
#' @param cofactor_name A string indicating a potential cofactor (included in the phenotypic data). #' @param cofactor_name A string indicating a potential cofactor (included in the phenotypic data).
#' The default is NULL, and no cofactor is considered. This will replace the age correction if #' The default is NULL, and no cofactor is considered. This will replace the age correction if
#' both were deemed necessary. #' both were deemed necessary.
...@@ -24,81 +27,148 @@ ...@@ -24,81 +27,148 @@
#' is FALSE by default. #' is FALSE by default.
#' @return The limma fit. #' @return The limma fit.
run_limma <- function(pheno_data, eset, limma_parameters, run_limma <- function(pheno_data, eset, limma_parameters,
correct_for_age = TRUE, correct_for_age = FALSE,
cofactor_name = NULL, correct_for_batch = FALSE,
block_key = NULL, cofactor_name = NULL,
verbose = FALSE) { block_key = NULL,
verbose = FALSE) {
# We extract the limma analysis parameters # We extract the limma analysis parameters
clinical_factor <- limma_parameters[[1]] clinical_factor <- limma_parameters[[1]]
coefficients <- limma_parameters[[2]] coefficients <- limma_parameters[[2]]
has_block <- !is.null(cofactor_name) has_block <- !is.null(cofactor_name)
clinical <- factor(pheno_data[[clinical_factor]])
# We create the design matrix (regular case - no block).
clinical <- factor(pheno_data[[clinical_factor]])
design <- stats::model.matrix(~ 0 + clinical)
colnames(design) <- levels(clinical)
rm(clinical_factor) rm(clinical_factor)
# We log information. # I- DESIGN
if (verbose == TRUE) { # We create the design matrix (regular case - no block).
message(paste0("[", Sys.time(), "] Design matrix created.")) # Depending on the co-factors, we have several possibilities.
} # We sill decide based on (i) a block based on a co-factor (paired samples),
# (ii) the age clincal descriptor and (iii) the experimental batches.
if (correct_for_age) { # If we need to add a block based on a co-factor.
design <- stats::model.matrix(~ 0 + as.numeric(pheno_data[["Age"]]) + clinical) design <- NULL
colnames(design) <- c("Age", levels(clinical)) if (has_block == FALSE) {
if (correct_for_age == FALSE) {
# We log information. if (correct_for_batch == FALSE) {
if (verbose == TRUE) { # We do not have a block, we do not include age, nor batch.
message(paste0("[", Sys.time(), "] Design matrix adapted (now with age).")) design <- stats::model.matrix(~ 0 + clinical)
colnames(design) <- levels(clinical)
# We log information.
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created."))
}
} else { # correct_for_batch == TRUE
# We do not have a block, we do not include age but we do include batch.
batches <- factor(pheno_data[["Batch"]])
design <- stats::model.matrix(~ 0 + clinical + batches)
colnames(design) <- c(levels(clinical), levels(batches)[2:length(levels(batches))])
# We log information.
rm(batches)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (batches)."))
}
}
} else { # correct_for_age == TRUE
if (correct_for_batch == FALSE) {
# We do not have a block, we do include age, but not batch.
ages <- as.numeric(pheno_data[["Age"]])
design <- stats::model.matrix(~ 0 + clinical + ages)
colnames(design) <- c(levels(clinical), "Age")
# We log information.
rm(ages)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (ages)."))
}
} else { # correct_for_batch == TRUE
# We do not have a block, we do include age and batch.
ages <- as.numeric(pheno_data[["Age"]])
batches <- factor(pheno_data[["Batch"]])
design <- stats::model.matrix(~ 0 + clinical + ages + batches)
colnames(design) <- c(levels(clinical), "Age", levels(batches)[2:length(levels(batches))])
# We log information.
rm(ages, batches)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (ages, batches)."))
}
}
} }
} } else { # has_block == TRUE
if (correct_for_age == FALSE) {
# If necessary we add a block based on a co-factor. if (correct_for_batch == FALSE) {
# For instance, if we have paired samples. # We do have a block, we do not include age, nor batch.
# This is for now independant of the Age correction. We might need to have both. clinical_cofactor <- factor(pheno_data[[cofactor_name]]) # nolint
if (has_block) { design <- stats::model.matrix(~ 0 + clinical + clinical_cofactor)
clinical_cofactor <- factor(pheno_data[[cofactor_name]]) # nolint colnames(design) <- c(levels(clinical), cofactor_name)
design <- stats::model.matrix(~ 0 + clinical + clinical_cofactor)
colnames(design) <- c(levels(clinical), cofactor_name) # We log information.
rm(clinical_cofactor)
# We clean and log information. if (verbose == TRUE) {
rm(clinical_cofactor) message(paste0("[", Sys.time(), "] Design matrix created (pairs)."))
if (verbose == TRUE) { }
message(paste0("[", Sys.time(), "] Design matrix adapted (now with block).")) } else { # correct_for_batch == TRUE
# We do have a block, we do not include age but we do include batch.
clinical_cofactor <- factor(pheno_data[[cofactor_name]]) # nolint
batches <- factor(pheno_data[["Batch"]])
design <- stats::model.matrix(~ 0 + clinical + batches + clinical_cofactor)
colnames(design) <- c(levels(clinical),
levels(batches)[2:length(levels(batches))],
cofactor_name)
# We log information.
rm(clinical_cofactor, batches)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (pairs, batches)."))
}
}
} else { # correct_for_age == TRUE
if (correct_for_batch == FALSE) {
# We do have a block, we do include age, but not batch.
clinical_cofactor <- factor(pheno_data[[cofactor_name]]) # nolint
ages <- as.numeric(pheno_data[["Age"]])
design <- stats::model.matrix(~ 0 + clinical + ages + clinical_cofactor)
colnames(design) <- c(levels(clinical), "Age", cofactor_name)
# We log information.
rm(clinical_cofactor, ages)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (pairs, ages)."))
}
} else { # correct_for_batch == TRUE
# We do have a block, we do include age and batch.
clinical_cofactor <- factor(pheno_data[[cofactor_name]]) # nolint
ages <- as.numeric(pheno_data[["Age"]])
batches <- factor(pheno_data[["Batch"]])
design <- stats::model.matrix(~ 0 + clinical + ages + batches + clinical_cofactor)
colnames(design) <- c(levels(clinical),
"Age",
levels(batches)[2:length(levels(batches))],
cofactor_name)
# We log information.
rm(clinical_cofactor, ages, batches)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (pairs, ages, batches)."))
}
}
} }
} }
rm(clinical) rm(clinical)
# We build the contrast (default only one). # We build the contrast (only one so far).
contrast <- eval(substitute(limma::makeContrasts(coeff1 = c1, levels = design), contrast <- eval(substitute(limma::makeContrasts(coeff1 = c1, levels = design),
list(c1 = coefficients[1]))) list(c1 = coefficients)))
# We log information. # We log information.
rm(coefficients)
if (verbose == TRUE) { if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Contrast matrix created.")) message(paste0("[", Sys.time(), "] Contrast matrix created."))
} }
# For larger lists of coefficients, we add them all. # We fit the data using the above models and the contrasts
if (limma_parameters$factorial) {
contrast <- eval(substitute(limma::makeContrasts(coeff1 = c1,
coeff2 = c2,
coeff3 = c3,
levels = design),
list(c1 = coefficients[1],
c2 = coefficients[2],
c3 = coefficients[3])))
# We log information.
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Contrast matrix adapted (multiple coefficients)."))
}
}
rm(coefficients())
# We fit the data using the above models and the contrats
# to prepare the differential analyses. # to prepare the differential analyses.
lmfit <- NULL lmfit <- NULL
...@@ -109,6 +179,7 @@ run_limma <- function(pheno_data, eset, limma_parameters, ...@@ -109,6 +179,7 @@ run_limma <- function(pheno_data, eset, limma_parameters,
correlation = corfit$consensus) correlation = corfit$consensus)
# We log information. # We log information.
rm(corfit)
if (verbose == TRUE) { if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Linear fit performed (with blocks).")) message(paste0("[", Sys.time(), "] Linear fit performed (with blocks)."))
} }
...@@ -121,7 +192,7 @@ run_limma <- function(pheno_data, eset, limma_parameters, ...@@ -121,7 +192,7 @@ run_limma <- function(pheno_data, eset, limma_parameters,
} }
} }
fit <- limma::contrasts.fit(lmfit, contrast) fit <- limma::contrasts.fit(lmfit, contrast)
rm(has_block, design, contrast) rm(has_block, design, contrast, lmfit)
fit <- limma::eBayes(fit) fit <- limma::eBayes(fit)
# We return the fit. # We return the fit.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment