Commit cd16e491 authored by Leon-Charles Tranchevent's avatar Leon-Charles Tranchevent
Browse files

New parameter for limma analysis: possibility to include gender as a co-factor.

parent ebc03074
...@@ -32,4 +32,4 @@ Imports: ...@@ -32,4 +32,4 @@ Imports:
methods, methods,
illuminaHumanv3.db, illuminaHumanv3.db,
topconfects topconfects
RoxygenNote: 7.0.2 RoxygenNote: 7.1.0
...@@ -13,6 +13,9 @@ ...@@ -13,6 +13,9 @@
#' @param correct_for_batch A boolean indicating whether we should correct for a poential batch #' @param correct_for_batch A boolean indicating whether we should correct for a poential batch
#' effect in the limma design matrix. This expects that the actual batch information exists in #' effect in the limma design matrix. This expects that the actual batch information exists in
#' the object pheno_data (as field Batch). Default to FALSE. #' the object pheno_data (as field Batch). Default to FALSE.
#' @param correct_for_gender A boolean indicating whether we should correct for sample gender
#' in the limma design matrix. This expects that a clinical factor termed Gender exists. Default
#' to FALSE.
#' @param cofactor_name A string indicating a potential cofactor (included in the phenotypic data). #' @param cofactor_name A string indicating a potential cofactor (included in the phenotypic data).
#' The default is NULL, and no cofactor is considered. This will replace the age correction if #' The default is NULL, and no cofactor is considered. This will replace the age correction if
#' both were deemed necessary. #' both were deemed necessary.
...@@ -20,17 +23,18 @@ ...@@ -20,17 +23,18 @@
#' is FALSE by default. #' is FALSE by default.
#' @return The limma fit. #' @return The limma fit.
create_design <- function(pheno_data, limma_parameters, create_design <- function(pheno_data, limma_parameters,
correct_for_age = FALSE, correct_for_age = FALSE,
correct_for_batch = FALSE, correct_for_batch = FALSE,
cofactor_name = NULL, correct_for_gender = FALSE,
verbose = FALSE) { cofactor_name = NULL,
verbose = FALSE) {
# We extract the limma analysis parameters # We extract the limma analysis parameters
clinical_factor <- limma_parameters[[1]] clinical_factor <- limma_parameters[[1]]
has_block <- !is.null(cofactor_name) has_block <- !is.null(cofactor_name)
clinical <- factor(pheno_data[[clinical_factor]]) clinical <- factor(pheno_data[[clinical_factor]])
rm(clinical_factor) rm(clinical_factor)
# We create the design matrix (regular case - no block). # We create the design matrix (regular case - no block).
# Depending on the co-factors, we have several possibilities. # Depending on the co-factors, we have several possibilities.
# We sill decide based on (i) a block based on a co-factor (paired samples), # We sill decide based on (i) a block based on a co-factor (paired samples),
...@@ -40,122 +44,273 @@ create_design <- function(pheno_data, limma_parameters, ...@@ -40,122 +44,273 @@ create_design <- function(pheno_data, limma_parameters,
if (has_block == FALSE) { if (has_block == FALSE) {
if (correct_for_age == FALSE) { if (correct_for_age == FALSE) {
if (correct_for_batch == FALSE) { if (correct_for_batch == FALSE) {
# We do not have a block, we do not include age, nor batch. if (correct_for_gender == FALSE) {
design <- stats::model.matrix(~ 0 + clinical)
colnames(design) <- levels(clinical) # We do not have a block, we do not include age, nor batch, nor gender.
design <- stats::model.matrix(~ 0 + clinical)
# We log information. colnames(design) <- levels(clinical)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created.")) # We log information.
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created."))
}
} else {
# We do not have a block, we do not include age, nor batch, but we include gender.
genders <- factor(pheno_data[["Gender"]])
design <- stats::model.matrix(~ 0 + clinical + genders)
colnames(design) <- c(levels(clinical), levels(genders)[2:length(levels(genders))])
# We log information.
rm(genders)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (genders)."))
}
} }
} else { } else {
# Here we have correct_for_batch = TRUE # Here we have correct_for_batch = TRUE
# We do not have a block, we do not include age but we do include batch. if (correct_for_gender == FALSE) {
batches <- factor(pheno_data[["Batch"]])
design <- stats::model.matrix(~ 0 + clinical + batches) # We do not have a block, we do not include age, nor gender but we do include batch.
colnames(design) <- c(levels(clinical), levels(batches)[2:length(levels(batches))]) batches <- factor(pheno_data[["Batch"]])
design <- stats::model.matrix(~ 0 + clinical + batches)
# We log information. colnames(design) <- c(levels(clinical), levels(batches)[2:length(levels(batches))])
rm(batches)
if (verbose == TRUE) { # We log information.
message(paste0("[", Sys.time(), "] Design matrix created (batches).")) rm(batches)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (batches)."))
}
} else {
# We do not have a block, we do not include age but we do include batch and gender.
batches <- factor(pheno_data[["Batch"]])
genders <- factor(pheno_data[["Gender"]])
design <- stats::model.matrix(~ 0 + clinical + batches + genders)
colnames(design) <- c(levels(clinical), levels(batches)[2:length(levels(batches))],
levels(genders)[2:length(levels(genders))])
# We log information.
rm(batches, genders)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (batches, genders)."))
}
} }
} }
} else { } else {
# Here we have correct_for_age = TRUE # Here we have correct_for_age = TRUE
if (correct_for_batch == FALSE) { if (correct_for_batch == FALSE) {
# We do not have a block, we do include age, but not batch. if (correct_for_gender == FALSE) {
ages <- as.numeric(pheno_data[["Age"]])
design <- stats::model.matrix(~ 0 + clinical + ages) # We do not have a block, we do include age, but not batch, nor gender.
colnames(design) <- c(levels(clinical), "Age") ages <- as.numeric(pheno_data[["Age"]])
design <- stats::model.matrix(~ 0 + clinical + ages)
# We log information. colnames(design) <- c(levels(clinical), "Age")
rm(ages)
if (verbose == TRUE) { # We log information.
message(paste0("[", Sys.time(), "] Design matrix created (ages).")) rm(ages)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (ages)."))
}
} else {
# We do not have a block, we do include age and gender but not batch.
ages <- as.numeric(pheno_data[["Age"]])
genders <- factor(pheno_data[["Gender"]])
design <- stats::model.matrix(~ 0 + clinical + ages + genders)
colnames(design) <- c(levels(clinical), "Age", levels(genders)[2:length(levels(genders))])
# We log information.
rm(ages, genders)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (ages, genders)."))
}
} }
} else { } else {
# Here we have correct_for_batch = TRUE # Here we have correct_for_batch = TRUE
# We do not have a block, we do include age and batch. if (correct_for_gender == FALSE) {
ages <- as.numeric(pheno_data[["Age"]])
batches <- factor(pheno_data[["Batch"]]) # We do not have a block, we do include age and batch but not gender.
design <- stats::model.matrix(~ 0 + clinical + ages + batches) ages <- as.numeric(pheno_data[["Age"]])
colnames(design) <- c(levels(clinical), "Age", levels(batches)[2:length(levels(batches))]) batches <- factor(pheno_data[["Batch"]])
design <- stats::model.matrix(~ 0 + clinical + ages + batches)
# We log information. colnames(design) <- c(levels(clinical), "Age", levels(batches)[2:length(levels(batches))])
rm(ages, batches)
if (verbose == TRUE) { # We log information.
message(paste0("[", Sys.time(), "] Design matrix created (ages, batches).")) rm(ages, batches)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (ages, batches)."))
}
} else {
# We do not have a block, we do include age, batch and gender.
ages <- as.numeric(pheno_data[["Age"]])
batches <- factor(pheno_data[["Batch"]])
genders <- factor(pheno_data[["Gender"]])
design <- stats::model.matrix(~ 0 + clinical + ages + batches + genders)
colnames(design) <- c(levels(clinical), "Age", levels(batches)[2:length(levels(batches))],
levels(genders)[2:length(levels(genders))])
# We log information.
rm(ages, batches, genders)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (ages, batches, genders)."))
}
} }
} }
} }
} else { } else {
# Here we have has_block = TRUE # Here we have has_block = TRUE
if (correct_for_age == FALSE) { if (correct_for_age == FALSE) {
if (correct_for_batch == FALSE) { if (correct_for_batch == FALSE) {
# We do have a block, we do not include age, nor batch. if (correct_for_gender == FALSE) {
clinical_cofactor <- factor(pheno_data[[cofactor_name]]) # nolint
design <- stats::model.matrix(~ 0 + clinical + clinical_cofactor) # We do have a block, we do not include age, nor batch, nor gender.
colnames(design) <- c(levels(clinical), cofactor_name) clinical_cofactor <- factor(pheno_data[[cofactor_name]]) # nolint
design <- stats::model.matrix(~ 0 + clinical + clinical_cofactor)
# We log information. colnames(design) <- c(levels(clinical), cofactor_name)
rm(clinical_cofactor)
if (verbose == TRUE) { # We log information.
message(paste0("[", Sys.time(), "] Design matrix created (pairs).")) rm(clinical_cofactor)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (pairs)."))
}
} else {
# We do have a block, we do not include age, nor batch, but we include gender.
clinical_cofactor <- factor(pheno_data[[cofactor_name]]) # nolint
genders <- factor(pheno_data[["Gender"]])
design <- stats::model.matrix(~ 0 + clinical + genders + clinical_cofactor)
colnames(design) <- c(levels(clinical), levels(genders)[2:length(levels(genders))],
cofactor_name)
# We log information.
rm(clinical_cofactor, genders)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (pairs, genders)."))
}
} }
} else { } else {
# Here we have correct_for_batch = TRUE # Here we have correct_for_batch = TRUE
# We do have a block, we do not include age but we do include batch. if (correct_for_gender == FALSE) {
clinical_cofactor <- factor(pheno_data[[cofactor_name]]) # nolint
batches <- factor(pheno_data[["Batch"]]) # We do have a block, we do not include age, nor gender but we do include batch.
design <- stats::model.matrix(~ 0 + clinical + batches + clinical_cofactor) clinical_cofactor <- factor(pheno_data[[cofactor_name]]) # nolint
colnames(design) <- c(levels(clinical), batches <- factor(pheno_data[["Batch"]])
levels(batches)[2:length(levels(batches))], design <- stats::model.matrix(~ 0 + clinical + batches + clinical_cofactor)
cofactor_name) colnames(design) <- c(levels(clinical),
levels(batches)[2:length(levels(batches))],
# We log information. cofactor_name)
rm(clinical_cofactor, batches)
if (verbose == TRUE) { # We log information.
message(paste0("[", Sys.time(), "] Design matrix created (pairs, batches).")) rm(clinical_cofactor, batches)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (pairs, batches)."))
}
} else {
# We do have a block, we do not include age, nor gender but we do include batch.
clinical_cofactor <- factor(pheno_data[[cofactor_name]]) # nolint
batches <- factor(pheno_data[["Batch"]])
genders <- factor(pheno_data[["Gender"]])
design <- stats::model.matrix(~ 0 + clinical + batches + genders + clinical_cofactor)
colnames(design) <- c(levels(clinical),
levels(batches)[2:length(levels(batches))],
levels(genders)[2:length(levels(genders))],
cofactor_name)
# We log information.
rm(clinical_cofactor, batches, genders)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (pairs, batches, genders)."))
}
} }
} }
} else { } else {
# Here we have correct_for_age = TRUE # Here we have correct_for_age = TRUE
if (correct_for_batch == FALSE) { if (correct_for_batch == FALSE) {
# We do have a block, we do include age, but not batch. if (correct_for_gender == FALSE) {
clinical_cofactor <- factor(pheno_data[[cofactor_name]]) # nolint
ages <- as.numeric(pheno_data[["Age"]]) # We do have a block, we do include age, but not batch, nor gender.
design <- stats::model.matrix(~ 0 + clinical + ages + clinical_cofactor) clinical_cofactor <- factor(pheno_data[[cofactor_name]]) # nolint
colnames(design) <- c(levels(clinical), "Age", cofactor_name) ages <- as.numeric(pheno_data[["Age"]])
design <- stats::model.matrix(~ 0 + clinical + ages + clinical_cofactor)
# We log information. colnames(design) <- c(levels(clinical), "Age", cofactor_name)
rm(clinical_cofactor, ages)
if (verbose == TRUE) { # We log information.
message(paste0("[", Sys.time(), "] Design matrix created (pairs, ages).")) rm(clinical_cofactor, ages)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (pairs, ages)."))
}
} else {
# We do have a block, we do include age and gender but not batch.
clinical_cofactor <- factor(pheno_data[[cofactor_name]]) # nolint
ages <- as.numeric(pheno_data[["Age"]])
genders <- factor(pheno_data[["Gender"]])
design <- stats::model.matrix(~ 0 + clinical + ages + genders + clinical_cofactor)
colnames(design) <- c(levels(clinical), "Age",
levels(genders)[2:length(levels(genders))], cofactor_name)
# We log information.
rm(clinical_cofactor, ages, genders)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (pairs, ages, genders)."))
}
} }
} else { } else {
# Here we have correct_for_batch = TRUE # Here we have correct_for_batch = TRUE
# We do have a block, we do include age and batch. if (correct_for_gender == FALSE) {
clinical_cofactor <- factor(pheno_data[[cofactor_name]]) # nolint
ages <- as.numeric(pheno_data[["Age"]]) # We do have a block, we do include age and batch but not gender.
batches <- factor(pheno_data[["Batch"]]) clinical_cofactor <- factor(pheno_data[[cofactor_name]]) # nolint
design <- stats::model.matrix(~ 0 + clinical + ages + batches + ages <- as.numeric(pheno_data[["Age"]])
clinical_cofactor) batches <- factor(pheno_data[["Batch"]])
colnames(design) <- c(levels(clinical), design <- stats::model.matrix(~ 0 + clinical + ages + batches +
"Age", clinical_cofactor)
levels(batches)[2:length(levels(batches))], colnames(design) <- c(levels(clinical),
cofactor_name) "Age",
levels(batches)[2:length(levels(batches))],
# We log information. cofactor_name)
rm(clinical_cofactor, ages, batches)
if (verbose == TRUE) { # We log information.
message(paste0("[", Sys.time(), "] Design matrix created (pairs, ages, batches).")) rm(clinical_cofactor, ages, batches)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (pairs, ages, batches)."))
}
} else {
# We do have a block, we do include age, batch and gender.
clinical_cofactor <- factor(pheno_data[[cofactor_name]]) # nolint
ages <- as.numeric(pheno_data[["Age"]])
batches <- factor(pheno_data[["Batch"]])
genders <- factor(pheno_data[["Gender"]])
design <- stats::model.matrix(~ 0 + clinical + ages + batches + genders +
clinical_cofactor)
colnames(design) <- c(levels(clinical),
"Age",
levels(batches)[2:length(levels(batches))],
levels(genders)[2:length(levels(genders))],
cofactor_name)
# We log information.
rm(clinical_cofactor, ages, batches, genders)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Design matrix created (pairs, ages, batches, genders)."))
}
} }
} }
} }
} }
rm(clinical) rm(clinical)
# We return the design. # We return the design.
return(design) return(design)
} }
\ No newline at end of file
...@@ -14,9 +14,12 @@ ...@@ -14,9 +14,12 @@
#' ignored for the time being. #' ignored for the time being.
#' @param correct_for_age A boolean indicating whether we should correct for age in the limma #' @param correct_for_age A boolean indicating whether we should correct for age in the limma
#' design matrix. This expects that a clinical factor termed Age exists. Default to FALSE. #' design matrix. This expects that a clinical factor termed Age exists. Default to FALSE.
#' @param correct_for_batch A boolean indicating whether we should correct for a poential batch #' @param correct_for_batch A boolean indicating whether we should correct for a potential batch
#' effect in the limma design matrix. This expects that the actual batch information exists in #' effect in the limma design matrix. This expects that the actual batch information exists in
#' the object pheno_data (as field Batch). Default to FALSE. #' the object pheno_data (as field Batch). Default to FALSE.
#' @param correct_for_gender A boolean indicating whether we should correct for sample gender
#' in the limma design matrix. This expects that a clinical factor termed Gender exists. Default
#' to FALSE.
#' @param cofactor_name A string indicating a potential cofactor (included in the phenotypic data). #' @param cofactor_name A string indicating a potential cofactor (included in the phenotypic data).
#' The default is NULL, and no cofactor is considered. This will replace the age correction if #' The default is NULL, and no cofactor is considered. This will replace the age correction if
#' both were deemed necessary. #' both were deemed necessary.
...@@ -36,15 +39,16 @@ ...@@ -36,15 +39,16 @@
#' is FALSE by default. #' is FALSE by default.
#' @return The limma fit. #' @return The limma fit.
run_limma <- function(pheno_data, eset, limma_parameters, run_limma <- function(pheno_data, eset, limma_parameters,
correct_for_age = FALSE, correct_for_age = FALSE,
correct_for_batch = FALSE, correct_for_batch = FALSE,
cofactor_name = NULL, correct_for_gender = FALSE,
block_key = NULL, cofactor_name = NULL,
run_topconfects = FALSE, block_key = NULL,
output_data_dir = "", run_topconfects = FALSE,
file_prefix = "", output_data_dir = "",
file_suffix = "", file_prefix = "",
verbose = FALSE) { file_suffix = "",
verbose = FALSE) {
# We extract the limma analysis parameters # We extract the limma analysis parameters
clinical_factor <- limma_parameters[[1]] clinical_factor <- limma_parameters[[1]]
...@@ -66,12 +70,13 @@ run_limma <- function(pheno_data, eset, limma_parameters, ...@@ -66,12 +70,13 @@ run_limma <- function(pheno_data, eset, limma_parameters,
# We sill decide based on (i) a block based on a co-factor (paired samples), # We sill decide based on (i) a block based on a co-factor (paired samples),
# (ii) the age clincal descriptor and (iii) the experimental batches. # (ii) the age clincal descriptor and (iii) the experimental batches.
# If we need to add a block based on a co-factor. # If we need to add a block based on a co-factor.
design <- create_design(pheno_data = pheno_data, design <- create_design(pheno_data = pheno_data,
limma_parameters = limma_parameters, limma_parameters = limma_parameters,
correct_for_age = correct_for_age, correct_for_age = correct_for_age,
correct_for_batch = correct_for_batch, correct_for_batch = correct_for_batch,
cofactor_name = cofactor_name, correct_for_gender = correct_for_gender,
verbose = verbose) cofactor_name = cofactor_name,
verbose = verbose)
# We build the contrast (only one so far). # We build the contrast (only one so far).
contrast <- eval(substitute(limma::makeContrasts(coeff1 = c1, levels = design), contrast <- eval(substitute(limma::makeContrasts(coeff1 = c1, levels = design),
...@@ -95,7 +100,7 @@ run_limma <- function(pheno_data, eset, limma_parameters, ...@@ -95,7 +100,7 @@ run_limma <- function(pheno_data, eset, limma_parameters,
if (has_block) { if (has_block) {
corfit <- limma::duplicateCorrelation(eset, design, block = pheno_data[[block_key]]) corfit <- limma::duplicateCorrelation(eset, design, block = pheno_data[[block_key]])
lmfit <- limma::lmFit(eset, design, block = pheno_data[[block_key]], lmfit <- limma::lmFit(eset, design, block = pheno_data[[block_key]],
correlation = corfit$consensus) correlation = corfit$consensus)
if (run_topconfects) { if (run_topconfects) {
lmfit_topconfects <- limma::lmFit(eset, design_topconfects, lmfit_topconfects <- limma::lmFit(eset, design_topconfects,
block = pheno_data[[block_key]], block = pheno_data[[block_key]],
...@@ -124,6 +129,7 @@ run_limma <- function(pheno_data, eset, limma_parameters, ...@@ -124,6 +129,7 @@ run_limma <- function(pheno_data, eset, limma_parameters,
# We now finish the TopConfects analysis if necesary. # We now finish the TopConfects analysis if necesary.
if (run_topconfects) { if (run_topconfects) {
# We first run TopConfects on the dedicated lmfit object. # We first run TopConfects on the dedicated lmfit object.
confects <- topconfects::limma_confects(lmfit_topconfects, coef = 1, fdr = 0.05) confects <- topconfects::limma_confects(lmfit_topconfects, coef = 1, fdr = 0.05)
...@@ -170,6 +176,7 @@ run_limma <- function(pheno_data, eset, limma_parameters, ...@@ -170,6 +176,7 @@ run_limma <- function(pheno_data, eset, limma_parameters,
# a rank rank plot. # a rank rank plot.
limma_top <- limma::topTable(fit, coef = 1, p.value = 0.05, n = Inf) limma_top <- limma::topTable(fit, coef = 1, p.value = 0.05, n = Inf)
if (nrow(limma_top) > 0) { if (nrow(limma_top) > 0) {
# Log info. # Log info.
if (verbose == TRUE) { if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] [TopConfects] ", nrow(limma_top), message(paste0("[", Sys.time(), "] [TopConfects] ", nrow(limma_top),
...@@ -199,12 +206,14 @@ run_limma <- function(pheno_data, eset, limma_parameters, ...@@ -199,12 +206,14 @@ run_limma <- function(pheno_data, eset, limma_parameters,
grDevices::dev.off() grDevices::dev.off()
rm(p, mdplot_fn) rm(p, mdplot_fn)
} else { } else {
# Log info. # Log info.
if (verbose == TRUE) { if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] [TopConfects] no gene identified with Limma.")) message(paste0("[", Sys.time(), "] [TopConfects] no gene identified with Limma."))
} }
} # End of the plotting (if there are Limma genes). } # End of the plotting (if there are Limma genes).
} else { } else {
# Log info. # Log info.
if (verbose == TRUE) { if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] [TopConfects] no gene identified with TopConfects.")) message(paste0("[", Sys.time(), "] [TopConfects] no gene identified with TopConfects."))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment