Commit 30af472b authored by Leon-Charles Tranchevent's avatar Leon-Charles Tranchevent
Browse files

Bug correction in Limma/TopConfects comparison (limma was limited to top 10 genes).

parent 7581f7f8
Package: ArrayUtils
Type: Package
Title: Utils For Array Processing
Version: 0.3.1
Version: 0.3.2
Author: Leon-Charles Tranchevent
Maintainer: Leon-Charles Tranchevent <leon-charles.tranchevent@uni.lu>
Description: This package contains functions to analyse microarray data.
......@@ -30,5 +30,6 @@ Imports:
GEOquery,
beadarray,
methods,
illuminaHumanv3.db
illuminaHumanv3.db,
topconfects
RoxygenNote: 6.1.1
......@@ -12,32 +12,32 @@
#' @param k The index of the current coefficient (from limma_coeffs).
#' @param output_data_dir A string representing the folder in which the output files
#' are going to be stored.
#' @param file_suffix A string used as a suffix for the file names. Default to "".
#' @param file_prefix A string used to prefix the file names. Default to "".
#' @param file_suffix A string used as a suffix for the file names. Default to "".
#' @param pval_adjust_method A string code indicating the multiple testing correction
#' method to use. Default to BH.
#' @param verbose A boolean representing whether the function should display log information. This
#' is FALSE by default.
#' @return NULL
extract_DEGs <- function(fit, limma_coeffs, k, output_data_dir,
file_suffix = "",
file_prefix = "",
file_suffix = "",
pval_adjust_method = "BH",
verbose = FALSE) {
# We create the output file names.
if (file_suffix != "") {
file_suffix <- paste0(file_suffix, "_")
}
if (file_prefix != "") {
file_prefix <- paste0( "_", file_prefix)
file_prefix <- paste0(file_prefix, "_")
}
if (file_suffix != "") {
file_suffix <- paste0("_", file_suffix)
}
venn_fn <- paste0(output_data_dir, file_suffix, "venn_",
limma_coeffs[k], file_prefix, ".png")
md_fn <- paste0(output_data_dir, file_suffix, "MD_",
limma_coeffs[k], file_prefix, ".png")
table_fn <- paste0(output_data_dir, file_suffix, "toptable_",
limma_coeffs[k], file_prefix, ".tsv")
venn_fn <- paste0(output_data_dir, file_prefix, "venn_",
limma_coeffs[k], file_suffix, ".png")
md_fn <- paste0(output_data_dir, file_prefix, "MD_",
limma_coeffs[k], file_suffix, ".png")
table_fn <- paste0(output_data_dir, file_prefix, "toptable_",
limma_coeffs[k], file_suffix, ".tsv")
# We extract the DEGs using toptable.
table <- limma::topTable(fit,
......
......@@ -19,10 +19,18 @@
#' @param cofactor_name A string indicating a potential cofactor (included in the phenotypic data).
#' The default is NULL, and no cofactor is considered. This will replace the age correction if
#' both were deemed necessary.
#' @param block_key A string indicating which column in the phenotyypic data should be used to build
#' blocks if there is a cofactor to take into account. For instance, if there are several replicates
#' per patient, the block_key might be "Patient" and the cofactor might be "Replicate". This will
#' replace the age correction if both were deemed necessary.
#' @param block_key A string indicating which column in the phenotyypic data should be used to
#' build blocks if there is a cofactor to take into account. For instance, if there are several
#' replicates per patient, the block_key might be "Patient" and the cofactor might be "Replicate".
#' This will replace the age correction if both were deemed necessary.
#' @param run_topconfects A boolean indicating whether the analysis should be complemented with a
#' TopConfects analysis that bases the decision on confident fold changes and effect size. Default
#' to FALSE.
#' @param output_data_dir A string representing the folder in which the output files
#' are going to be stored (for TopConfects only). Default to "".
#' @param file_prefix A string used to prefix the file names (for TopConfects only). Default to "".
#' @param file_suffix A string used as a suffix for the file names (for TopConfects only). Default
#' to "".
#' @param verbose A boolean representing whether the function should display log information. This
#' is FALSE by default.
#' @return The limma fit.
......@@ -31,6 +39,10 @@ run_limma <- function(pheno_data, eset, limma_parameters,
correct_for_batch = FALSE,
cofactor_name = NULL,
block_key = NULL,
run_topconfects = FALSE,
output_data_dir = "",
file_prefix = "",
file_suffix = "",
verbose = FALSE) {
# We extract the limma analysis parameters
......@@ -40,7 +52,14 @@ run_limma <- function(pheno_data, eset, limma_parameters,
clinical <- factor(pheno_data[[clinical_factor]])
rm(clinical_factor)
# I- DESIGN
# We refine the prefix / suffic for the file names.
if (file_prefix != "") {
file_prefix <- paste0(file_prefix, "_")
}
if (file_suffix != "") {
file_suffix <- paste0("_", file_suffix)
}
# We create the design matrix (regular case - no block).
# Depending on the co-factors, we have several possibilities.
# We sill decide based on (i) a block based on a co-factor (paired samples),
......@@ -161,6 +180,9 @@ run_limma <- function(pheno_data, eset, limma_parameters,
# We build the contrast (only one so far).
contrast <- eval(substitute(limma::makeContrasts(coeff1 = c1, levels = design),
list(c1 = coefficients)))
if (run_topconfects) {
design_topconfects <-limma::contrastAsCoef(design, contrast)$design
}
# We log information.
rm(coefficients)
......@@ -170,13 +192,19 @@ run_limma <- function(pheno_data, eset, limma_parameters,
# We fit the data using the above models and the contrasts
# to prepare the differential analyses.
lmfit <- NULL
lmfit <- NULL
lmfit_topconfects <- NULL
# We create a block if necessary.
if (has_block) {
corfit <- limma::duplicateCorrelation(eset, design, block = pheno_data[[block_key]])
lmfit <- limma::lmFit(eset, design, block = pheno_data[[block_key]],
correlation = corfit$consensus)
if (run_topconfects) {
lmfit_topconfects <- limma::lmFit(eset, design_topconfects,
block = pheno_data[[block_key]],
correlation = corfit$consensus)
}
# We log information.
rm(corfit)
......@@ -185,6 +213,9 @@ run_limma <- function(pheno_data, eset, limma_parameters,
}
} else {
lmfit <- limma::lmFit(eset, design)
if (run_topconfects) {
lmfit_topconfects <- limma::lmFit(eset, design_topconfects)
}
# We log information.
if (verbose == TRUE) {
......@@ -195,6 +226,91 @@ run_limma <- function(pheno_data, eset, limma_parameters,
rm(has_block, design, contrast, lmfit)
fit <- limma::eBayes(fit)
# We now finish the TopConfects analysis if necesary.
if (run_topconfects) {
# We first run TopConfects on the dedicated lmfit object.
confects <- topconfects::limma_confects(lmfit_topconfects, coef = 1, fdr = 0.05)
# We need to check whether we have any confect at all (if no effect is > 0 at FDR = 0.05,
# then all confects are set to NA values).
confects_genes <- confects$table %>% filter(!is.na(confect))
if (nrow(confects_genes) > 0) {
# Log info.
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] [TopConfects] ", nrow(confects_genes),
" genes identified with TopConfects."))
}
# We save the list of genes
confects_table_fn <- paste0(output_data_dir, file_prefix, "topconfects_",
limma_parameters$name, file_suffix, ".tsv")
utils::write.table(confects_genes,
file = confects_table_fn,
sep = "\t",
quote = FALSE,
col.names = NA)
# First plot, the topconfects plot.
tcplot_fn <- paste0(output_data_dir, file_prefix, "topconfects_",
limma_parameters$name, file_suffix, ".png")
grDevices::png(tcplot_fn)
p <- topconfects::confects_plot(confects, n = 50)
print(p)
grDevices::dev.off()
rm(p, tcplot_fn)
# Second plot, the me plot (mean - effect).
meplot_fn <- paste0(output_data_dir, file_prefix, "topconfects_meplot_",
limma_parameters$name, file_suffix, ".png")
grDevices::png(meplot_fn)
p <- topconfects::confects_plot_me(confects)
print(p)
grDevices::dev.off()
rm(p, meplot_fn)
# Third plot, the comparison between topconfects and pure limma with
# a rank rank plot.
limma_top <- limma::topTable(fit, coef = 1, p.value = 0.05, n = Inf)
if (nrow(limma_top) > 0) {
# Log info.
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] [TopConfects] ", nrow(limma_top),
" genes identified with Limma."))
}
rrplot_fn <- paste0(output_data_dir, file_prefix, "topconfects_rrplot_limma_",
limma_parameters$name, file_suffix, ".png")
grDevices::png(rrplot_fn)
p <- topconfects::rank_rank_plot(confects_genes$name, rownames(limma_top), "TopConfects", "Limma", n = 50)
print(p)
grDevices::dev.off()
rm(p, rrplot_fn)
# Fourth plot, the MD plot with both TopConfect results and Limma results highlighted.
mdplot_fn <- paste0(output_data_dir, file_prefix, "topconfects_mdplot_limma_",
limma_parameters$name, file_suffix, ".png")
grDevices::png(mdplot_fn)
p <- limma::plotMD(fit, legend = "bottomleft", status = paste0(
ifelse(rownames(fit) %in% rownames(limma_top)[1:50], "Limma ", ""),
ifelse(rownames(fit) %in% confects_genes$name[1:50], "TopConfects ", "")))
print(p)
grDevices::dev.off()
rm(p, mdplot_fn)
} else {
# Log info.
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] [TopConfects] no gene identified with Limma."))
}
} # End of the plotting (if there are Limma genes).
} else {
# Log info.
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] [TopConfects] no gene identified with TopConfects."))
}
} # End of the plotting (if there are TopConfect genes).
} # End of the TopConfects section.
# We return the fit.
return(fit)
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment