Commit 673b161e authored by Leon-Charles Tranchevent's avatar Leon-Charles Tranchevent
Browse files

Some refactoring, better logs, improved GC / mem usage.

parent 24846b85
...@@ -14,38 +14,43 @@ ...@@ -14,38 +14,43 @@
#' @param batch_filename A string indicating where the batch information can be found, #' @param batch_filename A string indicating where the batch information can be found,
#' default to 'Batch.tsv'. #' default to 'Batch.tsv'.
#' @param verbose A boolean representing whether the function should display log information. This #' @param verbose A boolean representing whether the function should display log information. This
#' is TRUE by default. #' is FALSE by default.
#' @return The corrected expression data as an ESET object. #' @return The corrected expression data as an ESET object.
correct_batch_effect <- function(eset, correct_batch_effect <- function(eset,
input_data_dir, input_data_dir,
is_eset = TRUE, is_eset = TRUE,
batch_filename = "Batch.tsv", batch_filename = "Batch.tsv",
verbose = TRUE) { verbose = FALSE) {
# We read the clinical and batch data. # We read the clinical and batch data.
batch_data_file <- paste0(input_data_dir, batch_filename) batch_data_file <- paste0(input_data_dir, batch_filename)
batch_data <- utils::read.delim(file = batch_data_file, batch_data <- utils::read.delim(file = batch_data_file,
row.names = 1) row.names = 1)
pheno_data <- Biobase::pData(ArrayUtils::load_clinical_data(input_data_dir, pheno_data <- Biobase::pData(ArrayUtils::load_clinical_data(input_data_dir,
verbose = FALSE)) verbose = verbose))
remove(batch_data_file)
# If necessary, we convert the input matrix to an ExpressionSet. # If necessary, we convert the input matrix to an ExpressionSet.
if (!is_eset) { if (!is_eset) {
eset <- Biobase::ExpressionSet(eset) eset <- Biobase::ExpressionSet(eset)
} }
# We clean up and log information.
rm(batch_data_file)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Data read."))
}
# We then build the mod matrix that contains the clinically relevant co-factors. # We then build the mod matrix that contains the clinically relevant co-factors.
# By default, we consider we have only the disease status, but we then also # By default, we consider we have only the disease status, but we then also
# try to add the gender and age as co-factors to guide Combat to correct for batch effect # try to add the gender and age as co-factors to guide Combat to correct for batch effect
# without removing the biologically relevant signal. # without removing the biologically relevant signal.
mod_data <- stats::model.matrix(~Disease.status, data = pheno_data) mod_data <- stats::model.matrix(~Disease.status, data = pheno_data)
if (sum(is.na(pheno_data$Gender)) == 0) { if (sum(is.na(pheno_data$Gender)) == 0) {
if (sum(is.na(pheno_data$Age)) == 0) { if (sum(is.na(pheno_data$Age)) == 0) {
mod_data <- stats::model.matrix(~Disease.status + Gender + as.numeric(as.character(Age)), mod_data <- stats::model.matrix(~Disease.status + Gender + as.numeric(as.character(Age)),
data = pheno_data) data = pheno_data)
} else { } else {
mod_data <- stats::model.matrix(~Disease.status + Gender, data = pheno_data) mod_data <- stats::model.matrix(~Disease.status + Gender, data = pheno_data)
} }
} }
...@@ -55,26 +60,31 @@ correct_batch_effect <- function(eset, ...@@ -55,26 +60,31 @@ correct_batch_effect <- function(eset,
clean_probe_list <- rownames(exp_data) clean_probe_list <- rownames(exp_data)
for (b in unique(batch_data$Batch)) { for (b in unique(batch_data$Batch)) {
# We take care of batch b. # We take care of batch b.
samples_in_batch <- which(batch_data$Batch == b) samples_in_batch <- which(batch_data$Batch == b)
exp_data_batch <- exp_data[, samples_in_batch] exp_data_batch <- exp_data[, samples_in_batch]
probe_vars <- apply(exp_data_batch, 1, var) probe_vars <- apply(exp_data_batch, 1, var)
probe_var_0 <- names(probe_vars[probe_vars == 0]) probe_var_0 <- names(probe_vars[probe_vars == 0])
if (length(probe_var_0) > 0) { if (length(probe_var_0) > 0) {
clean_probe_list <- setdiff(clean_probe_list, probe_var_0) clean_probe_list <- setdiff(clean_probe_list, probe_var_0)
} }
} }
remove(b, samples_in_batch, exp_data_batch, probe_vars, probe_var_0) rm(b, samples_in_batch, exp_data_batch, probe_vars, probe_var_0)
eset <- Biobase::ExpressionSet(exp_data[clean_probe_list, ]) eset <- Biobase::ExpressionSet(exp_data[clean_probe_list, ])
remove(exp_data, clean_probe_list)
# We clean up and log information.
rm(exp_data, clean_probe_list)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Data ready."))
}
# ComBat corrects for the batch effect. # ComBat corrects for the batch effect.
data_bc <- sva::ComBat(dat = Biobase::exprs(eset), data_bc <- sva::ComBat(dat = Biobase::exprs(eset),
batch = batch_data$Batch, batch = batch_data$Batch,
mod = mod_data) mod = mod_data)
eset_bc <- Biobase::ExpressionSet(data_bc) eset_bc <- Biobase::ExpressionSet(data_bc)
# We clean up and log information. # We clean up and log information.
remove(batch_data, pheno_data, mod_data) rm(batch_data, pheno_data, mod_data)
if (verbose == TRUE) { if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Batch effect corrected.")) message(paste0("[", Sys.time(), "] Batch effect corrected."))
} }
......
...@@ -16,11 +16,14 @@ ...@@ -16,11 +16,14 @@
#' @param file_prefix A string used to prefix the file names. Default to "". #' @param file_prefix A string used to prefix the file names. Default to "".
#' @param pval_adjust_method A string code indicating the multiple testing correction #' @param pval_adjust_method A string code indicating the multiple testing correction
#' method to use. Default to BH. #' method to use. Default to BH.
#' @param verbose A boolean representing whether the function should display log information. This
#' is FALSE by default.
#' @return NULL #' @return NULL
extract_DEGs <- function(fit, limma_coeffs, k, output_data_dir, extract_DEGs <- function(fit, limma_coeffs, k, output_data_dir,
file_suffix = "", file_suffix = "",
file_prefix = "", file_prefix = "",
pval_adjust_method = "BH") { pval_adjust_method = "BH",
verbose = FALSE) {
# We create the output file names. # We create the output file names.
if (file_suffix != "") { if (file_suffix != "") {
...@@ -29,12 +32,12 @@ extract_DEGs <- function(fit, limma_coeffs, k, output_data_dir, ...@@ -29,12 +32,12 @@ extract_DEGs <- function(fit, limma_coeffs, k, output_data_dir,
if (file_prefix != "") { if (file_prefix != "") {
file_prefix <- paste0( "_", file_prefix) file_prefix <- paste0( "_", file_prefix)
} }
venn_filename <- paste0(output_data_dir, file_suffix, "venn_", venn_fn <- paste0(output_data_dir, file_suffix, "venn_",
limma_coeffs[k], file_prefix, ".png") limma_coeffs[k], file_prefix, ".png")
md_fn <- paste0(output_data_dir, file_suffix, "MD_", md_fn <- paste0(output_data_dir, file_suffix, "MD_",
limma_coeffs[k], file_prefix, ".png") limma_coeffs[k], file_prefix, ".png")
table_fn <- paste0(output_data_dir, file_suffix, "toptable_", table_fn <- paste0(output_data_dir, file_suffix, "toptable_",
limma_coeffs[k], file_prefix, ".tsv") limma_coeffs[k], file_prefix, ".tsv")
# We extract the DEGs using toptable. # We extract the DEGs using toptable.
table <- limma::topTable(fit, table <- limma::topTable(fit,
...@@ -49,9 +52,14 @@ extract_DEGs <- function(fit, limma_coeffs, k, output_data_dir, ...@@ -49,9 +52,14 @@ extract_DEGs <- function(fit, limma_coeffs, k, output_data_dir,
coefficient = paste0("coeff", k), coefficient = paste0("coeff", k),
adjust.method = pval_adjust_method) adjust.method = pval_adjust_method)
# We log information.
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Differentially expressed genes extracted."))
}
# We plot Venn diagrams that summarize the results and show to which # We plot Venn diagrams that summarize the results and show to which
# extent the analyses overlap. # extent the analyses overlap.
grDevices::png(venn_filename) grDevices::png(venn_fn)
limma::vennDiagram(results, limma::vennDiagram(results,
names = limma_coeffs, names = limma_coeffs,
include = c("up", "down"), include = c("up", "down"),
...@@ -62,6 +70,12 @@ extract_DEGs <- function(fit, limma_coeffs, k, output_data_dir, ...@@ -62,6 +70,12 @@ extract_DEGs <- function(fit, limma_coeffs, k, output_data_dir,
show.include = TRUE) show.include = TRUE)
grDevices::dev.off() grDevices::dev.off()
# We clean and log information.
rm(venn_fn)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Venn diagram created."))
}
# We then plot the Mean vs Deviation of the comparisons and highlight the significant genes. # We then plot the Mean vs Deviation of the comparisons and highlight the significant genes.
grDevices::png(md_fn) grDevices::png(md_fn)
limma::plotMD(fit, limma::plotMD(fit,
...@@ -73,6 +87,18 @@ extract_DEGs <- function(fit, limma_coeffs, k, output_data_dir, ...@@ -73,6 +87,18 @@ extract_DEGs <- function(fit, limma_coeffs, k, output_data_dir,
hl.cex = c(0.9, 0.9)) hl.cex = c(0.9, 0.9))
grDevices::dev.off() grDevices::dev.off()
# We clean and log information.
rm(md_fn, results)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] MD plot created."))
}
# Save the top tables to TSV files. # Save the top tables to TSV files.
utils::write.table(table, file = table_fn, sep = "\t", quote = FALSE, col.names = NA) utils::write.table(table, file = table_fn, sep = "\t", quote = FALSE, col.names = NA)
# We clean and log information.
rm(table, table_fn)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Differentially expressed gene list saved."))
}
} }
...@@ -18,6 +18,7 @@ get_gene_annots_from_file <- function(folder, filename, entities) { ...@@ -18,6 +18,7 @@ get_gene_annots_from_file <- function(folder, filename, entities) {
# We read the file into a data-frame. # We read the file into a data-frame.
gene_annots <- read.delim(file_path, row.names = NULL, stringsAsFactors = FALSE) gene_annots <- read.delim(file_path, row.names = NULL, stringsAsFactors = FALSE)
rm(file_path)
# We do a bit of cleaning. # We do a bit of cleaning.
gene_annots[is.na(gene_annots)] <- "" gene_annots[is.na(gene_annots)] <- ""
......
...@@ -23,5 +23,7 @@ get_gene_annots_from_package <- function(affy_library_name, entities, ...@@ -23,5 +23,7 @@ get_gene_annots_from_package <- function(affy_library_name, entities,
columns = columns, columns = columns,
keytype = keytype), list(local_db = as.name(affy_library_name))) keytype = keytype), list(local_db = as.name(affy_library_name)))
eval(cmd) eval(cmd)
rm(cmd)
return(gene_annots) return(gene_annots)
} }
...@@ -10,12 +10,12 @@ ...@@ -10,12 +10,12 @@
#' @param clinical_file_name A string containing the file name. By default, this is 'ClinicalData.tsv' #' @param clinical_file_name A string containing the file name. By default, this is 'ClinicalData.tsv'
#' @param use_factors A boolean stating whether the columns should be read as factors (default FALSE). #' @param use_factors A boolean stating whether the columns should be read as factors (default FALSE).
#' @param verbose A boolean representing whether the function should display log information. This #' @param verbose A boolean representing whether the function should display log information. This
#' is TRUE by default. #' is FALSE by default.
#' @return An annotated data-frame that contains the clinical data. #' @return An annotated data-frame that contains the clinical data.
load_clinical_data <- function(data_dir, load_clinical_data <- function(data_dir,
clinical_file_name = "ClinicalData.tsv", clinical_file_name = "ClinicalData.tsv",
use_factors = TRUE, use_factors = TRUE,
verbose = TRUE) { verbose = FALSE) {
# We define the I/Os. # We define the I/Os.
clinical_data_file <- paste0(data_dir, clinical_file_name) clinical_data_file <- paste0(data_dir, clinical_file_name)
...@@ -40,6 +40,7 @@ load_clinical_data <- function(data_dir, ...@@ -40,6 +40,7 @@ load_clinical_data <- function(data_dir,
if (verbose == TRUE) { if (verbose == TRUE) {
data_dimensions <- paste0(dim(pheno_data), collapse = " * ") data_dimensions <- paste0(dim(pheno_data), collapse = " * ")
message(paste0("[", Sys.time(), "] Clinical data read (", data_dimensions, ").")) message(paste0("[", Sys.time(), "] Clinical data read (", data_dimensions, ")."))
rm(data_dimensions)
} }
# We return the clinical data. # We return the clinical data.
......
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
#' @param clean_samples A boolean indicating whether the dataset should be cleaned by removing #' @param clean_samples A boolean indicating whether the dataset should be cleaned by removing
#' the samples that do not have clinical data. Default to FALSE. #' the samples that do not have clinical data. Default to FALSE.
#' @param verbose A boolean representing whether the function should display log information. This #' @param verbose A boolean representing whether the function should display log information. This
#' is TRUE by default. #' is FALSE by default.
#' @return The expression data as ESET objects. Potentially only one object (therefore unlisted). #' @return The expression data as ESET objects. Potentially only one object (therefore unlisted).
preprocess_data <- function(input_data_dir, output_data_files, preprocess_data <- function(input_data_dir, output_data_files,
platform = "Affymetix", platform = "Affymetix",
...@@ -34,7 +34,7 @@ preprocess_data <- function(input_data_dir, output_data_files, ...@@ -34,7 +34,7 @@ preprocess_data <- function(input_data_dir, output_data_files,
batch_correction = "FALSE", batch_correction = "FALSE",
batch_filename = "Batch.tsv", batch_filename = "Batch.tsv",
clean_samples = FALSE, clean_samples = FALSE,
verbose = TRUE) { verbose = FALSE) {
# We launch the correct function depending on the array platform and desired method. # We launch the correct function depending on the array platform and desired method.
esets <- NULL esets <- NULL
...@@ -73,7 +73,7 @@ preprocess_data <- function(input_data_dir, output_data_files, ...@@ -73,7 +73,7 @@ preprocess_data <- function(input_data_dir, output_data_files,
clean_samples = clean_samples, clean_samples = clean_samples,
verbose = verbose) verbose = verbose)
} else { } else {
message(paste0("[", Sys.time(), "] Platform ", platform, message(paste0("[", Sys.time(), "] [WARNING] Platform ", platform,
" not yet supported (no preprocessing done).")) " not yet supported (no preprocessing done)."))
} }
......
...@@ -19,24 +19,29 @@ ...@@ -19,24 +19,29 @@
#' @param clean_samples A boolean indicating whether the dataset should be cleaned by removing #' @param clean_samples A boolean indicating whether the dataset should be cleaned by removing
#' the samples that do not have clinical data. Default to FALSE. #' the samples that do not have clinical data. Default to FALSE.
#' @param verbose A boolean representing whether the function should display log information. This #' @param verbose A boolean representing whether the function should display log information. This
#' is TRUE by default. #' is FALSE by default.
#' @return The expression data as ESET objects. Potentially only one object (therefore unlisted). #' @return The expression data as ESET objects. Potentially only one object (therefore unlisted).
preprocess_data_affymetrix_gcrma <- function(input_data_dir, output_data_files, preprocess_data_affymetrix_gcrma <- function(input_data_dir, output_data_files,
compressed = FALSE, compressed = FALSE,
batch_correction = "FALSE", batch_correction = "FALSE",
batch_filename = "Batch.tsv", batch_filename = "Batch.tsv",
clean_samples = FALSE, clean_samples = FALSE,
verbose = TRUE) { verbose = FALSE) {
# We define the I/Os. # We define the I/Os.
raw_data_input_dir <- paste0(input_data_dir, "RAW/") raw_data_input_dir <- paste0(input_data_dir, "RAW/")
# We run the RMA pre-processing method on the data. # We run the RMA pre-processing method on the data.
input_data_files <- affy::list.celfiles(raw_data_input_dir, full.names = TRUE) input_data_files <- affy::list.celfiles(raw_data_input_dir, full.names = TRUE)
remove(raw_data_input_dir)
batch <- affy::ReadAffy(filenames = input_data_files, compress = compressed, verbose = verbose) batch <- affy::ReadAffy(filenames = input_data_files, compress = compressed, verbose = verbose)
eset <- gcrma::gcrma(batch) eset <- gcrma::gcrma(batch)
# We clean up and log information.
rm(raw_data_input_dir, input_data_files, batch)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Raw data processed."))
}
# We remove the probes that have 0 variance across the samples. # We remove the probes that have 0 variance across the samples.
exp_data <- Biobase::exprs(eset) exp_data <- Biobase::exprs(eset)
probe_vars <- apply(exp_data, 1, var) probe_vars <- apply(exp_data, 1, var)
...@@ -44,33 +49,51 @@ preprocess_data_affymetrix_gcrma <- function(input_data_dir, output_data_files, ...@@ -44,33 +49,51 @@ preprocess_data_affymetrix_gcrma <- function(input_data_dir, output_data_files,
if (length(probe_var_0) > 0) { if (length(probe_var_0) > 0) {
clean_probe_list <- setdiff(rownames(exp_data), probe_var_0) clean_probe_list <- setdiff(rownames(exp_data), probe_var_0)
eset <- Biobase::ExpressionSet(exp_data[clean_probe_list, ]) eset <- Biobase::ExpressionSet(exp_data[clean_probe_list, ])
remove(clean_probe_list) rm(clean_probe_list)
}
# We clean up and log information.
rm(exp_data, probe_vars, probe_var_0)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Data cleaned (step I)."))
} }
remove(exp_data, probe_vars, probe_var_0)
# We correct for the batch effect if necessary. # We correct for the batch effect if necessary.
eset_bc <- NULL eset_bc <- NULL
if (batch_correction != "FALSE") { if (batch_correction != "FALSE") {
eset_bc <- correct_batch_effect(eset = eset, eset_bc <- correct_batch_effect(eset = eset,
input_data_dir = input_data_dir) input_data_dir = input_data_dir,
verbose = verbose)
# We log some information.
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Batch effect corrected."))
}
if (batch_correction == "TRUE") { if (batch_correction == "TRUE") {
eset <- eset_bc eset <- eset_bc
remove(eset_bc) rm(eset_bc)
} }
} else { } else {
remove(eset_bc) rm(eset_bc)
} }
# If necessary, we remove the samples that do not have clinical data. # If necessary, we remove the samples that do not have clinical data.
if (clean_samples) { if (clean_samples) {
# We load the clinical data so as to get the samples to keep. # We load the clinical data so as to get the samples to keep.
samples <- rownames(Biobase::pData(ArrayUtils::load_clinical_data(input_data_dir, samples <- rownames(Biobase::pData(ArrayUtils::load_clinical_data(input_data_dir,
verbose = FALSE))) verbose = verbose)))
# We only keep the samples with clinical data. # We only keep the samples with clinical data.
eset <- eset[, samples] eset <- eset[, samples]
if (batch_correction == "BOTH") { if (batch_correction == "BOTH") {
eset_bc <- eset_bc[, samples] eset_bc <- eset_bc[, samples]
} }
# We clean up and log information.
rm(samples)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Data cleaned (step II)."))
}
} }
# We save the eset data as TSV file. # We save the eset data as TSV file.
...@@ -82,10 +105,9 @@ preprocess_data_affymetrix_gcrma <- function(input_data_dir, output_data_files, ...@@ -82,10 +105,9 @@ preprocess_data_affymetrix_gcrma <- function(input_data_dir, output_data_files,
quote = FALSE) quote = FALSE)
} }
# We clean up and log information. # We log information.
remove(input_data_files, batch)
if (verbose == TRUE) { if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Expression data pre-processed with RMA.")) message(paste0("[", Sys.time(), "] Processed data written to files."))
} }
# We return the created ESET(s). # We return the created ESET(s).
......
...@@ -19,14 +19,14 @@ ...@@ -19,14 +19,14 @@
#' @param clean_samples A boolean indicating whether the dataset should be cleaned by removing #' @param clean_samples A boolean indicating whether the dataset should be cleaned by removing
#' the samples that do not have clinical data. Default to FALSE. #' the samples that do not have clinical data. Default to FALSE.
#' @param verbose A boolean representing whether the function should display log information. This #' @param verbose A boolean representing whether the function should display log information. This
#' is TRUE by default. #' is FALSE by default.
#' @return The expression data as ESET objects. Potentially only one object (therefore unlisted). #' @return The expression data as ESET objects. Potentially only one object (therefore unlisted).
preprocess_data_affymetrix_scan <- function(input_data_dir, output_data_files, preprocess_data_affymetrix_scan <- function(input_data_dir, output_data_files,
compressed = FALSE, compressed = FALSE,
batch_correction = "FALSE", batch_correction = "FALSE",
batch_filename = "Batch.tsv", batch_filename = "Batch.tsv",
clean_samples = FALSE, clean_samples = FALSE,
verbose = TRUE) { verbose = FALSE) {
# We define the I/Os. # We define the I/Os.
raw_data_input_dir <- paste0(input_data_dir, "RAW/") raw_data_input_dir <- paste0(input_data_dir, "RAW/")
...@@ -34,9 +34,14 @@ preprocess_data_affymetrix_scan <- function(input_data_dir, output_data_files, ...@@ -34,9 +34,14 @@ preprocess_data_affymetrix_scan <- function(input_data_dir, output_data_files,
# We run the SCAN pre-processing method on the data. # We run the SCAN pre-processing method on the data.
# We do not run the fast analysis (by default). # We do not run the fast analysis (by default).
input_data_regexp <- paste0(raw_data_input_dir, "*") input_data_regexp <- paste0(raw_data_input_dir, "*")
remove(raw_data_input_dir)
eset <- SCAN.UPC::SCAN(input_data_regexp, outFilePath = output_data_files[1]) eset <- SCAN.UPC::SCAN(input_data_regexp, outFilePath = output_data_files[1])
# We clean up and log information.
rm(raw_data_input_dir, input_data_regexp)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Raw data processed."))
}
# We remove the probes that have 0 variance across the samples. # We remove the probes that have 0 variance across the samples.
exp_data <- Biobase::exprs(eset) exp_data <- Biobase::exprs(eset)
probe_vars <- apply(exp_data, 1, var) probe_vars <- apply(exp_data, 1, var)
...@@ -44,33 +49,51 @@ preprocess_data_affymetrix_scan <- function(input_data_dir, output_data_files, ...@@ -44,33 +49,51 @@ preprocess_data_affymetrix_scan <- function(input_data_dir, output_data_files,
if (length(probe_var_0) > 0) { if (length(probe_var_0) > 0) {
clean_probe_list <- setdiff(rownames(exp_data), probe_var_0) clean_probe_list <- setdiff(rownames(exp_data), probe_var_0)
eset <- Biobase::ExpressionSet(exp_data[clean_probe_list, ]) eset <- Biobase::ExpressionSet(exp_data[clean_probe_list, ])
remove(clean_probe_list) rm(clean_probe_list)
}
# We clean up and log information.
rm(exp_data, probe_vars, probe_var_0)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Data cleaned (step I)."))
} }
remove(exp_data, probe_vars, probe_var_0)
# We correct for the batch effect if necessary. # We correct for the batch effect if necessary.
eset_bc <- NULL eset_bc <- NULL
if (batch_correction != "FALSE") { if (batch_correction != "FALSE") {
eset_bc <- correct_batch_effect(eset = eset, eset_bc <- correct_batch_effect(eset = eset,
input_data_dir = input_data_dir) input_data_dir = input_data_dir,
verbose = verbose)
# We log some information.
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Batch effect corrected."))
}
if (batch_correction == "TRUE") { if (batch_correction == "TRUE") {
eset <- eset_bc eset <- eset_bc
remove(eset_bc) rm(eset_bc)
} }
} else { } else {
remove(eset_bc) rm(eset_bc)
} }
# If necessary, we remove the samples that do not have clinical data. # If necessary, we remove the samples that do not have clinical data.
if (clean_samples) { if (clean_samples) {