Commit 6d95399a authored by Leon-Charles Tranchevent's avatar Leon-Charles Tranchevent
Browse files

Added batch correction for all non-SCAN pre-processing methods. Changed Limma...

Added batch correction for all non-SCAN pre-processing methods. Changed Limma to add age as a co-factor when possible.
parent 7cb20fd2
...@@ -17,6 +17,7 @@ Imports: ...@@ -17,6 +17,7 @@ Imports:
gcrma, gcrma,
arrayQualityMetrics, arrayQualityMetrics,
SCAN.UPC, SCAN.UPC,
sva,
massiR, massiR,
limma, limma,
stringr, stringr,
......
...@@ -4,3 +4,6 @@ importFrom("grDevices", "dev.off", "png") ...@@ -4,3 +4,6 @@ importFrom("grDevices", "dev.off", "png")
importFrom("stats", "cor", "df", "prcomp") importFrom("stats", "cor", "df", "prcomp")
importClassesFrom("AnnotationDbi") importClassesFrom("AnnotationDbi")
importMethodsFrom("AnnotationDbi", "mget") importMethodsFrom("AnnotationDbi", "mget")
importFrom("stats", "model.matrix")
importFrom("utils", "read.delim")
importFrom("stats", "var")
#' @title Correct for a batch effect.
#'
#' @description This function runs ComBat to correct for a batch effect. It also
#' takes the clinical factors into account so as not to remove the real biological
#' signal. It returns the corrected ESET object.
#'
#' Note: the function does not check for the existence of folders or files.
#'
#' @param eset The original expressionSet object that contains the (uncorrected) data.
#' @param input_data_dir A string representing the folder that contains the clinical
#' and batch data.
#' @param is_eset A boolean representing whether the input data is an ExpressionSet object
#' (TRUE) or a simple matrix with the expression values (FALSE). This is TRUE by default.
#' @param batch_filename A string indicating where the batch information can be found,
#' default to 'Batch.tsv'.
#' @param verbose A boolean representing whether the function should display log information. This
#' is TRUE by default.
#' @return The corrected expression data, as an ExpressionSet when is_eset is TRUE,
#' otherwise as a plain matrix (i.e., the same representation as the input).
correct_batch_effect <- function(eset,
                                 input_data_dir,
                                 is_eset = TRUE,
                                 batch_filename = "Batch.tsv",
                                 verbose = TRUE) {
  # We read the batch assignments (one row per sample) and the clinical data.
  batch_data <- utils::read.delim(file = paste0(input_data_dir, batch_filename),
                                  row.names = 1)
  pheno_data <- Biobase::pData(ArrayUtils::load_clinical_data(input_data_dir,
                                                              verbose = FALSE))
  # If necessary, we convert the input matrix to an ExpressionSet.
  if (!is_eset) {
    eset <- Biobase::ExpressionSet(eset)
  }
  # We build the model matrix that contains the clinically relevant co-factors.
  # By default we only use the disease status, but gender and age are added as
  # co-factors whenever they are fully observed (age only if gender also is, to
  # mirror the original nesting), guiding ComBat to correct for the batch effect
  # without removing the biologically relevant signal.
  has_gender <- !anyNA(pheno_data$Gender)
  has_age <- !anyNA(pheno_data$Age)
  if (has_gender && has_age) {
    mod_data <- stats::model.matrix(~Disease.status + Gender + as.numeric(as.character(Age)),
                                    data = pheno_data)
  } else if (has_gender) {
    mod_data <- stats::model.matrix(~Disease.status + Gender, data = pheno_data)
  } else {
    mod_data <- stats::model.matrix(~Disease.status, data = pheno_data)
  }
  # ComBat corrects for the batch effect; the clinical model protects the signal.
  data_bc <- sva::ComBat(dat = Biobase::exprs(eset),
                         batch = batch_data$Batch,
                         mod = mod_data)
  eset_bc <- Biobase::ExpressionSet(data_bc)
  # We log information if requested.
  if (isTRUE(verbose)) {
    message(paste0("[", Sys.time(), "] Batch effect corrected."))
  }
  # We return the corrected ExpressionSet or the matrix (same as input).
  if (!is_eset) {
    return(Biobase::exprs(eset_bc))
  } else {
    return(eset_bc)
  }
}
...@@ -10,73 +10,73 @@ ...@@ -10,73 +10,73 @@
#' Note: the function does not check for the existence of folders or files. #' Note: the function does not check for the existence of folders or files.
#' #'
#' @param input_data_dir A string representing the folder that contains the input data. #' @param input_data_dir A string representing the folder that contains the input data.
#' @param output_data_file A string representing the file that should contain the #' @param output_data_files An array of strings representing the files that should contain the
#' preprocessed data. #' preprocessed data. At least one value, maximum two if batch_correction is "BOTH".
#' @param platform A string representing the array platform among Affymetrix, Illumina and Agilent. #' @param platform A string representing the array platform among Affymetrix, Illumina and Agilent.
#' Default to Affymetrix. #' Default to Affymetrix.
#' @param method A string representing the preprocessing method to use (for Affymetrix arrays #' @param method A string representing the preprocessing method to use (for Affymetrix arrays
#' where multiple methods are supported). Default to SCAN for Affymetrix data. #' where multiple methods are supported). Default to SCAN for Affymetrix data.
#' @param compressed A boolean representing whether the cel files are compressed. This #' @param compressed A boolean representing whether the cel files are compressed. This
#' is FALSE by default. #' is FALSE by default.
#' @param batch_correction A boolean indicating whether batch correction should #' @param batch_correction A String indicating whether batch correction should
#' be performed, default to FALSE. #' be performed. Options are "TRUE", "FALSE", "BOTH", default to "FALSE".
#' @param batch_filename A string indicating where the batch information can be found, #' @param batch_filename A string indicating where the batch information can be found,
#' default to 'Batch.tsv'. #' default to 'Batch.tsv'.
#' @param clean_samples A boolean indicating whether the dataset should be cleaned by removing #' @param clean_samples A boolean indicating whether the dataset should be cleaned by removing
#' the samples that do not have clinical data. Default to FALSE. #' the samples that do not have clinical data. Default to FALSE.
#' @param verbose A boolean representing whether the function should display log information. This #' @param verbose A boolean representing whether the function should display log information. This
#' is TRUE by default. #' is TRUE by default.
#' @return The expression data as an ESET object. #' @return The expression data as ESET objects. Potentially only one object (therefore unlisted).
preprocess_data <- function(input_data_dir, output_data_file, preprocess_data <- function(input_data_dir, output_data_files,
platform = "Affymetrix", platform = "Affymetrix",
method = "SCAN", method = "SCAN",
compressed = FALSE, compressed = FALSE,
batch_correction = FALSE, batch_correction = "FALSE",
batch_filename = "Batch.tsv", batch_filename = "Batch.tsv",
clean_samples = FALSE, clean_samples = FALSE,
verbose = TRUE) { verbose = TRUE) {
# We launch the correct function depending on the array platform and desired method. # We launch the correct function depending on the array platform and desired method.
eset <- NULL esets <- NULL
if (platform == "Affymetrix") { if (platform == "Affymetrix") {
if (method == "SCAN") { if (method == "SCAN") {
eset <- preprocess_data_affymetrix_scan(input_data_dir, esets <- preprocess_data_affymetrix_scan(input_data_dir,
output_data_file, output_data_files,
compressed = compressed,
batch_correction = batch_correction,
batch_filename = batch_filename,
clean_samples = clean_samples,
verbose = verbose)
} else if (method == "GCRMA") {
eset <- preprocess_data_affymetrix_gcrma(input_data_dir,
output_data_file,
compressed = compressed, compressed = compressed,
batch_correction = batch_correction, batch_correction = batch_correction,
batch_filename = batch_filename, batch_filename = batch_filename,
clean_samples = clean_samples, clean_samples = clean_samples,
verbose = verbose) verbose = verbose)
} else if (method == "GCRMA") {
esets <- preprocess_data_affymetrix_gcrma(input_data_dir,
output_data_files,
compressed = compressed,
batch_correction = batch_correction,
batch_filename = batch_filename,
clean_samples = clean_samples,
verbose = verbose)
} }
} else if (platform == "Agilent") { } else if (platform == "Agilent") {
eset <- preprocess_data_agilent_limma(input_data_dir, esets <- preprocess_data_agilent_limma(input_data_dir,
output_data_file, output_data_files,
compressed = compressed, compressed = compressed,
batch_correction = batch_correction, batch_correction = batch_correction,
batch_filename = batch_filename, batch_filename = batch_filename,
clean_samples = clean_samples, clean_samples = clean_samples,
verbose = verbose) verbose = verbose)
} else if (platform == "Illumina") { } else if (platform == "Illumina") {
eset <- preprocess_data_illumina_beadarray(input_data_dir, esets <- preprocess_data_illumina_beadarray(input_data_dir,
output_data_file, output_data_files,
compressed = compressed, compressed = compressed,
batch_correction = batch_correction, batch_correction = batch_correction,
batch_filename = batch_filename, batch_filename = batch_filename,
clean_samples = clean_samples, clean_samples = clean_samples,
verbose = verbose) verbose = verbose)
} else { } else {
message(paste0("[", Sys.time(), "] Platform ", platform, message(paste0("[", Sys.time(), "] Platform ", platform,
" not yet supported (no preprocessing done).")) " not yet supported (no preprocessing done)."))
} }
# We return the created ESET. # We return the created ESET(s).
return(eset) return(esets)
} }
...@@ -5,29 +5,28 @@ ...@@ -5,29 +5,28 @@
#' #'
#' The function assumes that a folder containing the raw data exists (as cel files). #' The function assumes that a folder containing the raw data exists (as cel files).
#' #'
#' Note: the function does not check for the existence of folders or files. The batch effect #' Note: the function does not check for the existence of folders or files.
#' correction is currently not yet supported.
#' #'
#' @param input_data_dir A string representing the folder that contains the input data. #' @param input_data_dir A string representing the folder that contains the input data.
#' @param output_data_file A string representing the file that should contain the #' @param output_data_files An array of strings representing the files that should contain the
#' preprocessed data. #' preprocessed data. At least one value, maximum two if batch_correction is "BOTH".
#' @param compressed A boolean representing whether the cel files are compressed. This #' @param compressed A boolean representing whether the cel files are compressed. This
#' is FALSE by default. #' is FALSE by default.
#' @param batch_correction A boolean indicating whether batch correction should #' @param batch_correction A String indicating whether batch correction should
#' be performed, default to FALSE. Note: not yet supported. #' be performed. Options are "TRUE", "FALSE", "BOTH", default to "FALSE".
#' @param batch_filename A string indicating where the batch information can be found, #' @param batch_filename A string indicating where the batch information can be found,
#' default to 'Batch.tsv'. Note: not yet supported. #' default to 'Batch.tsv'. Note: not yet supported.
#' @param clean_samples A boolean indicating whether the dataset should be cleaned by removing #' @param clean_samples A boolean indicating whether the dataset should be cleaned by removing
#' the samples that do not have clinical data. Default to FALSE. #' the samples that do not have clinical data. Default to FALSE.
#' @param verbose A boolean representing whether the function should display log information. This #' @param verbose A boolean representing whether the function should display log information. This
#' is TRUE by default. #' is TRUE by default.
#' @return The expression data as an ESET object. #' @return The expression data as ESET objects. Potentially only one object (therefore unlisted).
preprocess_data_affymetrix_gcrma <- function(input_data_dir, output_data_file, preprocess_data_affymetrix_gcrma <- function(input_data_dir, output_data_files,
compressed = FALSE, compressed = FALSE,
batch_correction = FALSE, batch_correction = "FALSE",
batch_filename = "Batch.tsv", batch_filename = "Batch.tsv",
clean_samples = FALSE, clean_samples = FALSE,
verbose = TRUE) { verbose = TRUE) {
# We define the I/Os. # We define the I/Os.
raw_data_input_dir <- paste0(input_data_dir, "RAW/") raw_data_input_dir <- paste0(input_data_dir, "RAW/")
...@@ -38,6 +37,30 @@ preprocess_data_affymetrix_gcrma <- function(input_data_dir, output_data_file, ...@@ -38,6 +37,30 @@ preprocess_data_affymetrix_gcrma <- function(input_data_dir, output_data_file,
batch <- affy::ReadAffy(filenames = input_data_files, compress = compressed, verbose = verbose) batch <- affy::ReadAffy(filenames = input_data_files, compress = compressed, verbose = verbose)
eset <- gcrma::gcrma(batch) eset <- gcrma::gcrma(batch)
# We remove the probes that have 0 variance across the samples.
exp_data <- Biobase::exprs(eset)
probe_vars <- apply(exp_data, 1, var)
probe_var_0 <- names(probe_vars[probe_vars == 0])
if (length(probe_var_0) > 0) {
clean_probe_list <- setdiff(rownames(exp_data), probe_var_0)
eset <- Biobase::ExpressionSet(exp_data[clean_probe_list, ])
remove(clean_probe_list)
}
remove(exp_data, probe_vars, probe_var_0)
# We correct for the batch effect if necessary.
eset_bc <- NULL
if (batch_correction != "FALSE") {
eset_bc <- correct_batch_effect(eset = eset,
input_data_dir = input_data_dir)
if (batch_correction == "TRUE") {
eset <- eset_bc
remove(eset_bc)
}
} else {
remove(eset_bc)
}
# If necessary, we remove the samples that do not have clinical data. # If necessary, we remove the samples that do not have clinical data.
if (clean_samples) { if (clean_samples) {
# We load the clinical data as to get the samples to keep. # We load the clinical data as to get the samples to keep.
...@@ -45,10 +68,19 @@ preprocess_data_affymetrix_gcrma <- function(input_data_dir, output_data_file, ...@@ -45,10 +68,19 @@ preprocess_data_affymetrix_gcrma <- function(input_data_dir, output_data_file,
verbose = FALSE))) verbose = FALSE)))
# We only keep the samples with clinical data. # We only keep the samples with clinical data.
eset <- eset[, samples] eset <- eset[, samples]
if (batch_correction == "BOTH") {
eset_bc <- eset_bc[, samples]
}
} }
# We save the eset data as TSV file. # We save the eset data as TSV file.
utils::write.table(Biobase::exprs(eset), file = output_data_file, sep = "\t", quote = FALSE) utils::write.table(Biobase::exprs(eset), file = output_data_files[1], sep = "\t", quote = FALSE)
if (batch_correction == "BOTH") {
utils::write.table(Biobase::exprs(eset_bc),
file = output_data_files[2],
sep = "\t",
quote = FALSE)
}
# We clean up and log information. # We clean up and log information.
remove(input_data_files, batch) remove(input_data_files, batch)
...@@ -56,6 +88,10 @@ preprocess_data_affymetrix_gcrma <- function(input_data_dir, output_data_file, ...@@ -56,6 +88,10 @@ preprocess_data_affymetrix_gcrma <- function(input_data_dir, output_data_file,
message(paste0("[", Sys.time(), "] Expression data pre-processed with RMA.")) message(paste0("[", Sys.time(), "] Expression data pre-processed with RMA."))
} }
# We return the created ESET. # We return the created ESET(s).
return(eset) if (batch_correction == "BOTH") {
return(list(eset, eset_bc))
} else {
return(eset)
}
} }
...@@ -8,23 +8,22 @@ ...@@ -8,23 +8,22 @@
#' Note: the function does not check for the existence of folders or files. #' Note: the function does not check for the existence of folders or files.
#' #'
#' @param input_data_dir A string representing the folder that contains the input data. #' @param input_data_dir A string representing the folder that contains the input data.
#' @param output_data_file A string representing the file that should contain the #' @param output_data_files An array of strings representing the files that should contain the
#' preprocessed data. #' preprocessed data. At least one value, maximum two if batch_correction is "BOTH".
#' @param compressed A boolean representing whether the cel files are compressed. This #' @param compressed A boolean representing whether the cel files are compressed. This
#' is FALSE by default. #' is FALSE by default.
#' @param batch_correction A boolean indicating whether batch correction should #' @param batch_correction A String indicating whether batch correction should
#' be performed, default to FALSE. #' be performed. Options are "TRUE", "FALSE", "BOTH", default to "FALSE".
#' @param batch_filename A string indicating where the batch information can be found, #' @param batch_filename A string indicating where the batch information can be found,
#' default to 'Batch.tsv'. #' default to 'Batch.tsv'.
#' @param clean_samples A boolean indicating whether the dataset should be cleaned by removing #' @param clean_samples A boolean indicating whether the dataset should be cleaned by removing
#' the samples that do not have clinical data. Default to FALSE. Note: the cleaning is not yet #' the samples that do not have clinical data. Default to FALSE.
#' supported for SCAN normalization.
#' @param verbose A boolean representing whether the function should display log information. This #' @param verbose A boolean representing whether the function should display log information. This
#' is TRUE by default. #' is TRUE by default.
#' @return The expression data as an ESET object. #' @return The expression data as ESET objects. Potentially only one object (therefore unlisted).
preprocess_data_affymetrix_scan <- function(input_data_dir, output_data_file, preprocess_data_affymetrix_scan <- function(input_data_dir, output_data_files,
compressed = FALSE, compressed = FALSE,
batch_correction = FALSE, batch_correction = "FALSE",
batch_filename = "Batch.tsv", batch_filename = "Batch.tsv",
clean_samples = FALSE, clean_samples = FALSE,
verbose = TRUE) { verbose = TRUE) {
...@@ -36,16 +35,50 @@ preprocess_data_affymetrix_scan <- function(input_data_dir, output_data_file, ...@@ -36,16 +35,50 @@ preprocess_data_affymetrix_scan <- function(input_data_dir, output_data_file,
# We do not run the fast analysis (by default). # We do not run the fast analysis (by default).
input_data_regexp <- paste0(raw_data_input_dir, "*") input_data_regexp <- paste0(raw_data_input_dir, "*")
remove(raw_data_input_dir) remove(raw_data_input_dir)
eset <- vector() eset <- SCAN.UPC::SCAN(input_data_regexp, outFilePath = output_data_files[1])
if (batch_correction == FALSE) {
eset <- SCAN.UPC::SCAN(input_data_regexp, outFilePath = output_data_file) # We remove the probes that have 0 variance across the samples.
exp_data <- Biobase::exprs(eset)
probe_vars <- apply(exp_data, 1, var)
probe_var_0 <- names(probe_vars[probe_vars == 0])
if (length(probe_var_0) > 0) {
clean_probe_list <- setdiff(rownames(exp_data), probe_var_0)
eset <- Biobase::ExpressionSet(exp_data[clean_probe_list, ])
remove(clean_probe_list)
}
remove(exp_data, probe_vars, probe_var_0)
# We correct for the batch effect if necessary.
eset_bc <- NULL
if (batch_correction != "FALSE") {
eset_bc <- correct_batch_effect(eset = eset,
input_data_dir = input_data_dir)
if (batch_correction == "TRUE") {
eset <- eset_bc
remove(eset_bc)
}
} else { } else {
# We define the I/Os (for batch). remove(eset_bc)
batch_data_file <- paste0(input_data_dir, batch_filename) }
eset <- SCAN.UPC::SCAN(input_data_regexp,
outFilePath = output_data_file, # If necessary, we remove the samples that do not have clinical data.
batchFilePath = batch_data_file) if (clean_samples) {
remove(batch_data_file) # We load the clinical data as to get the samples to keep.
samples <- rownames(Biobase::pData(ArrayUtils::load_clinical_data(input_data_dir,
verbose = FALSE)))
# We only keep the samples with clinical data.
eset <- eset[, samples]
if (batch_correction == "BOTH") {
eset_bc <- eset_bc[, samples]
}
}
# We save the eset_bc data as TSV file. ESET was already done as part of SCAN.
if (batch_correction == "BOTH") {
utils::write.table(Biobase::exprs(eset_bc),
file = output_data_files[2],
sep = "\t",
quote = FALSE)
} }
# We clean up and log information. # We clean up and log information.
...@@ -54,6 +87,10 @@ preprocess_data_affymetrix_scan <- function(input_data_dir, output_data_file, ...@@ -54,6 +87,10 @@ preprocess_data_affymetrix_scan <- function(input_data_dir, output_data_file,
message(paste0("[", Sys.time(), "] Expression data pre-processed with SCAN.")) message(paste0("[", Sys.time(), "] Expression data pre-processed with SCAN."))
} }
# We return the created ESET. # We return the created ESET(s).
return(eset) if (batch_correction == "BOTH") {
return(list(eset, eset_bc))
} else {
return(eset)
}
} }
...@@ -5,26 +5,25 @@ ...@@ -5,26 +5,25 @@
#' #'
#' The function assumes that a folder containing the raw data exists (as cel files). #' The function assumes that a folder containing the raw data exists (as cel files).
#' #'
#' Note: the function does not check for the existence of folders or files. The batch effect #' Note: the function does not check for the existence of folders or files.
#' correction is not yet supported.
#' #'
#' @param input_data_dir A string representing the folder that contains the input data. #' @param input_data_dir A string representing the folder that contains the input data.
#' @param output_data_file A string representing the file that should contain the #' @param output_data_files An array of strings representing the files that should contain the
#' preprocessed data. #' preprocessed data. At least one value, maximum two if batch_correction is "BOTH".
#' @param compressed A boolean representing whether the cel files are compressed. This #' @param compressed A boolean representing whether the cel files are compressed. This
#' is FALSE by default. Note: only for compatibility reasons (has no effect on analysis). #' is FALSE by default. Note: only for compatibility reasons (has no effect on analysis).
#' @param batch_correction A boolean indicating whether batch correction should #' @param batch_correction A String indicating whether batch correction should
#' be performed, default to FALSE. Note: not yet supported. #' be performed. Options are "TRUE", "FALSE", "BOTH", default to "FALSE".
#' @param batch_filename A string indicating where the batch information can be found, #' @param batch_filename A string indicating where the batch information can be found,
#' default to 'Batch.tsv'. Note: not yet supported. #' default to 'Batch.tsv'.
#' @param clean_samples A boolean indicating whether the dataset should be cleaned by removing #' @param clean_samples A boolean indicating whether the dataset should be cleaned by removing
#' the samples that do not have clinical data. Default to FALSE. #' the samples that do not have clinical data. Default to FALSE.
#' @param verbose A boolean representing whether the function should display log information. This #' @param verbose A boolean representing whether the function should display log information. This
#' is TRUE by default. #' is TRUE by default.
#' @return The expression data as an ESET object. #' @return The expression data as ESET objects. Potentially only one object (therefore unlisted).
preprocess_data_agilent_limma <- function(input_data_dir, output_data_file, preprocess_data_agilent_limma <- function(input_data_dir, output_data_files,
compressed = FALSE, compressed = FALSE,
batch_correction = FALSE, batch_correction = "FALSE",
batch_filename = "Batch.tsv", batch_filename = "Batch.tsv",
clean_samples = FALSE, clean_samples = FALSE,
verbose = TRUE) { verbose = TRUE) {
...@@ -54,7 +53,36 @@ preprocess_data_agilent_limma <- function(input_data_dir, output_data_file, ...@@ -54,7 +53,36 @@ preprocess_data_agilent_limma <- function(input_data_dir, output_data_file,
# Second, perform quantile normalization. # Second, perform quantile normalization.
batch_data_norm <- limma::normalizeBetweenArrays(batch_data_bg, method = "quantile") batch_data_norm <- limma::normalizeBetweenArrays(batch_data_bg, method = "quantile")
rm(batch_data_bg)
# We remove the duplicate rows (based on the row names only).
probe_id_counts <- table(row.names(batch_data))
unique_probe_ids <- setdiff(names(probe_id_counts), names(probe_id_counts[probe_id_counts > 1]))
batch_data_norm <- batch_data_norm[unique_probe_ids, ]
# We remove the probes that have 0 variance across the samples.
probe_vars <- apply(batch_data_norm, 1, var)
probe_var_0 <- names(probe_vars[probe_vars == 0])
if (length(probe_var_0) > 0) {
clean_probe_list <- setdiff(rownames(batch_data_norm), probe_var_0)
batch_data_norm <- batch_data_norm[clean_probe_list, ]
remove(clean_probe_list)
}
remove(probe_vars, probe_var_0)
# We correct for the batch effect if necessary.
batch_data_norm_bc <- NULL
if (batch_correction != "FALSE") {
batch_data_norm_bc <- correct_batch_effect(eset = batch_data_norm,
input_data_dir = input_data_dir,
is_eset = FALSE)
if (batch_correction == "TRUE") {
batch_data_norm <- batch_data_norm_bc
remove(batch_data_norm_bc)
}
} else {
remove(batch_data_norm_bc)
}
# If necessary, we remove the samples that do not have clinical data. # If necessary, we remove the samples that do not have clinical data.
if (clean_samples) { if (clean_samples) {
# We load the clinical data as to get the samples to keep. # We load the clinical data as to get the samples to keep.
...@@ -62,24 +90,33 @@ preprocess_data_agilent_limma <- function(input_data_dir, output_data_file, ...@@ -62,24 +90,33 @@ preprocess_data_agilent_limma <- function(input_data_dir, output_data_file,
verbose = FALSE))) verbose = FALSE)))
# We only keep the samples with clinical data. # We only keep the samples with clinical data.
batch_data_norm <- batch_data_norm[, samples] batch_data_norm <- batch_data_norm[, samples]
if (batch_correction == "BOTH") {
batch_data_norm_bc <- batch_data_norm_bc[, samples]
}
} }
# We remove the duplicate rows (based on the row names only).
probe_id_counts <- table(row.names(batch_data))
unique_probe_ids <- setdiff(names(probe_id_counts), names(probe_id_counts[probe_id_counts > 1]))
batch_data_norm <- batch_data_norm[unique_probe_ids, ]
# We save the eset data as TSV file. # We save the eset data as TSV file.
eset <- methods::new("ExpressionSet", exprs = as.matrix(batch_data_norm)) eset <- methods::new("ExpressionSet", exprs = as.matrix(batch_data_norm))
rm(batch_data_bg) utils::write.table(batch_data_norm, file = output_data_files[1], sep = "\t", quote = FALSE)
utils::write.table(batch_data_norm, file = output_data_file, sep = "\t", quote = FALSE)
rm(batch_data_norm) rm(batch_data_norm)