Commit 231e050f authored by Leon-Charles Tranchevent's avatar Leon-Charles Tranchevent
Browse files

Replaced probe ids by probe names for Agilent data processing.

parent ccb29793
......@@ -40,6 +40,9 @@ preprocess_data_agilent_limma <- function(input_data_dir, output_data_file,
green.only = TRUE,
verbose = TRUE)
batch_data <- log2(batch$E)
# We change the probe ids (1 to 45015) to be the probe names instead (A_XX_PXXXX).
rownames(batch_data) <- (batch$genes)$ProbeName
remove(raw_data_input_dir, batch)
# We run the LIMMA pre-processing method on the data.
......@@ -61,6 +64,11 @@ preprocess_data_agilent_limma <- function(input_data_dir, output_data_file,
batch_data_norm <- batch_data_norm[, samples]
}
# We remove the duplicate rows (based on the row names only).
probe_id_counts <- table(row.names(batch_data))
unique_probe_ids <- setdiff(names(probe_id_counts), names(probe_id_counts[probe_id_counts > 1]))
batch_data_norm <- batch_data_norm[unique_probe_ids, ]
# We save the eset data as TSV file.
eset <- methods::new("ExpressionSet", exprs = as.matrix(batch_data_norm))
rm(batch_data_bg)
......
......@@ -7,18 +7,21 @@
#' @param data_file A string representing the file that contains the expression data.
#' @param header A boolean indicating whether the file contains a header (default to TRUE).
#' @param sep A string that is used as a field separator to read the data (default to
#' tab for TSV files).
#' tab for TSV files).
#' @param row_names The index of the row names (default to 1).
#' @param as_is A boolean indicating whether R should keep the data as they are in the
#' file (default to TRUE).
#' file (default to TRUE).
#' @param as_eset A boolean indicating whether the returned object should be an eset. A matrix
#' is returned otherwise (default to TRUE).
#' @param verbose A boolean representing whether the function should display log information. This
#' is TRUE by default.
#' @return An ExpressionSet containing the preprocessed expression data.
#' is TRUE by default.
#' @return An object containing the expression data: an ExpressionSet if as_eset is TRUE, a matrix otherwise.
read_eset <- function(data_file,
header = TRUE,
sep = "\t",
row_names = 1,
as_is = TRUE,
as_eset = TRUE,
verbose = TRUE) {
# We load the matrix and create the associated eset object.
......@@ -30,11 +33,14 @@ read_eset <- function(data_file,
exprs_eset <- Biobase::ExpressionSet(assayData = exprs_mat)
# We clean up and log information.
rm(exprs_mat)
if (verbose == TRUE) {
message(paste0("[", Sys.time(), "] Expression data read."))
}
# We return the eset.
return(exprs_eset)
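# We return the object.
# NOTE(review): exprs_mat is deleted by rm(exprs_mat) above, so the matrix branch below refers to
# an object that no longer exists in this function; the clean-up should be skipped or deferred
# when as_eset is FALSE.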
if (as_eset) {
return(exprs_eset)
} else {
return(exprs_mat)
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment