run_quality_control_on_preprocessed.R 2.17 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#' @title Executes a quality control of a given microarray dataset (preprocessed data).
#'
#' @description This function runs a quality control (QC) of the dataset defined by the input
#' parameters. It first loads the associated clinical data from \code{input_data_dir} and attaches
#' it to \code{eset} as phenotype data, then runs \code{arrayQualityMetrics} on the annotated
#' object. The resulting report, which contains various quality indicators, is written to
#' \code{output_data_dir} as an HTML document.
#'
#' Note: the function does not check for the existence of folders or files. The output folder
#' is written with \code{force = TRUE}, and no log transformation is applied to the expression
#' values (\code{do.logtransform = FALSE}).
#'
#' @param eset An ESET object that contains the preprocessed expression data.
#' @param input_data_dir A string representing the folder that contains the input data (clinical data).
#' @param output_data_dir A string representing the folder that will contain the output of the QC.
#' @param phenotype_groups A list of phenotype factor names that can be used to highlight the
#'  samples in the QC report. This is empty by default.
#' @param verbose A boolean representing whether the function should display log information. This
#'  is TRUE by default. It is also forwarded to the clinical data loader.
#' @return NULL (invisibly). The function is called for its side effect of writing the QC report.
run_quality_control_on_preprocessed <- function(eset, input_data_dir, output_data_dir,
                                                phenotype_groups = vector(),
                                                verbose          = TRUE) {

  # We annotate the ESET object with the clinical data so that the QC report
  # can highlight samples by phenotype.
  Biobase::phenoData(eset) <- ArrayUtils::load_clinical_data(input_data_dir,
                                                             verbose = verbose)

  # Now, we do the QC on the preprocessed (annotated) data.
  arrayQualityMetrics::arrayQualityMetrics(expressionset   = eset,
                                           outdir          = output_data_dir,
                                           force           = TRUE,
                                           do.logtransform = FALSE,
                                           intgroup        = phenotype_groups)

  # We log information. isTRUE() keeps the check safe for NULL / NA / non-scalar
  # values, while as.logical() still accepts truthy scalars such as 1 or "TRUE".
  if (isTRUE(as.logical(verbose))) {
    message(paste0("[", Sys.time(), "] QC analysis performed (preprocessed data)."))
  }

  invisible(NULL)
}