# run_quality_control_on_raw_affymetrix.R
#' @title Executes a quality control of a given Affymetrix microarray dataset (raw data).
#'
#' @description This function executes a quality control of the dataset defined by the input
#' parameters. It starts by loading the clinical data associated with the dataset, then loads the
#' raw data (CEL files) and last runs the quality control of the annotated data. The function
#' assumes that a folder with both the clinical data and the raw data exists (the raw data being in
#' a subfolder 'RAW/'). It then creates a report that contains various quality indicators and is
#' stored as an HTML document. It does not return any value.
#'
#' Note: the function does not check for the existence of the folders or of the clinical data. It
#' only stops with an error when no CEL file can be found in the 'RAW/' subfolder.
#'
#' @param input_data_dir A string representing the folder that contains the input data. A missing
#'  trailing path separator is tolerated when locating the 'RAW/' subfolder; the value itself is
#'  passed unchanged to the clinical data loader.
#' @param output_data_dir A string representing the folder that contains the output data (passed
#'  as 'outdir' to arrayQualityMetrics, with force = TRUE).
#' @param compressed  A boolean representing whether the raw data are compressed or not. This is
#'  TRUE by default.
#' @param phenotype_groups A vector of phenotype factor names that can be used to highlight the
#'  samples in the QC report (passed as 'intgroup' to arrayQualityMetrics). This is empty by
#'  default.
#' @param verbose A boolean representing whether the function should display log information. This
#'  is TRUE by default.
#' @return NULL, invisibly. The function is called for its side effect (the QC report).
run_quality_control_on_raw_affymetrix <- function(input_data_dir,
                                                  output_data_dir,
                                                  compressed       = TRUE,
                                                  phenotype_groups = vector(),
                                                  verbose          = TRUE) {

  # We define the I/Os. A separator is appended only when the input folder does not already end
  # with one, so that both "data/" and "data" resolve to "data/RAW/" (and never to "dataRAW/").
  raw_data_input_dir <- paste0(sub("([^/\\\\])$", "\\1/", input_data_dir), "RAW/")

  # We load the clinical data as to annotate the AffyBatch object and make QC more useful.
  pheno_data <- ArrayUtils::load_clinical_data(input_data_dir, verbose = verbose)

  # We list the CEL files. We fail early when there are none: according to its documentation,
  # affy::ReadAffy reads the CEL files of the current working directory when it is not given any
  # file name, which would silently run the QC on unrelated data.
  raw_file_list <- affy::list.celfiles(raw_data_input_dir, full.names = TRUE)
  if (length(raw_file_list) == 0L) {
    stop("No CEL files found in '", raw_data_input_dir, "'.", call. = FALSE)
  }

  # We create the affyBatch object and then attach the clinical data.
  # NOTE(review): the object is deliberately kept under the name 'batch'; arrayQualityMetrics
  # appears to derive its default report title from the expression passed as 'expressionset',
  # so renaming it could change the generated report - confirm before renaming.
  batch <- affy::ReadAffy(filenames = raw_file_list, compress = compressed, verbose = verbose)
  Biobase::phenoData(batch) <- pheno_data

  # We log information.
  if (isTRUE(verbose)) {
    message(paste0("[", Sys.time(), "] Expression data read."))
  }

  # We run the quality control itself.
  arrayQualityMetrics::arrayQualityMetrics(expressionset   = batch,
                                           outdir          = output_data_dir,
                                           force           = TRUE,
                                           do.logtransform = TRUE,
                                           intgroup        = phenotype_groups)

  # We log information.
  if (isTRUE(verbose)) {
    message(paste0("[", Sys.time(), "] QC analysis performed (raw data)."))
  }

  # The function is only called for its side effects.
  invisible(NULL)
}