Commit 20994b82 authored by Leon-Charles Tranchevent's avatar Leon-Charles Tranchevent
Browse files

Small modifications to steps 01 and 02 (wrt a bug for the RNA-seq datasets and...

Small modifications to steps 01 and 02 (wrt a bug for the RNA-seq datasets and the new QOS on iris).
parent 58c4002c
......@@ -12,4 +12,4 @@ get_log:
vsn:
@sbatch ${CODE_FOLDER}stabilize_variance.sh
doc:
@sbatch ${CODE_FOLDER}doc.sh
\ No newline at end of file
@sbatch ${CODE_FOLDER}doc.sh
......@@ -92,7 +92,7 @@ for (i in seq_len(length(config$datasets))) {
row.names = 1,
quote = "",
stringsAsFactors = FALSE)
# We are not running apt-gcrma so we rely on the ArrayLib functions.
# We are not running apt-gcrma so we rely on the ArrayUtils functions.
esets <- ArrayUtils::preprocess_data(raw_data_subdir,
output_data_files,
platform = dataset$platform,
......@@ -103,7 +103,7 @@ for (i in seq_len(length(config$datasets))) {
verbose = TRUE)
rm(exprs_raw, exprs_fn)
} else {
# We are not running apt-gcrma so we rely on the ArrayLib functions.
# We are not running apt-gcrma so we rely on the ArrayUtils functions.
esets <- ArrayUtils::preprocess_data(raw_data_subdir,
output_data_files,
platform = dataset$platform,
......@@ -165,7 +165,7 @@ for (i in seq_len(length(config$datasets))) {
row.names = 1,
quote = "",
stringsAsFactors = FALSE)
# We are not running apt-gcrma so we rely on the ArrayLib functions.
# We are not running apt-gcrma so we rely on the ArrayUtils functions.
eset <- ArrayUtils::preprocess_data(raw_data_subdir,
output_data_file,
platform = dataset$platform,
......@@ -176,7 +176,7 @@ for (i in seq_len(length(config$datasets))) {
verbose = TRUE)
rm(exprs_raw, exprs_fn)
} else {
# We are not running apt-gcrma so we rely on the ArrayLib functions.
# We are not running apt-gcrma so we rely on the ArrayUtils functions.
eset <- ArrayUtils::preprocess_data(raw_data_subdir,
output_data_file,
platform = dataset$platform,
......
......@@ -47,7 +47,7 @@ do
then
apt_cmd="${apt_script} -a ${apt_script_method} -d ${global_raw_data_dir}Platforms/${cdfName} -o ${OUTPUT_FOLDER}apt_gcrma_temp ${global_raw_data_dir}${datasetName}/RAW/*"
else
apt_cmd="${apt_script} -a ${apt_script_method} -d ${global_raw_data_dir}Platforms/${cdfName} -m ${mpsName} -o ${OUTPUT_FOLDER}apt_gcrma_temp ${global_raw_data_dir}${datasetName}/RAW/*"
apt_cmd="${apt_script} -a ${apt_script_method} -d ${global_raw_data_dir}Platforms/${cdfName} -m ${global_raw_data_dir}Platforms/${mpsName} -o ${OUTPUT_FOLDER}apt_gcrma_temp ${global_raw_data_dir}${datasetName}/RAW/*"
fi
# We run the APT command and we rename / copy the result file to the real apt folder.
......
......@@ -48,7 +48,7 @@ message(paste0("[", Sys.time(), "] Configuration done."))
run_vsn <- function(output_data_subdir, dataset_name, normalization_name, batchcorrection_tag) {
# We read the expression data.
input_file_name <- paste0(output_data_subdir, dataset_name, "_normalized_",
normalization$name, "_", batchcorrection_tag, ".tsv")
normalization_name, "_", batchcorrection_tag, ".tsv")
exp_data <- as.matrix(read.delim(input_file_name, row.names = 1))
exp_data_min <- min(min(exp_data))
......@@ -62,12 +62,12 @@ run_vsn <- function(output_data_subdir, dataset_name, normalization_name, batchc
# We plot the mean vs sdev before variance stabilization.
output_file_name <- paste0(output_data_subdir, dataset_name, "_normalized_",
normalization$name, "_", batchcorrection_tag, "_meansd_ranks.png")
normalization_name, "_", batchcorrection_tag, "_meansd_ranks.png")
png(output_file_name)
vsn::meanSdPlot(exp_data)
dev.off()
output_file_name <- paste0(output_data_subdir, dataset_name, "_normalized_",
normalization$name, "_", batchcorrection_tag, "_meansd_vals.png")
normalization_name, "_", batchcorrection_tag, "_meansd_vals.png")
png(output_file_name)
vsn::meanSdPlot(exp_data, ranks = FALSE)
dev.off()
......@@ -110,12 +110,12 @@ run_vsn <- function(output_data_subdir, dataset_name, normalization_name, batchc
# We plot the mean vs sdev after variance stabilization.
output_file_name <- paste0(output_data_subdir, dataset_name, "_normalized_",
normalization$name, "_", batchcorrection_tag, "_meansd_ranks_vsn.png")
normalization_name, "_", batchcorrection_tag, "_meansd_ranks_vsn.png")
png(output_file_name)
vsn::meanSdPlot(exp_data_vsn)
dev.off()
output_file_name <- paste0(output_data_subdir, dataset_name, "_normalized_",
normalization$name, "_", batchcorrection_tag, "_meansd_vals_vsn.png")
normalization_name, "_", batchcorrection_tag, "_meansd_vals_vsn.png")
png(output_file_name)
vsn::meanSdPlot(exp_data_vsn, ranks = FALSE)
dev.off()
......@@ -129,7 +129,7 @@ run_vsn <- function(output_data_subdir, dataset_name, normalization_name, batchc
# Scatter plot of the processed data versus the scaled vsn data.
output_file_name <- paste0(output_data_subdir, dataset_name, "_normalized_",
normalization$name, "_", batchcorrection_tag, "_data_vs_vsn.png")
normalization_name, "_", batchcorrection_tag, "_data_vs_vsn.png")
png(output_file_name)
plot(exp_data, exp_data_vsn_scl)
dev.off()
......@@ -137,7 +137,7 @@ run_vsn <- function(output_data_subdir, dataset_name, normalization_name, batchc
# We save the data.
output_file_name <- paste0(output_data_subdir, dataset_name, "_normalized_",
normalization$name, "_", batchcorrection_tag, "_vsn.tsv")
normalization_name, "_", batchcorrection_tag, "_vsn.tsv")
utils::write.table(exp_data_vsn_scl, file = output_file_name, sep = "\t", quote = FALSE)
rm(scaling_factor, exp_data_min, exp_data_max)
rm(exp_data_vsn, exp_data_vsn_scl, output_file_name)
......
......@@ -7,12 +7,12 @@ seed: 43633773
normalizations:
-
name: normS00
platforms: ["Affymetrix", "Agilent", "Illumina"]
methods: ["SCAN", "" ,""]
platforms: ["Affymetrix", "Agilent", "Illumina", "RNAseq"]
methods: ["SCAN", "", "", ""]
-
name: normG00
platforms: ["Affymetrix", "Agilent", "Illumina"]
methods: ["APT-GCRMA", "" ,""]
platforms: ["Affymetrix", "Agilent", "Illumina", "RNAseq"]
methods: ["APT-GCRMA", "", "", ""]
apt_script: "/home/users/ltranchevent/Software/apt-2.10.2.2-x86_64-intel-linux/bin/apt-probeset-summarize"
apt_script_method: "gc-correction,scale-intensities.ceiling=65536.high_pct=0.999,rma-bg,quant-norm.sketch=0.usepm=true.bioc=true,pm-only,med-polish"
# VSN
......
......@@ -8,7 +8,7 @@ The idea is to study several expression datasets (both microarray- and sequencin
The original data have been extracted from GEO or were produced in-house. Details can be found in the configuration files (for instance ./Confs/datasets_config.yml).
## Prerequisites
Most of the code is currently composed of R and bash scripts. Makefiles are used to store the main commands. Steps are numbered and can be run sequentially using the dedicated Makefiles (more details are indicated in the dedicated README files). The code has been tested on my local machine, and then run on the iris cluster (excepted one job which has to run on frodo since it relies on a tool that does not run on iris). Note that this project relies on various R packages including BioConductor, Affy, SCAN.UPC, arrayQualityMetrics, limma, tidyverse as well as the ArrayUtils set of function.
Most of the code is currently composed of R and bash scripts. Makefiles are used to store the main commands. Steps are numbered and can be run sequentially using the dedicated Makefiles (more details are indicated in the dedicated README files). The code has been tested on my local machine, and then run on the iris cluster (except for one job, which has to run on frodo since it relies on a tool that does not run on iris). Note that this project relies on various R packages, including Bioconductor, affy, SCAN.UPC, arrayQualityMetrics, limma, and tidyverse, as well as the ArrayUtils set of functions.
## Authors
* **Léon-Charles Tranchevent**
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment