Commit 2844b9fb authored by Leon-Charles Tranchevent's avatar Leon-Charles Tranchevent
Browse files

Included a patient effect in the limma analysis for the GSE8397-Moran dataset.

parent 84272dc8
local_data_dir: !!str '01/'
local_code_dir: !!str '01-Quality_control/'
datasets:
# dataset_name compressed clinical.descriptors.
- ['GSE20141', TRUE, 'D']
- ['GSE20163', TRUE, 'D']
- ['GSE20164', TRUE, 'DG']
......
local_data_dir: !!str '02/'
local_code_dir: !!str '02-Preprocessing/'
datasets:
# dataset_name clinical.descriptors perform.batch.effect.correction
- ['GSE20141', 'D', FALSE]
- ['GSE20163', 'D', FALSE]
- ['GSE20164', 'DG', FALSE]
......
......@@ -2,6 +2,7 @@ local_input_data_dir: !!str '02/'
local_data_dir: !!str '03/'
local_code_dir: !!str '03-Predict_gender/'
datasets:
# dataset_name compressed array_plateform
- ['GSE20141', TRUE, 'HGU133Plus2']
- ['GSE20163', TRUE, 'HGU133A']
- ['GSE20164', TRUE, 'HGU133A']
......
......@@ -2,6 +2,7 @@ local_input_data_dir: !!str '02/'
local_data_dir: !!str '04/'
local_code_dir: !!str '04-Clean_datasets/'
datasets:
# dataset_name samples.to.delete samples.to.update
- ['GSE20141', 'GSM503950_C_1074p_SNc.CEL.gz,GSM503951_C_1271p_SNc.CEL.gz,GSM503952_C_2829_SNc.CEL.gz,GSM503956_C_3603_SNc.CEL.gz,GSM503959_PD_1401_SNc.CEL.gz,GSM503966_PD_5138_SNc.CEL.gz', '']
- ['GSE20163', '', 'GSM506004.CEL;Gender;M,GSM506010.CEL;Gender;F,GSM506011.CEL;Gender;M,GSM506012.CEL;Gender;F']
- ['GSE20164', 'GSM506020.CEL.gz', '']
......
......@@ -7,5 +7,3 @@ clean_outputs:
@rm -rf ${OUTPUT_FOLDER}/*
run:
@Rscript --vanilla ${CODE_FOLDER}/get_DEGs.R > ${OUTPUT_FOLDER}/run_log.out 2> ${OUTPUT_FOLDER}/run_log.err
test:
@Rscript --vanilla ${CODE_FOLDER}/get_DEGs.R GSE20141 > ${OUTPUT_FOLDER}/run_log.out 2> ${OUTPUT_FOLDER}/run_log.err
......@@ -12,6 +12,7 @@ library("hgu133a.db")
library("u133x3p.db")
library("AnnotationDbi")
library("tidyverse")
library("statmod")
message(paste0("[", Sys.time(), "] Libraries loaded."))
# ================================================================================================
......@@ -199,12 +200,23 @@ for (i in 1:length(local_config$datasets)) {
levels = c("F.Control", "F.PD", "M.Control", "M.PD"))
# We create the design matrices themselves from the clinical matrices.
# Regular cases by default (no block)
design_gender <- stats::model.matrix(~ 0 + clinical_gender)
colnames(design_gender) <- levels(clinical_gender)
design_disease_status <- stats::model.matrix(~ 0 + clinical_disease_status)
colnames(design_disease_status) <- levels(clinical_disease_status)
design_gender_disease_status <- stats::model.matrix(~ 0 + clinical_gender_disease_status)
colnames(design_gender_disease_status) <- levels(clinical_gender_disease_status)
# If we need a block we create a more complex design (so far, GSE8397-moran only).
if (as.logical(dataset_config[4]) == TRUE) {
design_gender <- stats::model.matrix(~ 0 + clinical_gender + clinical$Tissue)
colnames(design_gender) <- c(levels(clinical_gender), "Tissue")
design_disease_status <- stats::model.matrix(~ 0 + clinical_disease_status + clinical$Tissue)
colnames(design_disease_status) <- c(levels(clinical_disease_status), "Tissue")
design_gender_disease_status <- stats::model.matrix(~ 0 + clinical_gender_disease_status +
clinical$Tissue)
colnames(design_gender_disease_status) <- c(levels(clinical_gender_disease_status), "Tissue")
}
# We then create the contrast matrices for limma.
contrast_gender <- limma::makeContrasts(Female_vs_Male = F - M, levels = design_gender)
......@@ -224,12 +236,25 @@ for (i in 1:length(local_config$datasets)) {
# We fit the data using the above models and use the contrats
# to prepare the differential analyses.
# TODO: Important parameter: correlation when some samples are paired (moran (GSE8397):
# TODO: Important parameter: block/correlation when some samples are paired (moran (GSE8397):
# same patients and diff tissues) (p98-116)
# TODO: Important parameter: method = ls or robust (?)
# Default to no block.
lmfit_gender <- limma::lmFit(exprs_eset, design_gender)
lmfit_disease_status <- limma::lmFit(exprs_eset, design_disease_status)
lmfit_gender_disease_status <- limma::lmFit(exprs_eset, design_gender_disease_status)
# If we need a block we create it (so far, GSE8397-moran only).
if (as.logical(dataset_config[4]) == TRUE) {
corfit <- limma::duplicateCorrelation(exprs_eset, design_gender, block = clinical$Patient)
lmfit_gender <- limma::lmFit(exprs_eset, design_gender,
block = clinical$Patient, correlation = corfit$consensus)
lmfit_disease_status <- limma::lmFit(exprs_eset, design_disease_status,
block = clinical$Patient,
correlation = corfit$consensus)
lmfit_gender_disease_status <- limma::lmFit(exprs_eset, design_gender_disease_status,
block = clinical$Patient,
correlation = corfit$consensus)
}
fit_gender <- contrasts.fit(lmfit_gender, contrast_gender)
fit_disease_status <- contrasts.fit(lmfit_disease_status, contrast_disease_status)
if (as.logical(dataset_config[3]) == TRUE) {
......@@ -469,7 +494,7 @@ for (i in 1:length(local_config$datasets)) {
file = table_gender_disease_status_fn, sep = "\t",
quote = FALSE, col.names = NA)
table_gender_patients_fn <- paste0(output_data_dir, dataset_config[1],
"_toptable_dgender_patients.tsv")
"_toptable_gender_patients.tsv")
write.table(table_gender_patients,
file = table_gender_patients_fn, sep = "\t",
quote = FALSE, col.names = NA)
......
local_input_data_dir: !!str '04/'
local_data_dir: !!str '05/'
local_code_dir: !!str '05-Get_DEGs/'
# Name Plateform Has.F.PD Has.pairs
datasets:
- ['GSE20141', 'HGU133Plus2', TRUE]
- ['GSE20163', 'HGU133A', TRUE]
- ['GSE20164', 'HGU133A', TRUE]
- ['GSE20292', 'HGU133A', TRUE]
- ['GSE7307', 'HGU133Plus2', FALSE]
- ['GSE7621', 'HGU133Plus2', TRUE]
- ['GSE8397', 'HGU133A', TRUE]
- ['Simunovic', 'HGU133A', TRUE]
- ['E-MEXP-1416', 'U133_X3P', TRUE]
- ['HG-U133A', 'HGU133A', TRUE]
- ['HG-U133_Plus_2', 'HGU133Plus2', TRUE]
- ['GSE20141', 'HGU133Plus2', TRUE, FALSE]
- ['GSE20163', 'HGU133A', TRUE, FALSE]
- ['GSE20164', 'HGU133A', TRUE, FALSE]
- ['GSE20292', 'HGU133A', TRUE, FALSE]
- ['GSE7307', 'HGU133Plus2', FALSE, FALSE]
- ['GSE7621', 'HGU133Plus2', TRUE, FALSE]
- ['GSE8397', 'HGU133A', TRUE, TRUE]
- ['Simunovic', 'HGU133A', TRUE, FALSE]
- ['E-MEXP-1416', 'U133_X3P', TRUE, FALSE]
- ['HG-U133A', 'HGU133A', TRUE, FALSE]
- ['HG-U133_Plus_2', 'HGU133Plus2', TRUE, FALSE]
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment