Commit 87e61139 authored by Leon-Charles Tranchevent's avatar Leon-Charles Tranchevent
Browse files

Added back the support for Fisher enrichment.

parent 31f6e4e4
......@@ -353,21 +353,17 @@ for (m in seq_len(length(datasets))) {
} # End for each dataset.
rm(m)
# Actual plotting of the selected genes (from the local configuration file).
##gene <- "DENR"
##gene_datasets <- c("GSE20163", "GSE20292", "GSE26927", "GSE49036", "GSE8397", "Moreira", "NBB")
#gene <- "ABCA8"
#gene_datasets <- c("GSE20159", "GSE20163", "GSE20164", "GSE20292", "GSE26927", "GSE49036", "GSE8397", "NBB")
#gene <- "NLRC5"
#gene_datasets <- c("ALL", "GSE20159", "GSE26927", "GSE49036", "NBB")
#for (gene in config$genes_to_plot) {#}
gene <- "XIST"
gene_datasets <- c("GSE20159", "GSE20163", "GSE20292", "GSE26927", "GSE49036", "GSE8397", "NBB")
plot_gene(gene = gene,
gene_datasets = gene_datasets,
# Actual plotting of the selected genes.
common_datasets <- c("GSE20159", "GSE20163", "GSE20292", "GSE26927", "GSE49036", "GSE8397", "NBB")
top_genes <- c("H2AC6", "GDPD5", "POGK", "CA2", "SALL1", "SGSH", "CX3CL1", "CXCR4", "TMEM61",
"SOD2", "DENR", "TH", "NR4A2", "NELL2", "SDC1", "CBLN1", "VAV3", "LMO3",
"FAM189A1", "DYNC1LI1", "EFNA1", "NDUFA10", "MCM3AP", "IFITM2", "MAPK1",
"ID3", "ECE1", "GRHL3", "RGS2", "YBX3")
for (gene in top_genes) {
plotfilename <- paste0(output_data_dir, "SNage_VSN_PDVsControl_females_max-avg_", gene, ".png")
png(plotfilename)
plot_gene(gene = gene,
gene_datasets = common_datasets,
output_data_dir = output_data_dir,
integration_name = integration$name,
limma_analysis = config$limma_analyses[[5]],
......@@ -375,8 +371,11 @@ plot_gene(gene = gene,
palette = current_plt,
clin_data_all, exp_data_all, zexp_data_all, m_indx_map, config,
plot_all = FALSE, plot_datasets = TRUE)
plot_gene(gene = gene,
gene_datasets = gene_datasets,
dev.off()
plotfilename <- paste0(output_data_dir, "SNage_VSN_PDVsControl_males_max-avg_", gene, ".png")
png(plotfilename)
plot_gene(gene = gene,
gene_datasets = common_datasets,
output_data_dir = output_data_dir,
integration_name = integration$name,
limma_analysis = config$limma_analyses[[6]],
......@@ -384,146 +383,13 @@ plot_gene(gene = gene,
palette = current_plt,
clin_data_all, exp_data_all, zexp_data_all, m_indx_map, config,
plot_all = FALSE, plot_datasets = TRUE)
plot_gene(gene = gene,
gene_datasets = gene_datasets,
output_data_dir = output_data_dir,
integration_name = integration$name,
limma_analysis = config$limma_analyses[[7]],
selection_name = selection$name,
palette = current_plt,
clin_data_all, exp_data_all, zexp_data_all, m_indx_map, config,
plot_all = FALSE, plot_datasets = TRUE)
# FEMALE ANALYSIS
k <- 5
limma_analysis <- config$limma_analyses[[k]]
# gene <- "GRHL3"
# gene_datasets <- c("GSE26927", "GSE49036", "NBB")
# gene <- "SGSH"
# gene_datasets <- c("GSE20159", "GSE20163", "GSE20164", "GSE26927", "GSE49036", "GSE8397", "NBB")
# gene <- "CX3CL1"
# gene_datasets <- c("GSE20163", "GSE20164", "GSE26927", "GSE49036", "GSE8397", "NBB")
# gene <- "ABCA8"
# gene_datasets <- c("GSE20159", "GSE20163", "GSE20164", "GSE20292", "GSE26927", "GSE49036", "GSE8397", "NBB")
# gene <- "H2AC6"
# gene_datasets <- c("GSE20163", "GSE20164", "GSE20292", "GSE49036", "GSE8397")
# gene <- "TMEM61"
# gene_datasets <- c("GSE26927", "GSE49036", "NBB")
# gene <- "SOD2"
# gene_datasets <- c("GSE26927", "GSE49036", "NBB")
gene <- "CA2"
gene_datasets <- c("GSE20163", "GSE20164", "GSE20292", "GSE26927", "GSE49036", "GSE8397", "NBB")
plot_gene(gene = gene,
gene_datasets = gene_datasets,
output_data_dir = output_data_dir,
integration_name = integration$name,
limma_analysis = limma_analysis,
selection_name = selection$name,
palette = current_plt,
clin_data_all, exp_data_all, zexp_data_all, m_indx_map, config,
plot_all = FALSE, plot_datasets = TRUE)
#plot_all = TRUE, plot_datasets = FALSE)
gene_datasets <- c("GSE20292", "GSE8397", "GSE49036")
plot_gene(gene = gene,
gene_datasets = gene_datasets,
output_data_dir = output_data_dir,
integration_name = integration$name,
limma_analysis = limma_analysis,
selection_name = selection$name,
palette = current_plt,
clin_data_all, exp_data_all, zexp_data_all, m_indx_map, config,
plot_all = FALSE, plot_datasets = TRUE)
#plot_all = TRUE, plot_datasets = FALSE)
# MALE ANALYSIS
k <- 6
limma_analysis <- config$limma_analyses[[k]]
# gene <- "DENR"
# gene_datasets <- c("GSE20163", "GSE20292", "GSE26927", "GSE49036", "GSE8397", "Moreira", "NBB")
# gene <- "SDC1"
# gene_datasets <- c("GSE20163", "GSE20292", "GSE26927", "GSE49036", "GSE8397", "Moreira", "NBB")
# gene <- "LMO3"
# gene_datasets <- c("GSE20159", "GSE20163", "GSE20292", "GSE26927", "GSE49036", "GSE8397", "Moreira", "NBB")
# gene <- "CBLN1"
# gene_datasets <- c("GSE20163", "GSE20292", "GSE26927", "GSE49036", "GSE8397", "Moreira", "NBB")
# gene <- "SLITRK5"
# gene_datasets <- c("GSE20163", "GSE20292", "GSE26927", "GSE49036", "GSE8397", "Moreira", "NBB")
# gene <- "OSBPL10"
# gene_datasets <- c("GSE20159", "GSE20163", "GSE20292", "GSE26927", "GSE49036", "GSE8397", "Moreira", "NBB")
# gene <- "NELL2"
# gene_datasets <- c("GSE20159", "GSE20163", "GSE20292", "GSE26927", "GSE49036", "GSE8397", "Moreira", "NBB")
# gene <- "RAP1GDS1"
# gene_datasets <- c("GSE20159", "GSE20163", "GSE20292", "GSE26927", "GSE49036", "GSE8397", "Moreira", "NBB")
gene <- "NR4A2"
gene_datasets <- c("GSE20159", "GSE20163", "GSE20292", "GSE26927", "GSE49036", "GSE8397", "Moreira", "NBB")
plot_gene(gene = gene,
gene_datasets = gene_datasets,
output_data_dir = output_data_dir,
integration_name = integration$name,
limma_analysis = limma_analysis,
selection_name = selection$name,
palette = current_plt,
clin_data_all, exp_data_all, zexp_data_all, m_indx_map, config,
plot_all = FALSE, plot_datasets = TRUE)
#plot_all = TRUE, plot_datasets = FALSE)
gene_datasets <- c("GSE20292", "GSE8397", "GSE49036")
plot_gene(gene = gene,
gene_datasets = gene_datasets,
output_data_dir = output_data_dir,
integration_name = integration$name,
limma_analysis = limma_analysis,
selection_name = selection$name,
palette = current_plt,
clin_data_all, exp_data_all, zexp_data_all, m_indx_map, config,
plot_all = FALSE, plot_datasets = TRUE)
#plot_all = TRUE, plot_datasets = FALSE)
# GDS ANALYSIS
k <- 7
limma_analysis <- config$limma_analyses[[k]]
#gene <- "EFNA1"
#gene_datasets <- c("GSE20159", "GSE26927", "GSE49036", "GSE8397", "NBB")
#gene <- "TP53"
#gene_datasets <- c("GSE20163", "GSE20292", "GSE26927", "GSE49036", "GSE8397", "NBB")
#gene <- "CD93"
#gene_datasets <- c("GSE20159", "GSE20163", "GSE26927", "GSE49036", "GSE8397", "NBB")
gene <- "RFC5"
gene_datasets <- c("GSE20159", "GSE20163", "GSE20292", "GSE26927", "GSE49036", "GSE8397", "NBB")
gene <- "EFNA1"
gene_datasets <- c("GSE20159", "GSE26927", "GSE49036", "GSE8397", "NBB")
plot_gene(gene = gene,
gene_datasets = gene_datasets,
output_data_dir = output_data_dir,
integration_name = integration$name,
limma_analysis = limma_analysis,
selection_name = selection$name,
palette = current_plt,
clin_data_all, exp_data_all, zexp_data_all, m_indx_map, config,
plot_all = FALSE, plot_datasets = TRUE)
#plot_all = TRUE, plot_datasets = FALSE)
gene_datasets <- c("GSE49036", "NBB", "GSE26927")
plot_gene(gene = gene,
gene_datasets = gene_datasets,
output_data_dir = output_data_dir,
integration_name = integration$name,
limma_analysis = limma_analysis,
selection_name = selection$name,
palette = current_plt,
clin_data_all, exp_data_all, zexp_data_all, m_indx_map, config,
plot_all = FALSE, plot_datasets = TRUE)
#plot_all = TRUE, plot_datasets = FALSE)
dev.off()
rm(plotfilename)
message(paste0("[",gene, "] plotted."))
}
rm(gene, top_genes, common_datasets)
rm(selection, exp_data_all, zexp_data_all, clin_data_all, m_indx, m_indx_map)
rm(l, limma_analysis, current_plt)
rm(k, vsn)
rm(l, current_plt, vsn)
rm(j, integration, int_criteria)
rm(i, datasets)
......
......@@ -285,7 +285,7 @@ for (i in seq_len(length(config$integrations))) {
"_", selection$name, "_all_pivalue_rankings.tsv")
write.table(res_females %>%
mutate(ranking_value = pi_value) %>%
select(Gene, log_fold_change, P_value, pi_value, ranking_value) %>%
select(Gene, log_fold_change, P_value, adj_P_value, pi_value, ranking_value) %>%
arrange(desc(ranking_value)),
res_females_fn,
quote = FALSE,
......@@ -298,7 +298,7 @@ for (i in seq_len(length(config$integrations))) {
"_", selection$name, "_all_pivalue_rankings.tsv")
write.table(res_males %>%
mutate(ranking_value = pi_value) %>%
select(Gene, log_fold_change, P_value, pi_value, ranking_value) %>%
select(Gene, log_fold_change, P_value, adj_P_value, pi_value, ranking_value) %>%
arrange(desc(ranking_value)),
res_males_fn,
quote = FALSE,
......@@ -311,7 +311,7 @@ for (i in seq_len(length(config$integrations))) {
limma_analysis <- config$limma_analyses[[k]]
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_all_gdrscore_rankings.tsv")
write.table(merge(res_females %>% select(Gene, log_fold_change, P_value, pi_value),
write.table(merge(res_females %>% select(Gene, log_fold_change, P_value, adj_P_value, pi_value),
spec_females %>% mutate(ranking_value = gender_specific_score) %>%
select(Gene, ranking_value),
by = "Gene") %>% arrange(desc(ranking_value)),
......@@ -324,7 +324,7 @@ for (i in seq_len(length(config$integrations))) {
limma_analysis <- config$limma_analyses[[k]]
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_all_gdrscore_rankings.tsv")
write.table(merge(res_males %>% select(Gene, log_fold_change, P_value, pi_value),
write.table(merge(res_males %>% select(Gene, log_fold_change, P_value, adj_P_value, pi_value),
spec_males %>% mutate(ranking_value = gender_specific_score) %>%
select(Gene, ranking_value),
by = "Gene") %>% arrange(desc(ranking_value)),
......@@ -340,10 +340,10 @@ for (i in seq_len(length(config$integrations))) {
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_all_combinedscores_rankings.tsv")
n <- dim(res_females)[1]
write.table(merge(res_females %>% select(Gene, log_fold_change, P_value, pi_value),
write.table(merge(res_females %>% select(Gene, log_fold_change, P_value, adj_P_value, pi_value),
spec_females %>% select(Gene, gender_specific_score),
by = "Gene") %>% mutate(ranking_value = ((rank(pi_value) / n) + (rank(gender_specific_score) / n))) %>%
select(Gene, log_fold_change, P_value, pi_value, ranking_value) %>%
select(Gene, log_fold_change, P_value, adj_P_value, pi_value, ranking_value) %>%
arrange(desc(ranking_value)),
res_females_fn,
quote = FALSE,
......@@ -355,10 +355,10 @@ for (i in seq_len(length(config$integrations))) {
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_all_combinedscores_rankings.tsv")
n <- dim(res_males)[1]
write.table(merge(res_males %>% select(Gene, log_fold_change, P_value, pi_value),
write.table(merge(res_males %>% select(Gene, log_fold_change, P_value, adj_P_value, pi_value),
spec_males %>% select(Gene, gender_specific_score),
by = "Gene") %>% mutate(ranking_value = ((rank(pi_value) / n) + (rank(gender_specific_score) / n))) %>%
select(Gene, log_fold_change, P_value, pi_value, ranking_value) %>%
select(Gene, log_fold_change, P_value, adj_P_value, pi_value, ranking_value) %>%
arrange(desc(ranking_value)),
res_males_fn,
quote = FALSE,
......@@ -372,8 +372,8 @@ for (i in seq_len(length(config$integrations))) {
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gs_pivalue_rankings.tsv")
write.table(GS_females %>%
mutate(log_fold_change = log_fold_change.F, P_value = P_value.F, pi_value = pi_value.F) %>%
select(Gene, log_fold_change, P_value, pi_value) %>%
mutate(log_fold_change = log_fold_change.F, P_value = P_value.F, adj_P_value = adj_P_value.F, pi_value = pi_value.F) %>%
select(Gene, log_fold_change, P_value, adj_P_value, pi_value) %>%
mutate(ranking_value = pi_value) %>%
filter(!Gene %in% potGD$Gene) %>%
arrange(desc(ranking_value)),
......@@ -387,8 +387,8 @@ for (i in seq_len(length(config$integrations))) {
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gs_pivalue_rankings.tsv")
write.table(GS_males %>%
mutate(log_fold_change = log_fold_change.M, P_value = P_value.M, pi_value = pi_value.M) %>%
select(Gene, log_fold_change, P_value, pi_value) %>%
mutate(log_fold_change = log_fold_change.M, P_value = P_value.M, adj_P_value = adj_P_value.M, pi_value = pi_value.M) %>%
select(Gene, log_fold_change, P_value, adj_P_value, pi_value) %>%
mutate(ranking_value = pi_value) %>%
filter(!Gene %in% potGD$Gene) %>%
arrange(desc(ranking_value)),
......@@ -405,8 +405,8 @@ for (i in seq_len(length(config$integrations))) {
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gs_gdrscore_rankings.tsv")
write.table(merge(GS_females %>%
mutate(log_fold_change = log_fold_change.F, P_value = P_value.F, pi_value = pi_value.F) %>%
select(Gene, log_fold_change, P_value, pi_value),
mutate(log_fold_change = log_fold_change.F, P_value = P_value.F, adj_P_value = adj_P_value.F, pi_value = pi_value.F) %>%
select(Gene, log_fold_change, P_value, adj_P_value, pi_value),
spec_females %>% mutate(ranking_value = gender_specific_score) %>%
select(Gene, ranking_value),
by = "Gene") %>%
......@@ -422,8 +422,8 @@ for (i in seq_len(length(config$integrations))) {
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gs_gdrscore_rankings.tsv")
write.table(merge(GS_males %>%
mutate(log_fold_change = log_fold_change.M, P_value = P_value.M, pi_value = pi_value.M) %>%
select(Gene, log_fold_change, P_value, pi_value),
mutate(log_fold_change = log_fold_change.M, P_value = P_value.M, adj_P_value = adj_P_value.M, pi_value = pi_value.M) %>%
select(Gene, log_fold_change, P_value, adj_P_value, pi_value),
spec_males %>% mutate(ranking_value = gender_specific_score) %>%
select(Gene, ranking_value),
by = "Gene") %>%
......@@ -443,11 +443,11 @@ for (i in seq_len(length(config$integrations))) {
"_", selection$name, "_gs_combinedscores_rankings.tsv")
n <- dim(GS_females)[1]
write.table(merge(GS_females %>%
mutate(log_fold_change = log_fold_change.F, P_value = P_value.F, pi_value = pi_value.F) %>%
select(Gene, log_fold_change, P_value, pi_value),
mutate(log_fold_change = log_fold_change.F, P_value = P_value.F, adj_P_value = adj_P_value.F, pi_value = pi_value.F) %>%
select(Gene, log_fold_change, P_value, adj_P_value, pi_value),
spec_females %>% select(Gene, gender_specific_score),
by = "Gene") %>% mutate(ranking_value = ((rank(pi_value) / n) + (rank(gender_specific_score) / n))) %>%
select(Gene, log_fold_change, P_value, pi_value, ranking_value) %>%
select(Gene, log_fold_change, P_value, adj_P_value, pi_value, ranking_value) %>%
filter(!Gene %in% potGD$Gene) %>%
arrange(desc(ranking_value)),
res_females_fn,
......@@ -461,11 +461,11 @@ for (i in seq_len(length(config$integrations))) {
"_", selection$name, "_gs_combinedscores_rankings.tsv")
n <- dim(GS_males)[1]
write.table(merge(GS_males %>%
mutate(log_fold_change = log_fold_change.M, P_value = P_value.M, pi_value = pi_value.M) %>%
select(Gene, log_fold_change, P_value, pi_value),
mutate(log_fold_change = log_fold_change.M, P_value = P_value.M, adj_P_value = adj_P_value.M, pi_value = pi_value.M) %>%
select(Gene, log_fold_change, P_value, adj_P_value, pi_value),
spec_males %>% select(Gene, gender_specific_score),
by = "Gene") %>% mutate(ranking_value = ((rank(pi_value) / n) + (rank(gender_specific_score) / n))) %>%
select(Gene, log_fold_change, P_value, pi_value, ranking_value) %>%
select(Gene, log_fold_change, P_value, adj_P_value, pi_value, ranking_value) %>%
filter(!Gene %in% potGD$Gene) %>%
arrange(desc(ranking_value)),
res_males_fn,
......@@ -480,7 +480,7 @@ for (i in seq_len(length(config$integrations))) {
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gd_pivalue_rankings.tsv")
write.table(res_females %>%
select(Gene, log_fold_change, P_value, pi_value) %>%
select(Gene, log_fold_change, P_value, adj_P_value, pi_value) %>%
mutate(ranking_value = pi_value) %>%
filter(Gene %in% potGD$Gene) %>%
arrange(desc(ranking_value)),
......@@ -494,7 +494,7 @@ for (i in seq_len(length(config$integrations))) {
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gd_pivalue_rankings.tsv")
write.table(res_males %>%
select(Gene, log_fold_change, P_value, pi_value) %>%
select(Gene, log_fold_change, P_value, adj_P_value, pi_value) %>%
mutate(ranking_value = pi_value) %>%
filter(Gene %in% potGD$Gene) %>%
arrange(desc(ranking_value)),
......@@ -510,7 +510,7 @@ for (i in seq_len(length(config$integrations))) {
limma_analysis <- config$limma_analyses[[k]]
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gd_gdrscore_rankings.tsv")
write.table(merge(res_females %>% select(Gene, log_fold_change, P_value, pi_value),
write.table(merge(res_females %>% select(Gene, log_fold_change, P_value, adj_P_value, pi_value),
spec_females %>% mutate(ranking_value = gender_specific_score) %>%
select(Gene, ranking_value),
by = "Gene") %>%
......@@ -525,7 +525,7 @@ for (i in seq_len(length(config$integrations))) {
limma_analysis <- config$limma_analyses[[k]]
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gd_gdrscore_rankings.tsv")
write.table(merge(res_males %>% select(Gene, log_fold_change, P_value, pi_value),
write.table(merge(res_males %>% select(Gene, log_fold_change, P_value, adj_P_value, pi_value),
spec_males %>% mutate(ranking_value = gender_specific_score) %>%
select(Gene, ranking_value),
by = "Gene") %>%
......@@ -544,10 +544,10 @@ for (i in seq_len(length(config$integrations))) {
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gd_combinedscores_rankings.tsv")
n <- dim(res_females)[1]
write.table(merge(res_females %>% select(Gene, log_fold_change, P_value, pi_value),
write.table(merge(res_females %>% select(Gene, log_fold_change, P_value, adj_P_value, pi_value),
spec_females %>% select(Gene, gender_specific_score),
by = "Gene") %>% mutate(ranking_value = ((rank(pi_value) / n) + (rank(gender_specific_score) / n))) %>%
select(Gene, log_fold_change, P_value, pi_value, ranking_value) %>%
select(Gene, log_fold_change, P_value, adj_P_value, pi_value, ranking_value) %>%
filter(Gene %in% potGD$Gene) %>%
arrange(desc(ranking_value)),
res_females_fn,
......@@ -560,10 +560,10 @@ for (i in seq_len(length(config$integrations))) {
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gd_combinedscores_rankings.tsv")
n <- dim(res_males)[1]
write.table(merge(res_males %>% select(Gene, log_fold_change, P_value, pi_value),
write.table(merge(res_males %>% select(Gene, log_fold_change, P_value, adj_P_value, pi_value),
spec_males %>% select(Gene, gender_specific_score),
by = "Gene") %>% mutate(ranking_value = ((rank(pi_value) / n) + (rank(gender_specific_score) / n))) %>%
select(Gene, log_fold_change, P_value, pi_value, ranking_value) %>%
select(Gene, log_fold_change, P_value, adj_P_value, pi_value, ranking_value) %>%
filter(Gene %in% potGD$Gene) %>%
arrange(desc(ranking_value)),
res_males_fn,
......@@ -579,7 +579,7 @@ for (i in seq_len(length(config$integrations))) {
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gsgd_pivalue_rankings.tsv")
write.table(res_females %>%
select(Gene, log_fold_change, P_value, pi_value) %>%
select(Gene, log_fold_change, P_value, adj_P_value, pi_value) %>%
mutate(ranking_value = pi_value) %>%
filter(Gene %in% c(potGD$Gene, GS_females$Gene)) %>%
arrange(desc(ranking_value)),
......@@ -593,7 +593,7 @@ for (i in seq_len(length(config$integrations))) {
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gsgd_pivalue_rankings.tsv")
write.table(res_males %>%
select(Gene, log_fold_change, P_value, pi_value) %>%
select(Gene, log_fold_change, P_value, adj_P_value, pi_value) %>%
mutate(ranking_value = pi_value) %>%
filter(Gene %in% c(potGD$Gene, GS_males$Gene)) %>%
arrange(desc(ranking_value)),
......@@ -609,7 +609,7 @@ for (i in seq_len(length(config$integrations))) {
limma_analysis <- config$limma_analyses[[k]]
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gsgd_gdrscore_rankings.tsv")
write.table(merge(res_females %>% select(Gene, log_fold_change, P_value, pi_value),
write.table(merge(res_females %>% select(Gene, log_fold_change, P_value, adj_P_value, pi_value),
spec_females %>% mutate(ranking_value = gender_specific_score) %>%
select(Gene, ranking_value),
by = "Gene") %>%
......@@ -624,7 +624,7 @@ for (i in seq_len(length(config$integrations))) {
limma_analysis <- config$limma_analyses[[k]]
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gsgd_gdrscore_rankings.tsv")
write.table(merge(res_males %>% select(Gene, log_fold_change, P_value, pi_value),
write.table(merge(res_males %>% select(Gene, log_fold_change, P_value, adj_P_value, pi_value),
spec_males %>% mutate(ranking_value = gender_specific_score) %>%
select(Gene, ranking_value),
by = "Gene") %>%
......@@ -643,10 +643,10 @@ for (i in seq_len(length(config$integrations))) {
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gsgd_combinedscores_rankings.tsv")
n <- dim(res_females)[1]
write.table(merge(res_females %>% select(Gene, log_fold_change, P_value, pi_value),
write.table(merge(res_females %>% select(Gene, log_fold_change, P_value, adj_P_value, pi_value),
spec_females %>% select(Gene, gender_specific_score),
by = "Gene") %>% mutate(ranking_value = ((rank(pi_value) / n) + (rank(gender_specific_score) / n))) %>%
select(Gene, log_fold_change, P_value, pi_value, ranking_value) %>%
select(Gene, log_fold_change, P_value, adj_P_value, pi_value, ranking_value) %>%
filter(Gene %in% c(potGD$Gene, GS_females$Gene)) %>%
arrange(desc(ranking_value)),
res_females_fn,
......@@ -659,10 +659,10 @@ for (i in seq_len(length(config$integrations))) {
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gsgd_combinedscores_rankings.tsv")
n <- dim(res_males)[1]
write.table(merge(res_males %>% select(Gene, log_fold_change, P_value, pi_value),
write.table(merge(res_males %>% select(Gene, log_fold_change, P_value, adj_P_value, pi_value),
spec_males %>% select(Gene, gender_specific_score),
by = "Gene") %>% mutate(ranking_value = ((rank(pi_value) / n) + (rank(gender_specific_score) / n))) %>%
select(Gene, log_fold_change, P_value, pi_value, ranking_value) %>%
select(Gene, log_fold_change, P_value, adj_P_value, pi_value, ranking_value) %>%
filter(Gene %in% c(potGD$Gene, GS_males$Gene)) %>%
arrange(desc(ranking_value)),
res_males_fn,
......
......@@ -29,5 +29,8 @@ Rscript --vanilla ${CODE_FOLDER}/compute_gender_specificity.R > ${OUTPUT_FOLDER}
Rscript --vanilla ${CODE_FOLDER}/compute_dataset_overlap.R > ${OUTPUT_FOLDER}dataset_overlap_log.out 2> ${OUTPUT_FOLDER}dataset_overlap_log.err
Rscript --vanilla ${CODE_FOLDER}/merge_and_filter_rankings.R > ${OUTPUT_FOLDER}merge_filter_log.out 2> ${OUTPUT_FOLDER}merge_filter_log.err
# Additional file that contains the genes that have different probe mappings between the male and female analyses.
diff <(sort -k2,2 ${OUTPUT_FOLDER}SNage_VSN_PDVsControl_females_matching_data.tsv | cut -f 2,12,15,18,21,24) <(sort -k2,2 ${OUTPUT_FOLDER}SNage_VSN_PDVsControl_males_matching_data.tsv | cut -f 2,12,15,21,24,27) | grep '>' | cut -f 1 | sed -r "s/> //g" > ${OUTPUT_FOLDER}SNage_VSN_PDVsControl_females_vs_males_matching_data_problems.tsv
# Moving the slurm log file to data
mv ${CODE_FOLDER}/slurm-*out ${OUTPUT_FOLDER}/
......@@ -47,9 +47,11 @@ duplicate_row_first_cell <- function(row, sep = "|") {
val_2 <- rep(row[3], length(keys))
val_3 <- rep(row[4], length(keys))
val_4 <- rep(row[5], length(keys))
val_5 <- rep(row[6], length(keys))
to_ret <- cbind(Gene = keys,
log_fold_change = val_1,
P_value = val_2,
adj_P_value = val_3,
pi_value = val_3,
ranking_value = val_4)
return(to_ret)
......@@ -199,6 +201,125 @@ perform_gsea <- function(ranked_genes,
return(gsea)
}
#' @title Perform a regular enrichment analysis using Fisher's over-representation.
#'
#' @description This functions relies on clusterProfiler and the associated packages
#' to run the enrichment for a given ontology among DO, GO (all three subtypes), KEGG and
#' REACTOME. It saves the results in the provided folder and return the enrichResult object
#' for further plotting.
#'
#' @param genes The list of genes to analyse. The gene identifiers have to match what is
#' expected - be it entrezgene identifiers or gene symbols.
#' @param background The list of all genes to be used as a background for correct
#' estimation of the statistics. The genes identifiers also have to match what is expected.
#' @param type The name of the ontology to use among "DO", "GOMF", "GOBP", "GOCC",
#' "KEGG" and "REACTOME". The GO analyses relies on gene symbols, the others use entrezgene ids.
#' Default to "DO".
#' @param file_dir The folder in which the tables will be saved. Default to "".
#' @param file_prefix The string that can be used as a prefix to name the file. Default to "".
#' @param symplify A boolean indicating whether the results should be simplified (by removing
#' redundant terms). Default to TRUE.
#' @return The enrichResult object that contains the enriched terms.
perform_enrichment <- function(genes,
background,
type = "DO",
file_dir = "",
file_prefix = "",
simplify = TRUE) {
# Default case, we just set the result to NULL.
enrich <- NULL
switch(type,
DO = {
enrich <- DOSE::enrichDO(gene = genes,
universe = background,
ont = "DO",
minGSSize = config$min_gs_size,
maxGSSize = config$max_gs_size,
pvalueCutoff = 1,
qvalueCutoff = 1,
pAdjustMethod = "BH",
readable = TRUE)
},
GOMF = {
enrich <- clusterProfiler::enrichGO(gene = genes,
universe = background,
keyType = "SYMBOL",
OrgDb = org.Hs.eg.db, #nolint
ont = "MF",
minGSSize = 10,
maxGSSize = config$max_gs_size,
pvalueCutoff = 1,
qvalueCutoff = 1,
pAdjustMethod = "BH",
readable = FALSE)
},
GOBP = {
enrich <- clusterProfiler::enrichGO(gene = genes,
universe = background,
keyType = "SYMBOL",
OrgDb = org.Hs.eg.db, #nolint
ont = "BP",
minGSSize = config$min_gs_size,
maxGSSize = config$max_gs_size,
pvalueCutoff = 1,
qvalueCutoff = 1,
pAdjustMethod = "BH",
readable = FALSE)
},
GOCC = {
enrich <- clusterProfiler::enrichGO(gene = genes,
universe = background,