Commit 31f6e4e4 authored by Leon-Charles Tranchevent's avatar Leon-Charles Tranchevent
Browse files

New ranking files for GSEA (all, GS, GD, GSGD).

parent a7976e94
......@@ -62,7 +62,7 @@ overlap_threshold <- 0.5
for (i in seq_len(length(config$integrations))) {
# We get the integration name for that scheme.
integration <- config$integrations[[i]]
integration <- config$integrations[[i]]
int_criteria <- str_split(integration$criteria, ",")[[1]]
# Default parameters for the VSN and probe selection method.
......@@ -246,7 +246,7 @@ for (i in seq_len(length(config$integrations))) {
quote = FALSE,
sep = "\t",
row.names = FALSE)
rm(GS_females_fn,GS_females, k, limma_analysis)
rm(GS_females_fn, k, limma_analysis)
# And the male gender-specific genes.
k <- 6
......@@ -257,7 +257,7 @@ for (i in seq_len(length(config$integrations))) {
quote = FALSE,
sep = "\t",
row.names = FALSE)
rm(GS_males_fn, GS_males, k, limma_analysis)
rm(GS_males_fn, k, limma_analysis)
# The gender-dimorphic genes.
k <- 7
......@@ -277,12 +277,12 @@ for (i in seq_len(length(config$integrations))) {
# are generated depending on the metrics used to rank the genes (pi values, gender specificity
# scores, or another metrics that combine both) and on the genes that are ranked (only the
# gender-specific genes, only the gender-dimorphic genes, or both).
# We start with the rankings based on pi values, for all genes.
k <- 5
limma_analysis <- config$limma_analyses[[k]]
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gsgd_pivalue_rankings.tsv")
"_", selection$name, "_all_pivalue_rankings.tsv")
write.table(res_females %>%
mutate(ranking_value = pi_value) %>%
select(Gene, log_fold_change, P_value, pi_value, ranking_value) %>%
......@@ -295,7 +295,7 @@ for (i in seq_len(length(config$integrations))) {
k <- 6
limma_analysis <- config$limma_analyses[[k]]
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gsgd_pivalue_rankings.tsv")
"_", selection$name, "_all_pivalue_rankings.tsv")
write.table(res_males %>%
mutate(ranking_value = pi_value) %>%
select(Gene, log_fold_change, P_value, pi_value, ranking_value) %>%
......@@ -305,12 +305,12 @@ for (i in seq_len(length(config$integrations))) {
sep = "\t",
row.names = FALSE)
rm(res_males_fn, k, limma_analysis)
# We continue with the rankings based on gender-specificity scores, for all genes.
k <- 5
limma_analysis <- config$limma_analyses[[k]]
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gsgd_gdrscore_rankings.tsv")
"_", selection$name, "_all_gdrscore_rankings.tsv")
write.table(merge(res_females %>% select(Gene, log_fold_change, P_value, pi_value),
spec_females %>% mutate(ranking_value = gender_specific_score) %>%
select(Gene, ranking_value),
......@@ -323,7 +323,7 @@ for (i in seq_len(length(config$integrations))) {
k <- 6
limma_analysis <- config$limma_analyses[[k]]
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gsgd_gdrscore_rankings.tsv")
"_", selection$name, "_all_gdrscore_rankings.tsv")
write.table(merge(res_males %>% select(Gene, log_fold_change, P_value, pi_value),
spec_males %>% mutate(ranking_value = gender_specific_score) %>%
select(Gene, ranking_value),
......@@ -338,7 +338,7 @@ for (i in seq_len(length(config$integrations))) {
k <- 5
limma_analysis <- config$limma_analyses[[k]]
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gsgd_combinedscores_rankings.tsv")
"_", selection$name, "_all_combinedscores_rankings.tsv")
n <- dim(res_females)[1]
write.table(merge(res_females %>% select(Gene, log_fold_change, P_value, pi_value),
spec_females %>% select(Gene, gender_specific_score),
......@@ -353,7 +353,7 @@ for (i in seq_len(length(config$integrations))) {
k <- 6
limma_analysis <- config$limma_analyses[[k]]
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gsgd_combinedscores_rankings.tsv")
"_", selection$name, "_all_combinedscores_rankings.tsv")
n <- dim(res_males)[1]
write.table(merge(res_males %>% select(Gene, log_fold_change, P_value, pi_value),
spec_males %>% select(Gene, gender_specific_score),
......@@ -371,7 +371,8 @@ for (i in seq_len(length(config$integrations))) {
limma_analysis <- config$limma_analyses[[k]]
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gs_pivalue_rankings.tsv")
write.table(res_females %>%
write.table(GS_females %>%
mutate(log_fold_change = log_fold_change.F, P_value = P_value.F, pi_value = pi_value.F) %>%
select(Gene, log_fold_change, P_value, pi_value) %>%
mutate(ranking_value = pi_value) %>%
filter(!Gene %in% potGD$Gene) %>%
......@@ -385,7 +386,8 @@ for (i in seq_len(length(config$integrations))) {
limma_analysis <- config$limma_analyses[[k]]
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gs_pivalue_rankings.tsv")
write.table(res_males %>%
write.table(GS_males %>%
mutate(log_fold_change = log_fold_change.M, P_value = P_value.M, pi_value = pi_value.M) %>%
select(Gene, log_fold_change, P_value, pi_value) %>%
mutate(ranking_value = pi_value) %>%
filter(!Gene %in% potGD$Gene) %>%
......@@ -402,7 +404,9 @@ for (i in seq_len(length(config$integrations))) {
limma_analysis <- config$limma_analyses[[k]]
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gs_gdrscore_rankings.tsv")
write.table(merge(res_females %>% select(Gene, log_fold_change, P_value, pi_value),
write.table(merge(GS_females %>%
mutate(log_fold_change = log_fold_change.F, P_value = P_value.F, pi_value = pi_value.F) %>%
select(Gene, log_fold_change, P_value, pi_value),
spec_females %>% mutate(ranking_value = gender_specific_score) %>%
select(Gene, ranking_value),
by = "Gene") %>%
......@@ -417,7 +421,9 @@ for (i in seq_len(length(config$integrations))) {
limma_analysis <- config$limma_analyses[[k]]
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gs_gdrscore_rankings.tsv")
write.table(merge(res_males %>% select(Gene, log_fold_change, P_value, pi_value),
write.table(merge(GS_males %>%
mutate(log_fold_change = log_fold_change.M, P_value = P_value.M, pi_value = pi_value.M) %>%
select(Gene, log_fold_change, P_value, pi_value),
spec_males %>% mutate(ranking_value = gender_specific_score) %>%
select(Gene, ranking_value),
by = "Gene") %>%
......@@ -435,8 +441,10 @@ for (i in seq_len(length(config$integrations))) {
limma_analysis <- config$limma_analyses[[k]]
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gs_combinedscores_rankings.tsv")
n <- dim(res_females)[1]
write.table(merge(res_females %>% select(Gene, log_fold_change, P_value, pi_value),
n <- dim(GS_females)[1]
write.table(merge(GS_females %>%
mutate(log_fold_change = log_fold_change.F, P_value = P_value.F, pi_value = pi_value.F) %>%
select(Gene, log_fold_change, P_value, pi_value),
spec_females %>% select(Gene, gender_specific_score),
by = "Gene") %>% mutate(ranking_value = ((rank(pi_value) / n) + (rank(gender_specific_score) / n))) %>%
select(Gene, log_fold_change, P_value, pi_value, ranking_value) %>%
......@@ -451,8 +459,10 @@ for (i in seq_len(length(config$integrations))) {
limma_analysis <- config$limma_analyses[[k]]
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gs_combinedscores_rankings.tsv")
n <- dim(res_males)[1]
write.table(merge(res_males %>% select(Gene, log_fold_change, P_value, pi_value),
n <- dim(GS_males)[1]
write.table(merge(GS_males %>%
mutate(log_fold_change = log_fold_change.M, P_value = P_value.M, pi_value = pi_value.M) %>%
select(Gene, log_fold_change, P_value, pi_value),
spec_males %>% select(Gene, gender_specific_score),
by = "Gene") %>% mutate(ranking_value = ((rank(pi_value) / n) + (rank(gender_specific_score) / n))) %>%
select(Gene, log_fold_change, P_value, pi_value, ranking_value) %>%
......@@ -493,7 +503,7 @@ for (i in seq_len(length(config$integrations))) {
sep = "\t",
row.names = FALSE)
rm(res_males_fn, k, limma_analysis)
# We continue with the rankings based on gender-specificity scores, for the gender-dimorphic
# genes only.
k <- 5
......@@ -561,6 +571,106 @@ for (i in seq_len(length(config$integrations))) {
sep = "\t",
row.names = FALSE)
rm(res_males_fn, k, limma_analysis, n)
# We continue with the rankings based on pi values, for the gender-specific and
# gender-dimorphic genes only.
k <- 5
limma_analysis <- config$limma_analyses[[k]]
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gsgd_pivalue_rankings.tsv")
write.table(res_females %>%
select(Gene, log_fold_change, P_value, pi_value) %>%
mutate(ranking_value = pi_value) %>%
filter(Gene %in% c(potGD$Gene, GS_females$Gene)) %>%
arrange(desc(ranking_value)),
res_females_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
rm(res_females_fn, k, limma_analysis)
k <- 6
limma_analysis <- config$limma_analyses[[k]]
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gsgd_pivalue_rankings.tsv")
write.table(res_males %>%
select(Gene, log_fold_change, P_value, pi_value) %>%
mutate(ranking_value = pi_value) %>%
filter(Gene %in% c(potGD$Gene, GS_males$Gene)) %>%
arrange(desc(ranking_value)),
res_males_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
rm(res_males_fn, k, limma_analysis)
# We continue with the rankings based on gender-specificity scores, for the gender-specific and
# gender-dimorphic genes only.
k <- 5
limma_analysis <- config$limma_analyses[[k]]
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gsgd_gdrscore_rankings.tsv")
write.table(merge(res_females %>% select(Gene, log_fold_change, P_value, pi_value),
spec_females %>% mutate(ranking_value = gender_specific_score) %>%
select(Gene, ranking_value),
by = "Gene") %>%
filter(Gene %in% c(potGD$Gene, GS_females$Gene)) %>%
arrange(desc(ranking_value)),
res_females_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
rm(res_females_fn, k, limma_analysis)
k <- 6
limma_analysis <- config$limma_analyses[[k]]
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gsgd_gdrscore_rankings.tsv")
write.table(merge(res_males %>% select(Gene, log_fold_change, P_value, pi_value),
spec_males %>% mutate(ranking_value = gender_specific_score) %>%
select(Gene, ranking_value),
by = "Gene") %>%
filter(Gene %in% c(potGD$Gene, GS_males$Gene)) %>%
arrange(desc(ranking_value)),
res_males_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
rm(res_males_fn, k, limma_analysis)
# We continue with the rankings based on both metrics, for the gender-specific and
# gender-dimorphic genes only.
k <- 5
limma_analysis <- config$limma_analyses[[k]]
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gsgd_combinedscores_rankings.tsv")
n <- dim(res_females)[1]
write.table(merge(res_females %>% select(Gene, log_fold_change, P_value, pi_value),
spec_females %>% select(Gene, gender_specific_score),
by = "Gene") %>% mutate(ranking_value = ((rank(pi_value) / n) + (rank(gender_specific_score) / n))) %>%
select(Gene, log_fold_change, P_value, pi_value, ranking_value) %>%
filter(Gene %in% c(potGD$Gene, GS_females$Gene)) %>%
arrange(desc(ranking_value)),
res_females_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
rm(res_females_fn, k, limma_analysis, n)
k <- 6
limma_analysis <- config$limma_analyses[[k]]
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gsgd_combinedscores_rankings.tsv")
n <- dim(res_males)[1]
write.table(merge(res_males %>% select(Gene, log_fold_change, P_value, pi_value),
spec_males %>% select(Gene, gender_specific_score),
by = "Gene") %>% mutate(ranking_value = ((rank(pi_value) / n) + (rank(gender_specific_score) / n))) %>%
select(Gene, log_fold_change, P_value, pi_value, ranking_value) %>%
filter(Gene %in% c(potGD$Gene, GS_males$Gene)) %>%
arrange(desc(ranking_value)),
res_males_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
rm(res_males_fn, k, limma_analysis, n)
rm(GS_females, GS_males)
rm(potGD, res_females, res_males, spec_females, spec_males)
rm(l, selection, vsn, j, integration, int_criteria)
} # End for each integration scheme.
......
#!/usr/bin/env Rscript
# ================================================================================================
# Libraries
# ================================================================================================
library("yaml")
library("tidyverse")
library("stringr")
source("../libs/conf/confR.R")
source("../libs/utils/utils.R")
message(paste0("[", Sys.time(), "] Libraries loaded."))
# ================================================================================================
# Configuration
# ================================================================================================
options(bitmapType = "cairo")
config <- read_config(config_dirs = c("../Confs/", "./"))
output_data_dir <- paste0(config$global_data_dir, config$local_data_dir)
input_data_dir <- paste0(config$global_data_dir, config$local_input_data_dir)
input_edata_dir <- paste0(config$global_data_dir, config$local_exp_data_dir)
input_rsedata_dir <- paste0(config$global_data_dir, config$local_rs_exp_data_dir)
message(paste0("[", Sys.time(), "] Configuration done."))
# ================================================================================================
# Functions
# ================================================================================================
# ================================================================================================
# Main
# ================================================================================================
# The aim of this script is to take the results of the integration and create gene lists that can
# be further analysed by GSEA and network-based methods. Several filters are applied and new
# scores / metrics are also computed. The general idea is to define the gender-specific and
# gender-dimorphic genes that make sense while keeping the workfow as simple as possible.
# Filtering parameters
specificity_threshold <- 1.1
overlap_threshold <- 0.5
# We focus on one configuration, our main configuration (SNage, VSN, max-avg) but we also
# run the other integration schemes (SN and DA) with the same default parameters.
for (i in seq_len(length(config$integrations))) {
# We get the integration name for that scheme.
integration <- config$integrations[[i]]
int_criteria <- str_split(integration$criteria, ",")[[1]]
# Default parameters for the VSN and probe selection method.
j <- 1
vsn <- config$variance_methods[[j]]
l <- 1
selection <- config$selections[[l]]
#################################################################################################
# A: reading the results of the previous steps.
#################################################################################################
# We first read the results of the integration of the female analyses.
k <- 5
limma_analysis <- config$limma_analyses[[k]]
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_",
limma_analysis$name, "_", selection$name, "_integration.tsv")
res_females <- read.delim(res_females_fn,
row.names = 1,
quote = "",
stringsAsFactors = FALSE)
rm(res_females_fn)
# We then read the gender-specificity scores for the female analysis.
spec_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_",
limma_analysis$name, "_", selection$name, "_genderspecificityscore.tsv")
spec_females <- read.delim(spec_females_fn,
row.names = 1,
quote = "",
stringsAsFactors = FALSE)
rm(spec_females_fn)
rm(k, limma_analysis)
# We repeat the same procedure for the results of the integration of the males analyses.
k <- 6
limma_analysis <- config$limma_analyses[[k]]
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_",
limma_analysis$name, "_", selection$name, "_integration.tsv")
res_males <- read.delim(res_males_fn,
row.names = 1,
quote = "",
stringsAsFactors = FALSE)
rm(res_males_fn)
# And the gender-specificity scores for the female analysis.
spec_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_",
limma_analysis$name, "_", selection$name, "_genderspecificityscore.tsv")
spec_males <- read.delim(spec_males_fn,
row.names = 1,
quote = "",
stringsAsFactors = FALSE)
rm(spec_males_fn)
rm(k, limma_analysis)
# We merge the female and male results as to identify the genes that are potentially gender-specific
# and the ones that are potentially gender-dimorphic. This decision is based on the log fold-changes.
res_FM <- merge(x = res_females, y = res_males, by = "SYMBOL", all = TRUE, suffixes = c(".F",".M"))
rm(res_females, res_males)
# The common potential gender-specific genes are present in both the female and male analyses,
# have the same regulation (up/down).
common_potGS <- res_FM %>% filter(!is.na(wavg_csss_logFC.F), !is.na(wavg_csss_logFC.M)) %>%
filter(sign(wavg_csss_logFC.F) == sign(wavg_csss_logFC.M))
# The unique potential gender-specific genes are present in only one of the two analyses (either
# female or male) and are de facto considered as gender-specific.
unique_females_potGS <- res_FM %>% filter(is.na(wavg_csss_logFC.M))
unique_males_potGS <- res_FM %>% filter(is.na(wavg_csss_logFC.F))
# The potential gender-dimorphic genes are present in both analyses but with a different
# regulation (up/down).
potGD <- res_FM %>% filter(!is.na(wavg_csss_logFC.F), !is.na(wavg_csss_logFC.M)) %>%
filter(sign(wavg_csss_logFC.F) != sign(wavg_csss_logFC.M))
rm(res_FM)
# We then need to refine the potential gender-specific and gender-dimorphic genes based on various
# criteria. We start with the gender-specific genes and more precisely with the common potential
# gender-specific genes.
# First, we aim at identifying the genes that are really specific (i.e., whose signal is really
# different between females and males). For this, we use the gender-specificity scores computed
# in a previous step. In addition, we request the absolute LFC to be higher for the gender of
# specificity.
common_females_potGS <- merge(common_potGS,
spec_females %>% select(Gene, gender_specific_score),
by.x = "SYMBOL", by.y = "Gene", all.x = TRUE) %>%
filter(gender_specific_score >= specificity_threshold) %>%
filter(abs(wavg_csss_logFC.F) >= abs(wavg_csss_logFC.M))
common_males_potGS <- merge(common_potGS,
spec_males %>% select(Gene, gender_specific_score),
by.x = "SYMBOL", by.y = "Gene", all.x = TRUE) %>%
filter(gender_specific_score >= specificity_threshold) %>%
filter(abs(wavg_csss_logFC.M) >= abs(wavg_csss_logFC.F))
rm(common_potGS, spec_males, spec_females)
# We check that no gene is in common between common_females_potGS and common_males_potGS.
# This could happen if the gender specificity scores are not computed correctly.
m <- paste0("Genes specific for both males and females: ",
dim(merge(common_females_potGS, common_males_potGS, by = "SYMBOL"))[1])
message(paste0("[", Sys.time(), "] ", m, "."))
rm(m)
# The unique_females_potGS and unique_males_potGS do not need any filtering since
# the signal is present in only one gender to start with.
# Therefore, we can merge the common and unique lists for each gender to obtain the
# gender-specific genes.
GS_females <- rbind(common_females_potGS, unique_females_potGS %>%
mutate(gender_specific_score = NA))
GS_males <- rbind(common_males_potGS, unique_males_potGS %>%
mutate(gender_specific_score = NA))
rm(common_females_potGS, common_males_potGS,unique_females_potGS, unique_males_potGS)
# We save the files we have generated, starting with the female gender-specific genes.
k <- 5
limma_analysis <- config$limma_analyses[[k]]
GS_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_integration_table.tsv")
write.table(GS_females, GS_females_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
rm(GS_females_fn, GS_females, k, limma_analysis)
# And the male gender-specific genes.
k <- 6
limma_analysis <- config$limma_analyses[[k]]
GS_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_integration_table.tsv")
write.table(GS_males, GS_males_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
rm(GS_males_fn, GS_males, k, limma_analysis)
# The gender-dimorphic genes.
k <- 7
limma_analysis <- config$limma_analyses[[k]]
GD_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_integration_table.tsv")
write.table(potGD, GD_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
rm(potGD, GD_fn, k, limma_analysis)
rm(integration, selection, vsn, int_criteria, j, l)
} # End for each integration scheme.
# We clean the workspace and log the session details for reproducibility.
rm(i, overlap_threshold, specificity_threshold)
rm(config, output_data_dir, input_data_dir, input_edata_dir, input_rsedata_dir)
sessionInfo()
......@@ -4,7 +4,7 @@
#SBATCH --mail-user=leon-charles.tranchevent@uni.lu
#SBATCH -N 1
#SBATCH -n 20
#SBATCH --time=0-03:00:00
#SBATCH --time=0-04:00:00
#SBATCH -p batch
#SBATCH --qos=qos-batch
......
......@@ -4,7 +4,7 @@
#SBATCH --mail-user=leon-charles.tranchevent@uni.lu
#SBATCH -N 1
#SBATCH -n 8
#SBATCH --time=0-06:00:00
#SBATCH --time=0-10:00:00
#SBATCH -p batch
#SBATCH --qos=qos-batch
......
......@@ -39,7 +39,13 @@ duplicate_row_first_cell <- function(row, sep = "|") {
keys <- strsplit(row[1], sep, fixed = TRUE)[[1]]
val_1 <- rep(row[2], length(keys))
val_2 <- rep(row[3], length(keys))
to_ret <- cbind(Gene = keys, log_fold_change = val_1, P_value = val_2)
val_3 <- rep(row[4], length(keys))
val_4 <- rep(row[5], length(keys))
to_ret <- cbind(Gene = keys,
log_fold_change = val_1,
P_value = val_2,
pi_value = val_3,
ranking_value = val_4)
return(to_ret)
}
......@@ -47,109 +53,90 @@ duplicate_row_first_cell <- function(row, sep = "|") {
# Main
# ================================================================================================
# We perform the enrichment on all rankings files.
ranking_files <- list.files(path = output_data_dir, pattern = "*rankings.tsv")
# We do all integration schemes one by one.
for (i in seq_len(length(config$integrations))) {
for (ranking_file in ranking_files) {
# We define the I/Os.
analysis_prefix <- strsplit(ranking_file, "_rankings.tsv")[[1]]
ofile_prefix <- paste0("PF_", analysis_prefix, "_")
input_filename <- paste0(output_data_dir, ranking_file)
# We get the integration name for that scheme.
integration <- config$integrations[[i]]
# We read the prepared ranking and keep only the value used to rank.
ranking <- read.delim(input_filename, stringsAsFactors = FALSE)
rm(input_filename)
# We only do the enrichment if it is necessary.
if (integration$use_for_enrichment == "FALSE") {
next
# We clean genes (no duplicate entries with pipe separated gene symbols).
# Only if necessary (that is if we have at least one pipe).
ranking_clean <- NULL
if (sum(grepl("\\|", ranking$Gene)) > 0) {
tmp_res <- t(apply(ranking, 1, duplicate_row_first_cell))
ranking_clean <- data.frame(do.call(rbind, c(tmp_res)),
row.names = NULL,
stringsAsFactors = FALSE)
rm(tmp_res)
} else {
ranking_clean <- ranking
}
# We do all limma comparisons one by one.
for (j in seq_len(length(config$limma_analyses))) {
# We extract the Limma parameters.
limma_parameters <- config$limma_analyses[[j]]
# We only do the enrichment if it is necessary.
if (limma_parameters$use_for_enrichment == "FALSE") {
ranking_clean$log_fold_change <- as.numeric(ranking_clean$log_fold_change)
ranking_clean$P_value <- as.numeric(ranking_clean$P_value)
ranking_clean$pi_value <- as.numeric(ranking_clean$pi_value)
ranking_clean$ranking_value <- as.numeric(ranking_clean$ranking_value)
rm(ranking)
# Be careful here the P values are the nominal P values not the adjusted ones.
ranking_final <- ranking_clean %>% select(Gene, log_fold_change, P_value) %>%
arrange(P_value) %>% filter(P_value < 0.05)
</