Commit a7976e94 authored by Leon-Charles Tranchevent's avatar Leon-Charles Tranchevent
Browse files

Functional enrichment of all ranking files instead of only the files of the main configuration.

parent 67e7de53
......@@ -284,7 +284,9 @@ for (i in seq_len(length(config$integrations))) {
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gsgd_pivalue_rankings.tsv")
write.table(res_females %>%
select(Gene, pi_value) %>% arrange(desc(pi_value)),
mutate(ranking_value = pi_value) %>%
select(Gene, log_fold_change, P_value, pi_value, ranking_value) %>%
arrange(desc(ranking_value)),
res_females_fn,
quote = FALSE,
sep = "\t",
......@@ -295,7 +297,9 @@ for (i in seq_len(length(config$integrations))) {
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gsgd_pivalue_rankings.tsv")
write.table(res_males %>%
select(Gene, pi_value) %>% arrange(desc(pi_value)),
mutate(ranking_value = pi_value) %>%
select(Gene, log_fold_change, P_value, pi_value, ranking_value) %>%
arrange(desc(ranking_value)),
res_males_fn,
quote = FALSE,
sep = "\t",
......@@ -307,9 +311,10 @@ for (i in seq_len(length(config$integrations))) {
limma_analysis <- config$limma_analyses[[k]]
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gsgd_gdrscore_rankings.tsv")
write.table(merge(res_females %>% select(Gene),
spec_females %>% select(Gene, gender_specific_score),
by = "Gene") %>% arrange(desc(gender_specific_score)),
write.table(merge(res_females %>% select(Gene, log_fold_change, P_value, pi_value),
spec_females %>% mutate(ranking_value = gender_specific_score) %>%
select(Gene, ranking_value),
by = "Gene") %>% arrange(desc(ranking_value)),
res_females_fn,
quote = FALSE,
sep = "\t",
......@@ -319,9 +324,10 @@ for (i in seq_len(length(config$integrations))) {
limma_analysis <- config$limma_analyses[[k]]
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gsgd_gdrscore_rankings.tsv")
write.table(merge(res_males %>% select(Gene),
spec_males %>% select(Gene, gender_specific_score),
by = "Gene") %>% arrange(desc(gender_specific_score)),
write.table(merge(res_males %>% select(Gene, log_fold_change, P_value, pi_value),
spec_males %>% mutate(ranking_value = gender_specific_score) %>%
select(Gene, ranking_value),
by = "Gene") %>% arrange(desc(ranking_value)),
res_males_fn,
quote = FALSE,
sep = "\t",
......@@ -334,11 +340,11 @@ for (i in seq_len(length(config$integrations))) {
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gsgd_combinedscores_rankings.tsv")
n <- dim(res_females)[1]
write.table(merge(res_females %>% select(Gene, pi_value),
write.table(merge(res_females %>% select(Gene, log_fold_change, P_value, pi_value),
spec_females %>% select(Gene, gender_specific_score),
by = "Gene") %>% mutate(combined_score = ((1 - rank(-pi_value) / n) + (1 - rank(-gender_specific_score) / n))) %>%
select(Gene, combined_score) %>%
arrange(desc(combined_score)),
by = "Gene") %>% mutate(ranking_value = ((rank(pi_value) / n) + (rank(gender_specific_score) / n))) %>%
select(Gene, log_fold_change, P_value, pi_value, ranking_value) %>%
arrange(desc(ranking_value)),
res_females_fn,
quote = FALSE,
sep = "\t",
......@@ -349,11 +355,11 @@ for (i in seq_len(length(config$integrations))) {
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gsgd_combinedscores_rankings.tsv")
n <- dim(res_males)[1]
write.table(merge(res_males %>% select(Gene, pi_value),
write.table(merge(res_males %>% select(Gene, log_fold_change, P_value, pi_value),
spec_males %>% select(Gene, gender_specific_score),
by = "Gene") %>% mutate(combined_score = ((1 - rank(-pi_value) / n) + (1 - rank(-gender_specific_score) / n))) %>%
select(Gene, combined_score) %>%
arrange(desc(combined_score)),
by = "Gene") %>% mutate(ranking_value = ((rank(pi_value) / n) + (rank(gender_specific_score) / n))) %>%
select(Gene, log_fold_change, P_value, pi_value, ranking_value) %>%
arrange(desc(ranking_value)),
res_males_fn,
quote = FALSE,
sep = "\t",
......@@ -366,7 +372,10 @@ for (i in seq_len(length(config$integrations))) {
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gs_pivalue_rankings.tsv")
write.table(res_females %>%
select(Gene, pi_value) %>% filter(!Gene %in% potGD$Gene) %>% arrange(desc(pi_value)),
select(Gene, log_fold_change, P_value, pi_value) %>%
mutate(ranking_value = pi_value) %>%
filter(!Gene %in% potGD$Gene) %>%
arrange(desc(ranking_value)),
res_females_fn,
quote = FALSE,
sep = "\t",
......@@ -377,7 +386,10 @@ for (i in seq_len(length(config$integrations))) {
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gs_pivalue_rankings.tsv")
write.table(res_males %>%
select(Gene, pi_value) %>% filter(!Gene %in% potGD$Gene) %>% arrange(desc(pi_value)),
select(Gene, log_fold_change, P_value, pi_value) %>%
mutate(ranking_value = pi_value) %>%
filter(!Gene %in% potGD$Gene) %>%
arrange(desc(ranking_value)),
res_males_fn,
quote = FALSE,
sep = "\t",
......@@ -390,11 +402,12 @@ for (i in seq_len(length(config$integrations))) {
limma_analysis <- config$limma_analyses[[k]]
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gs_gdrscore_rankings.tsv")
write.table(merge(res_females %>% select(Gene),
spec_females %>% select(Gene, gender_specific_score),
write.table(merge(res_females %>% select(Gene, log_fold_change, P_value, pi_value),
spec_females %>% mutate(ranking_value = gender_specific_score) %>%
select(Gene, ranking_value),
by = "Gene") %>%
filter(!Gene %in% potGD$Gene) %>%
arrange(desc(gender_specific_score)),
arrange(desc(ranking_value)),
res_females_fn,
quote = FALSE,
sep = "\t",
......@@ -404,18 +417,18 @@ for (i in seq_len(length(config$integrations))) {
limma_analysis <- config$limma_analyses[[k]]
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gs_gdrscore_rankings.tsv")
write.table(merge(res_males %>% select(Gene),
spec_males %>% select(Gene, gender_specific_score),
write.table(merge(res_males %>% select(Gene, log_fold_change, P_value, pi_value),
spec_males %>% mutate(ranking_value = gender_specific_score) %>%
select(Gene, ranking_value),
by = "Gene") %>%
filter(!Gene %in% potGD$Gene) %>%
arrange(desc(gender_specific_score)),
arrange(desc(ranking_value)),
res_males_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
rm(res_males_fn, k, limma_analysis)
# We continue with the rankings based on both metrics, for the gender-specific
# genes only.
k <- 5
......@@ -423,12 +436,12 @@ for (i in seq_len(length(config$integrations))) {
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gs_combinedscores_rankings.tsv")
n <- dim(res_females)[1]
write.table(merge(res_females %>% select(Gene, pi_value),
write.table(merge(res_females %>% select(Gene, log_fold_change, P_value, pi_value),
spec_females %>% select(Gene, gender_specific_score),
by = "Gene") %>% mutate(combined_score = ((1 - rank(-pi_value) / n) + (1 - rank(-gender_specific_score) / n))) %>%
select(Gene, combined_score) %>%
by = "Gene") %>% mutate(ranking_value = ((rank(pi_value) / n) + (rank(gender_specific_score) / n))) %>%
select(Gene, log_fold_change, P_value, pi_value, ranking_value) %>%
filter(!Gene %in% potGD$Gene) %>%
arrange(desc(combined_score)),
arrange(desc(ranking_value)),
res_females_fn,
quote = FALSE,
sep = "\t",
......@@ -439,12 +452,12 @@ for (i in seq_len(length(config$integrations))) {
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gs_combinedscores_rankings.tsv")
n <- dim(res_males)[1]
write.table(merge(res_males %>% select(Gene, pi_value),
write.table(merge(res_males %>% select(Gene, log_fold_change, P_value, pi_value),
spec_males %>% select(Gene, gender_specific_score),
by = "Gene") %>% mutate(combined_score = ((1 - rank(-pi_value) / n) + (1 - rank(-gender_specific_score) / n))) %>%
select(Gene, combined_score) %>%
by = "Gene") %>% mutate(ranking_value = ((rank(pi_value) / n) + (rank(gender_specific_score) / n))) %>%
select(Gene, log_fold_change, P_value, pi_value, ranking_value) %>%
filter(!Gene %in% potGD$Gene) %>%
arrange(desc(combined_score)),
arrange(desc(ranking_value)),
res_males_fn,
quote = FALSE,
sep = "\t",
......@@ -457,7 +470,10 @@ for (i in seq_len(length(config$integrations))) {
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gd_pivalue_rankings.tsv")
write.table(res_females %>%
select(Gene, pi_value) %>% filter(Gene %in% potGD$Gene) %>% arrange(desc(pi_value)),
select(Gene, log_fold_change, P_value, pi_value) %>%
mutate(ranking_value = pi_value) %>%
filter(Gene %in% potGD$Gene) %>%
arrange(desc(ranking_value)),
res_females_fn,
quote = FALSE,
sep = "\t",
......@@ -468,24 +484,28 @@ for (i in seq_len(length(config$integrations))) {
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gd_pivalue_rankings.tsv")
write.table(res_males %>%
select(Gene, pi_value) %>% filter(Gene %in% potGD$Gene) %>% arrange(desc(pi_value)),
select(Gene, log_fold_change, P_value, pi_value) %>%
mutate(ranking_value = pi_value) %>%
filter(Gene %in% potGD$Gene) %>%
arrange(desc(ranking_value)),
res_males_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
rm(res_males_fn, k, limma_analysis)
# We continue with the rankings based on gender-specificity scores, for the gender-dimorphic
# genes only.
k <- 5
limma_analysis <- config$limma_analyses[[k]]
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gd_gdrscore_rankings.tsv")
write.table(merge(res_females %>% select(Gene),
spec_females %>% select(Gene, gender_specific_score),
write.table(merge(res_females %>% select(Gene, log_fold_change, P_value, pi_value),
spec_females %>% mutate(ranking_value = gender_specific_score) %>%
select(Gene, ranking_value),
by = "Gene") %>%
filter(Gene %in% potGD$Gene) %>%
arrange(desc(gender_specific_score)),
arrange(desc(ranking_value)),
res_females_fn,
quote = FALSE,
sep = "\t",
......@@ -495,11 +515,12 @@ for (i in seq_len(length(config$integrations))) {
limma_analysis <- config$limma_analyses[[k]]
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gd_gdrscore_rankings.tsv")
write.table(merge(res_males %>% select(Gene),
spec_males %>% select(Gene, gender_specific_score),
write.table(merge(res_males %>% select(Gene, log_fold_change, P_value, pi_value),
spec_males %>% mutate(ranking_value = gender_specific_score) %>%
select(Gene, ranking_value),
by = "Gene") %>%
filter(Gene %in% potGD$Gene) %>%
arrange(desc(gender_specific_score)),
arrange(desc(ranking_value)),
res_males_fn,
quote = FALSE,
sep = "\t",
......@@ -513,12 +534,12 @@ for (i in seq_len(length(config$integrations))) {
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gd_combinedscores_rankings.tsv")
n <- dim(res_females)[1]
write.table(merge(res_females %>% select(Gene, pi_value),
write.table(merge(res_females %>% select(Gene, log_fold_change, P_value, pi_value),
spec_females %>% select(Gene, gender_specific_score),
by = "Gene") %>% mutate(combined_score = ((1 - rank(-pi_value) / n) + (1 - rank(-gender_specific_score) / n))) %>%
select(Gene, combined_score) %>%
by = "Gene") %>% mutate(ranking_value = ((rank(pi_value) / n) + (rank(gender_specific_score) / n))) %>%
select(Gene, log_fold_change, P_value, pi_value, ranking_value) %>%
filter(Gene %in% potGD$Gene) %>%
arrange(desc(combined_score)),
arrange(desc(ranking_value)),
res_females_fn,
quote = FALSE,
sep = "\t",
......@@ -529,12 +550,12 @@ for (i in seq_len(length(config$integrations))) {
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gd_combinedscores_rankings.tsv")
n <- dim(res_males)[1]
write.table(merge(res_males %>% select(Gene, pi_value),
write.table(merge(res_males %>% select(Gene, log_fold_change, P_value, pi_value),
spec_males %>% select(Gene, gender_specific_score),
by = "Gene") %>% mutate(combined_score = ((1 - rank(-pi_value) / n) + (1 - rank(-gender_specific_score) / n))) %>%
select(Gene, combined_score) %>%
by = "Gene") %>% mutate(ranking_value = ((rank(pi_value) / n) + (rank(gender_specific_score) / n))) %>%
select(Gene, log_fold_change, P_value, pi_value, ranking_value) %>%
filter(Gene %in% potGD$Gene) %>%
arrange(desc(combined_score)),
arrange(desc(ranking_value)),
res_males_fn,
quote = FALSE,
sep = "\t",
......
......@@ -45,7 +45,13 @@ duplicate_row_first_cell <- function(row, sep = "|") {
keys <- strsplit(row[1], sep, fixed = TRUE)[[1]]
val_1 <- rep(row[2], length(keys))
val_2 <- rep(row[3], length(keys))
to_ret <- cbind(Gene = keys, log_fold_change = val_1, P_value = val_2)
val_3 <- rep(row[4], length(keys))
val_4 <- rep(row[5], length(keys))
to_ret <- cbind(Gene = keys,
log_fold_change = val_1,
P_value = val_2,
pi_value = val_3,
ranking_value = val_4)
return(to_ret)
}
......@@ -75,7 +81,7 @@ perform_gsea <- function(ranked_genes,
file_dir = "",
file_prefix = "",
simplify = TRUE) {
# By default, we set the results to NULL.
gsea <- NULL
switch(type,
......@@ -176,7 +182,7 @@ perform_gsea <- function(ranked_genes,
TERM2GENE = msigdb)
}
)
# We simplify the enrichment results by removing the potentially redundant terms.
if (simplify == TRUE) {
gsea <- clusterProfiler::simplify(gsea,
......@@ -184,11 +190,11 @@ perform_gsea <- function(ranked_genes,
by = "p.adjust",
select_fun = min)
}
# We save the table as TSV file.
gsea_fn <- paste0(file_dir, file_prefix, "gsea_", type, ".tsv")
write.table(gsea, file = gsea_fn, sep = "\t", quote = FALSE, col.names = NA)
# We return the enrichResult object.
return(gsea)
}
......@@ -214,7 +220,7 @@ plot_enrichment <- function(enrich,
type_tag = "enrich",
file_dir = "",
file_prefix = "") {
# We start by defining the text size of the y labels wrt
# to the length of these labels. Default size is 9.
# This was empirically defined and it is used to circumvent a bug that prevents
......@@ -228,7 +234,7 @@ plot_enrichment <- function(enrich,
y_text_elmt <- ggplot2::element_blank()
}
}
# Dotplot of the the top enriched terms.
enrich_dotplot_fn <- paste0(file_dir, file_prefix, type_tag, "_", ont_tag,
"_dotplot.png")
......@@ -240,7 +246,7 @@ plot_enrichment <- function(enrich,
plot.title = element_text(size = 10))
print(p)
dev.off()
# The heatplot between genes and top enriched terms.
enrich_heatplot_fn <- paste0(file_dir, file_prefix, type_tag, "_", ont_tag,
"_heatplot.png")
......@@ -253,7 +259,7 @@ plot_enrichment <- function(enrich,
axis.text.x = element_blank())
print(p)
dev.off()
# Network of top enriched terms (emapplot).
enrich_emapplot_fn <- paste0(file_dir, file_prefix, type_tag, "_", ont_tag,
"_emapplot.png")
......@@ -302,29 +308,25 @@ plot_top_gsea_hits <- function(gsea,
}
}
#' @title Reading a file with LFC and P values and computes the pi values.
#'
#' @description This functions reads a results files given an integration scheme, a limma
#' tag and a file tag. It then compute the pi values, mapped the official symbols to
#' EntrezGene identifiers.
#'
#' @param integration The integration scheme to use (SN, DA, SNage).
#' @param limma_tag The name of the limma analysis to use.
#' @param file_tag The name of the file tag to use.
#' @param negative A boolean indicating whether the sign of the pi values should be inversed.
#' @return A dataframe with the gene official symbols, the EntrezGene identifiers (can be NA),
#' the log fold-change, P values and pi values.
get_pi_values <- function(integration, limma_tag, file_tag, negative = FALSE) {
# ================================================================================================
# Main
# ================================================================================================
# We perform the enrichment on all rankings files.
ranking_files <- list.files(path = output_data_dir, pattern = "*rankings.tsv")
# We do all integration schemes one by one.
for (ranking_file in ranking_files) {
# We define the I/Os.
analysis_prefix <- paste0(integration$name, "_VSN_", limma_tag, "_max-avg_", file_tag)
analysis_prefix <- strsplit(ranking_file, "_rankings.tsv")[[1]]
ofile_prefix <- paste0("CP_", analysis_prefix, "_")
ranking_file <- paste0(output_data_dir, analysis_prefix, "_rankings.tsv")
input_filename <- paste0(output_data_dir, ranking_file)
# We read the prepared ranking and keep only the value used to rank.
ranking <- read.delim(ranking_file, stringsAsFactors = FALSE)
rm(ranking_file)
ranking <- read.delim(input_filename, stringsAsFactors = FALSE)
rm(input_filename)
# We clean genes (no duplicate entries with pipe separated gene symbols).
# Only if necessary (that is if we have at least one pipe).
ranking_clean <- NULL
......@@ -333,28 +335,16 @@ get_pi_values <- function(integration, limma_tag, file_tag, negative = FALSE) {
ranking_clean <- data.frame(do.call(rbind, c(tmp_res)),
row.names = NULL,
stringsAsFactors = FALSE)
ranking_clean$log_fold_change <- as.numeric(ranking_clean$log_fold_change)
ranking_clean$P_value <- as.numeric(ranking_clean$P_value)
rm(tmp_res)
} else {
ranking_clean <- ranking
ranking_clean$P_value <- as.numeric(ranking_clean$P_value)
ranking_clean$log_fold_change <- as.numeric(ranking_clean$log_fold_change)
}
ranking_clean$log_fold_change <- as.numeric(ranking_clean$log_fold_change)
ranking_clean$P_value <- as.numeric(ranking_clean$P_value)
ranking_clean$pi_value <- as.numeric(ranking_clean$pi_value)
ranking_clean$ranking_value <- as.numeric(ranking_clean$ranking_value)
rm(ranking)
# We then add the pi values, retracting a small epsilon to the P values when they are equal
# to 1, as to avoid ties (which are therefore separated via the associated log fold-change).
ranking_clean <- ranking_clean %>%
mutate(pi_value = -log10(P_value) * abs(log_fold_change))
pseudo_pval_ref <- 0.1 * (9 + max(ranking_clean$P_value[ranking_clean$P_value < 1]))
pseudo_pi_ref <- -log10(pseudo_pval_ref) * abs(ranking_clean$log_fold_change[ranking_clean$pi_value == 0])
ranking_clean$pi_value[ranking_clean$pi_value == 0] <- pseudo_pi_ref
if (negative) {
ranking_clean$pi_value <- -ranking_clean$pi_value
}
rm(pseudo_pval_ref, pseudo_pi_ref)
# We map the gene symbols to the EntrezGene identifiers.
mapped_egenes <- bitr(ranking_clean$Gene,
fromType = "SYMBOL",
......@@ -365,117 +355,77 @@ get_pi_values <- function(integration, limma_tag, file_tag, negative = FALSE) {
by.x = "Gene",
by.y = "SYMBOL",
all.x = TRUE) %>%
mutate(EGene = ENTREZID) %>% select(Gene, EGene, log_fold_change, P_value, pi_value)
mutate(EGene = ENTREZID) %>% select(Gene, EGene, log_fold_change, ranking_value)
ranking_final$log_fold_change <- as.numeric(ranking_final$log_fold_change)
ranking_final$P_value <- as.numeric(ranking_final$P_value)
ranking_final$pi_value <- as.numeric(ranking_final$pi_value)
ranking_final <- ranking_final %>% arrange(desc(pi_value))
ranking_final$ranking_value <- as.numeric(ranking_final$ranking_value)
ranking_final <- ranking_final %>% arrange(desc(ranking_value))
ranking_final_eg <- ranking_final %>% filter(!is.na(EGene))
rm(mapped_egenes, ranking_clean)
return(ranking_final)
}
# ================================================================================================
# Main
# ================================================================================================
# We do all integration schemes one by one.
for (i in seq_len(length(config$integrations))) {
# We get the integration name for that scheme.
integration <- config$integrations[[i]]
# We only do the enrichment if it is necessary.
if (integration$use_for_enrichment == "FALSE") {
next
}
# We prepare the gene id specific rankings (using either gene names or entrezgene ids).
ranking_eg <- ranking_final_eg$ranking_value
ranking_sy <- ranking_final$ranking_value
logFC_eg <- ranking_final_eg$log_fold_change
logFC_sy <- ranking_final$log_fold_change
names(ranking_eg) <- ranking_final_eg$EGene
names(ranking_sy) <- ranking_final$Gene
names(logFC_eg) <- ranking_final_eg$EGene
names(logFC_sy) <- ranking_final$Gene
ranking_eg <- ranking_eg[!duplicated(names(ranking_eg))]
ranking_sy <- ranking_sy[!duplicated(names(ranking_sy))]
logFC_eg <- logFC_eg[!duplicated(names(logFC_eg))]
logFC_sy <- logFC_sy[!duplicated(names(logFC_sy))]
rm(ranking_final, ranking_final_eg)
# For each defined strategy
for (j in seq_len(length(config$enrichment_strategies))) {
# We loop over the functional sources.
for (k in seq_len(length(config$functional_sources))) {
# We get the integration name for that scheme.
enrichment_strategy <- config$enrichment_strategies[[j]]
# I/Os
analysis_prefix <- paste0(integration$name, "_VSN_", enrichment_strategy$limmas[1],
"_max-avg_", enrichment_strategy$file_tags[1])
ofile_prefix <- paste0("CP_", analysis_prefix, "_")
# We get the pi values of the two files
ranking_final <- get_pi_values(integration, enrichment_strategy$limmas[1],
enrichment_strategy$file_tags[1], negative = FALSE)
if (!is.na(enrichment_strategy$limmas[2])) {
ranking_final_neg <- get_pi_values(integration, enrichment_strategy$limmas[2],
enrichment_strategy$file_tags[2], negative = TRUE)
ranking_final <- rbind(ranking_final, ranking_final_neg)
# We define the current source.
func_source <- config$functional_sources[[k]]
if (is.null(func_source$cp_name) || func_source$cp_name == "") {
next
}
# We prepare the gene id specific rankings (using either gene names or entrezgene ids).
ranking_final_eg <- ranking_final %>% filter(!is.na(EGene))
ranking_eg <- ranking_final_eg$pi_value
ranking_sy <- ranking_final$pi_value
logFC_eg <- ranking_final_eg$log_fold_change
logFC_sy <- ranking_final$log_fold_change
names(ranking_eg) <- ranking_final_eg$EGene
names(ranking_sy) <- ranking_final$Gene
names(logFC_eg) <- ranking_final_eg$EGene
names(logFC_sy) <- ranking_final$Gene
ranking_eg <- ranking_eg[!duplicated(names(ranking_eg))]
ranking_sy <- ranking_sy[!duplicated(names(ranking_sy))]
logFC_eg <- logFC_eg[!duplicated(names(logFC_eg))]
logFC_sy <- logFC_sy[!duplicated(names(logFC_sy))]
rm(ranking_final, ranking_final_eg)
# We loop over the functional sources.
for (k in seq_len(length(config$functional_sources))) {
# We define the current source.
func_source <- config$functional_sources[[k]]
if (is.null(func_source$cp_name) || func_source$cp_name == "") {
next
}
# We define the reference genes (depending on the source).
ranking <- NULL
logFC <- NULL
if (func_source$cp_gene_id_type == "entrezgene") {
ranking <- ranking_eg
logFC <- logFC_eg
} else if (func_source$cp_gene_id_type == "symbol") {
ranking <- ranking_sy
logFC <- logFC_sy
}
# We define the reference genes (depending on the source).
ranking <- NULL
logFC <- NULL
if (func_source$cp_gene_id_type == "entrezgene") {
ranking <- ranking_eg
logFC <- logFC_eg
} else if (func_source$cp_gene_id_type == "symbol") {
ranking <- ranking_sy
logFC <- logFC_sy
}
# We run the GSEA itself.
gsea_pi <- perform_gsea(ranking,
type = func_source$cp_name,