Commit 67e7de53 authored by Leon-Charles Tranchevent's avatar Leon-Charles Tranchevent
Browse files

New combined scores to integrate both the pi values and the gender-specificity scores.

parent fdf6b966
......@@ -363,10 +363,8 @@ rm(m)
#for (gene in config$genes_to_plot) {#}
# DEBUG
gene <- "RFC5"
##gene_datasets <- c("GSE20159", "GSE20163", "GSE20292", "GSE26927", "GSE49036", "GSE8397", "NBB")
gene_datasets <- c("GSE20163")
gene <- "XIST"
gene_datasets <- c("GSE20159", "GSE20163", "GSE20292", "GSE26927", "GSE49036", "GSE8397", "NBB")
plot_gene(gene = gene,
gene_datasets = gene_datasets,
......
......@@ -25,6 +25,25 @@ message(paste0("[", Sys.time(), "] Configuration done."))
# Functions
# ================================================================================================
#' @title Compute the pi values for a given set of fold changes and P values.
#'
#' @description This function takes a set of log fold-changes and P values corresponding to the
#' same entities. It then computes the pi values based on the following formula:
#'   pi value = -log10(P value) * abs(log fold-change)
#' Entries whose pi value is exactly 0 (typically the ones with a P value equal to one) would
#' all be tied. To separate them, their pi value is recomputed with a pseudo P value that lies
#' between the largest P value below one and one:
#'   pseudo P value = 0.1 * (9 + max(P values < 1))
#' so that these entries end up ordered by their absolute log fold-change.
#' Missing values are propagated: an entry with a missing P value or log fold-change gets a
#' missing pi value and is ignored when the pseudo P value is derived. When no P value is
#' strictly below one, no pseudo P value can be derived and the pi values equal to 0 are kept.
#' Note: the order of the two parameters should be the same and correspond to the same entities.
#' @param log_fold_change A set of log fold change values.
#' @param P_value A set of P values.
#' @return A set of pi values.
compute_pi_values <- function(log_fold_change, P_value) {
  pi_value <- -log10(P_value) * abs(log_fold_change)

  # which() drops the NA comparisons, so missing values can neither poison max() nor end up
  # as NA subscripts in the assignment below (which would be an error).
  tied <- which(pi_value == 0)
  p_below_one <- P_value[which(P_value < 1)]

  # Without at least one P value below one, there is no reference to build the pseudo P value
  # from (max() of an empty set is -Inf), so the ties are left untouched.
  if (length(tied) > 0 && length(p_below_one) > 0) {
    pseudo_p_value <- 0.1 * (9 + max(p_below_one))
    pi_value[tied] <- -log10(pseudo_p_value) * abs(log_fold_change[tied])
  }
  return(pi_value)
}
# ================================================================================================
# Main
# ================================================================================================
......@@ -52,6 +71,9 @@ for (i in seq_len(length(config$integrations))) {
l <- 1
selection <- config$selections[[l]]
#################################################################################################
# A: reading the results of the previous steps.
#################################################################################################
# We first read the dataset overlap data (by definition, common to males and females).
data_over_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_ALL_",
selection$name, "_datasetoverlap.tsv")
......@@ -127,10 +149,19 @@ for (i in seq_len(length(config$integrations))) {
rm(res_gds_fn)
rm(k, limma_analysis)
#################################################################################################
# B: saving the tables (for manual exploration of the results).
#################################################################################################
# We first add the pi values to the results of the male and female analyses.
res_females$pi_value <- compute_pi_values(res_females$log_fold_change,
res_females$P_value)
res_males$pi_value <- compute_pi_values(res_males$log_fold_change,
res_males$P_value)
# We merge the female and male results in order to identify the genes that are potentially
# gender-specific and the ones that are potentially gender-dimorphic. This decision is based on
# the log fold-changes.
res_FM <- merge(x = res_females, y = res_males, by = "Gene", all = TRUE, suffixes = c(".F",".M"))
rm(res_females, res_males)
# The common potential gender-specific genes are present in both the female and male analyses,
# have the same regulation (up/down).
......@@ -166,7 +197,7 @@ for (i in seq_len(length(config$integrations))) {
by = "Gene", all.x = TRUE) %>%
filter(gender_specific_score >= specificity_threshold) %>%
filter(abs(log_fold_change.M) >= abs(log_fold_change.F))
rm(spec_females, spec_males, common_potGS)
rm(common_potGS)
# We check that no gene is in common between common_females_potGS and common_males_potGS.
# This could happen if the gender specificity scores are not computed correctly.
......@@ -185,12 +216,6 @@ for (i in seq_len(length(config$integrations))) {
mutate(gender_specific_score = NA))
rm(common_females_potGS, common_males_potGS,unique_females_potGS, unique_males_potGS)
# We also create lists of both gender-specific and gender-dimorphic genes for each gender.
GSGD_females <- rbind(GS_females, potGD %>%
mutate(gender_specific_score = NA))
GSGD_males <- rbind(GS_males, potGD %>%
mutate(gender_specific_score = NA))
# We now take care of the potential gender-dimorphic genes. This list contains so far genes
# with opposite regulation in females and males.
......@@ -210,109 +235,312 @@ for (i in seq_len(length(config$integrations))) {
GD <- resGDS_potGDsel_only %>% filter(!is.na(log_fold_change))
#potGD_notinGDS <- resGDS_potGDsel_only %>% filter(is.na(log_fold_change))
rm(resGDS_potGDsel_only, potGDsel, potGD)
rm(resGDS_potGDsel_only, potGDsel)
# We save the files we have generated, starting with the female gender-specific genes.
k <- 5
limma_analysis <- config$limma_analyses[[k]]
GS_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_genderspecific.tsv")
"_", selection$name, "_genderspecific_table.tsv")
write.table(GS_females, GS_females_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
GS_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_genderspecific_rankings.tsv")
write.table(GS_females %>%
mutate(log_fold_change = log_fold_change.F) %>%
mutate(P_value = P_value.F) %>%
select(Gene, log_fold_change, P_value),
GS_females_fn,
rm(GS_females_fn,GS_females, k, limma_analysis)
# And the male gender-specific genes.
k <- 6
limma_analysis <- config$limma_analyses[[k]]
GS_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_genderspecific_table.tsv")
write.table(GS_males, GS_males_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
rm(GS_females_fn,GS_females)
rm(GS_males_fn, GS_males, k, limma_analysis)
# The complete female list (gender-specific + gender-dimorphic).
GSGD_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_genderspecific_genderdimorphic.tsv")
write.table(GSGD_females, GSGD_females_fn,
# The gender-dimorphic genes.
k <- 7
limma_analysis <- config$limma_analyses[[k]]
GD_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_genderdimorphic_table.tsv")
write.table(GD, GD_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
GSGD_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_genderspecific_genderdimorphic_rankings.tsv")
write.table(GSGD_females %>%
mutate(log_fold_change = log_fold_change.F) %>%
mutate(P_value = P_value.F) %>%
select(Gene, log_fold_change, P_value),
GSGD_females_fn,
rm(GD, GD_fn, k, limma_analysis)
#################################################################################################
# C: saving the ranking files (for GSEA).
#################################################################################################
# We save the ranking files for the male and female analyses. There are a number of files that
# are generated depending on the metric used to rank the genes (pi values, gender specificity
# scores, or another metric that combines both) and on the genes that are ranked (only the
# gender-specific genes, only the gender-dimorphic genes, or both).
# We start with the rankings based on pi values, for all genes.
k <- 5
limma_analysis <- config$limma_analyses[[k]]
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gsgd_pivalue_rankings.tsv")
write.table(res_females %>%
select(Gene, pi_value) %>% arrange(desc(pi_value)),
res_females_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
rm(GSGD_females_fn, GSGD_females)
rm(k, limma_analysis)
rm(res_females_fn, k, limma_analysis)
k <- 6
limma_analysis <- config$limma_analyses[[k]]
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gsgd_pivalue_rankings.tsv")
write.table(res_males %>%
select(Gene, pi_value) %>% arrange(desc(pi_value)),
res_males_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
rm(res_males_fn, k, limma_analysis)
# We continue with the rankings based on gender-specificity scores, for all genes.
k <- 5
limma_analysis <- config$limma_analyses[[k]]
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gsgd_gdrscore_rankings.tsv")
write.table(merge(res_females %>% select(Gene),
spec_females %>% select(Gene, gender_specific_score),
by = "Gene") %>% arrange(desc(gender_specific_score)),
res_females_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
rm(res_females_fn, k, limma_analysis)
k <- 6
limma_analysis <- config$limma_analyses[[k]]
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gsgd_gdrscore_rankings.tsv")
write.table(merge(res_males %>% select(Gene),
spec_males %>% select(Gene, gender_specific_score),
by = "Gene") %>% arrange(desc(gender_specific_score)),
res_males_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
rm(res_males_fn, k, limma_analysis)
# And the male gender-specific genes.
# We continue with the rankings based on both metrics, for all genes.
k <- 5
limma_analysis <- config$limma_analyses[[k]]
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gsgd_combinedscores_rankings.tsv")
n <- dim(res_females)[1]
write.table(merge(res_females %>% select(Gene, pi_value),
spec_females %>% select(Gene, gender_specific_score),
by = "Gene") %>% mutate(combined_score = ((1 - rank(-pi_value) / n) + (1 - rank(-gender_specific_score) / n))) %>%
select(Gene, combined_score) %>%
arrange(desc(combined_score)),
res_females_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
rm(res_females_fn, k, limma_analysis, n)
k <- 6
limma_analysis <- config$limma_analyses[[k]]
GS_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_genderspecific.tsv")
write.table(GS_males, GS_males_fn,
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gsgd_combinedscores_rankings.tsv")
n <- dim(res_males)[1]
write.table(merge(res_males %>% select(Gene, pi_value),
spec_males %>% select(Gene, gender_specific_score),
by = "Gene") %>% mutate(combined_score = ((1 - rank(-pi_value) / n) + (1 - rank(-gender_specific_score) / n))) %>%
select(Gene, combined_score) %>%
arrange(desc(combined_score)),
res_males_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
GS_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_genderspecific_rankings.tsv")
write.table(GS_males %>%
mutate(log_fold_change = log_fold_change.M) %>%
mutate(P_value = P_value.M) %>%
select(Gene, log_fold_change, P_value),
GS_males_fn,
rm(res_males_fn, k, limma_analysis, n)
# We continue with the rankings based on pi values, for the gender-specific genes only.
k <- 5
limma_analysis <- config$limma_analyses[[k]]
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gs_pivalue_rankings.tsv")
write.table(res_females %>%
select(Gene, pi_value) %>% filter(!Gene %in% potGD$Gene) %>% arrange(desc(pi_value)),
res_females_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
rm(GS_males_fn, GS_males)
rm(res_females_fn, k, limma_analysis)
k <- 6
limma_analysis <- config$limma_analyses[[k]]
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gs_pivalue_rankings.tsv")
write.table(res_males %>%
select(Gene, pi_value) %>% filter(!Gene %in% potGD$Gene) %>% arrange(desc(pi_value)),
res_males_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
rm(res_males_fn, k, limma_analysis)
# The complete male list (gender-specific + gender-dimorphic).
GSGD_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_genderspecific_genderdimorphic.tsv")
write.table(GSGD_males, GSGD_males_fn,
# We continue with the rankings based on gender-specificity scores, for the gender-specific
# genes only.
k <- 5
limma_analysis <- config$limma_analyses[[k]]
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gs_gdrscore_rankings.tsv")
write.table(merge(res_females %>% select(Gene),
spec_females %>% select(Gene, gender_specific_score),
by = "Gene") %>%
filter(!Gene %in% potGD$Gene) %>%
arrange(desc(gender_specific_score)),
res_females_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
GSGD_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_genderspecific_genderdimorphic_rankings.tsv")
write.table(GSGD_males %>%
mutate(log_fold_change = log_fold_change.M) %>%
mutate(P_value = P_value.M) %>%
select(Gene, log_fold_change, P_value),
GSGD_males_fn,
rm(res_females_fn, k, limma_analysis)
k <- 6
limma_analysis <- config$limma_analyses[[k]]
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gs_gdrscore_rankings.tsv")
write.table(merge(res_males %>% select(Gene),
spec_males %>% select(Gene, gender_specific_score),
by = "Gene") %>%
filter(!Gene %in% potGD$Gene) %>%
arrange(desc(gender_specific_score)),
res_males_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
rm(GSGD_males_fn, GSGD_males)
rm(k, limma_analysis)
rm(res_males_fn, k, limma_analysis)
# We continue with the rankings based on both metrics, for the gender-specific
# genes only.
k <- 5
limma_analysis <- config$limma_analyses[[k]]
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gs_combinedscores_rankings.tsv")
n <- dim(res_females)[1]
write.table(merge(res_females %>% select(Gene, pi_value),
spec_females %>% select(Gene, gender_specific_score),
by = "Gene") %>% mutate(combined_score = ((1 - rank(-pi_value) / n) + (1 - rank(-gender_specific_score) / n))) %>%
select(Gene, combined_score) %>%
filter(!Gene %in% potGD$Gene) %>%
arrange(desc(combined_score)),
res_females_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
rm(res_females_fn, k, limma_analysis, n)
k <- 6
limma_analysis <- config$limma_analyses[[k]]
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gs_combinedscores_rankings.tsv")
n <- dim(res_males)[1]
write.table(merge(res_males %>% select(Gene, pi_value),
spec_males %>% select(Gene, gender_specific_score),
by = "Gene") %>% mutate(combined_score = ((1 - rank(-pi_value) / n) + (1 - rank(-gender_specific_score) / n))) %>%
select(Gene, combined_score) %>%
filter(!Gene %in% potGD$Gene) %>%
arrange(desc(combined_score)),
res_males_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
rm(res_males_fn, k, limma_analysis, n)
# The gender-dimorphic genes.
k <- 7
# We continue with the rankings based on pi values, for the gender-dimorphic genes only.
k <- 5
limma_analysis <- config$limma_analyses[[k]]
GD_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_genderdimorphic.tsv")
write.table(GD, GD_fn,
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gd_pivalue_rankings.tsv")
write.table(res_females %>%
select(Gene, pi_value) %>% filter(Gene %in% potGD$Gene) %>% arrange(desc(pi_value)),
res_females_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
GD_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_genderdimorphic_rankings.tsv")
write.table(GD %>%
select(Gene, log_fold_change, P_value),
GD_fn,
rm(res_females_fn, k, limma_analysis)
k <- 6
limma_analysis <- config$limma_analyses[[k]]
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gd_pivalue_rankings.tsv")
write.table(res_males %>%
select(Gene, pi_value) %>% filter(Gene %in% potGD$Gene) %>% arrange(desc(pi_value)),
res_males_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
rm(GD_fn, GD)
rm(k, limma_analysis)
rm(res_males_fn, k, limma_analysis)
# We continue with the rankings based on gender-specificity scores, for the gender-dimorphic
# genes only.
k <- 5
limma_analysis <- config$limma_analyses[[k]]
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gd_gdrscore_rankings.tsv")
write.table(merge(res_females %>% select(Gene),
spec_females %>% select(Gene, gender_specific_score),
by = "Gene") %>%
filter(Gene %in% potGD$Gene) %>%
arrange(desc(gender_specific_score)),
res_females_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
rm(res_females_fn, k, limma_analysis)
k <- 6
limma_analysis <- config$limma_analyses[[k]]
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gd_gdrscore_rankings.tsv")
write.table(merge(res_males %>% select(Gene),
spec_males %>% select(Gene, gender_specific_score),
by = "Gene") %>%
filter(Gene %in% potGD$Gene) %>%
arrange(desc(gender_specific_score)),
res_males_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
rm(res_males_fn, k, limma_analysis)
# We continue with the rankings based on both metrics, for the gender-dimorphic
# genes only.
k <- 5
limma_analysis <- config$limma_analyses[[k]]
res_females_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gd_combinedscores_rankings.tsv")
n <- dim(res_females)[1]
write.table(merge(res_females %>% select(Gene, pi_value),
spec_females %>% select(Gene, gender_specific_score),
by = "Gene") %>% mutate(combined_score = ((1 - rank(-pi_value) / n) + (1 - rank(-gender_specific_score) / n))) %>%
select(Gene, combined_score) %>%
filter(Gene %in% potGD$Gene) %>%
arrange(desc(combined_score)),
res_females_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
rm(res_females_fn, k, limma_analysis, n)
k <- 6
limma_analysis <- config$limma_analyses[[k]]
res_males_fn <- paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
"_", selection$name, "_gd_combinedscores_rankings.tsv")
n <- dim(res_males)[1]
write.table(merge(res_males %>% select(Gene, pi_value),
spec_males %>% select(Gene, gender_specific_score),
by = "Gene") %>% mutate(combined_score = ((1 - rank(-pi_value) / n) + (1 - rank(-gender_specific_score) / n))) %>%
select(Gene, combined_score) %>%
filter(Gene %in% potGD$Gene) %>%
arrange(desc(combined_score)),
res_males_fn,
quote = FALSE,
sep = "\t",
row.names = FALSE)
rm(res_males_fn, k, limma_analysis, n)
rm(potGD, res_females, res_males, spec_females, spec_males)
rm(l, selection, vsn, j, integration, int_criteria)
} # End for each integration scheme.
......
......@@ -302,6 +302,78 @@ plot_top_gsea_hits <- function(gsea,
}
}
#' @title Read a file with LFC and P values and compute the pi values.
#'
#' @description This function reads a ranking file given an integration scheme, a limma
#' tag and a file tag. It then computes the pi values and maps the official gene symbols to
#' EntrezGene identifiers. The file that is read is
#'   <output_data_dir><integration$name>_VSN_<limma_tag>_max-avg_<file_tag>_rankings.tsv
#' and is expected to contain the columns Gene, log_fold_change and P_value.
#' The pi values are defined as -log10(P value) * abs(log fold-change). Entries whose pi value
#' is exactly 0 (typically P values equal to one) are recomputed using a pseudo P value
#' (0.1 * (9 + max(P values < 1))) as to avoid ties. Missing values are propagated, and when no
#' P value is strictly below one, the pi values equal to 0 are kept as they are.
#'
#' @param integration The integration scheme to use (SN, DA, SNage).
#' @param limma_tag The name of the limma analysis to use.
#' @param file_tag The name of the file tag to use.
#' @param negative A boolean indicating whether the sign of the pi values should be inverted.
#' @return A dataframe with the gene official symbols, the EntrezGene identifiers (can be NA),
#' the log fold-changes, P values and pi values, sorted by decreasing pi value.
get_pi_values <- function(integration, limma_tag, file_tag, negative = FALSE) {

  # We define the input file and read the prepared ranking.
  analysis_prefix <- paste0(integration$name, "_VSN_", limma_tag, "_max-avg_", file_tag)
  ranking_file <- paste0(output_data_dir, analysis_prefix, "_rankings.tsv")
  ranking <- read.delim(ranking_file, stringsAsFactors = FALSE)

  # We clean genes (no duplicate entries with pipe separated gene symbols).
  # Only if necessary (that is if we have at least one pipe).
  if (any(grepl("|", ranking$Gene, fixed = TRUE))) {
    tmp_res <- t(apply(ranking, 1, duplicate_row_first_cell))
    ranking_clean <- data.frame(do.call(rbind, c(tmp_res)),
                                row.names = NULL,
                                stringsAsFactors = FALSE)
  } else {
    ranking_clean <- ranking
  }
  # apply() works on a character matrix, so the numeric columns must be converted back
  # (this is a no-op for the branch that keeps the original data frame).
  ranking_clean$log_fold_change <- as.numeric(ranking_clean$log_fold_change)
  ranking_clean$P_value <- as.numeric(ranking_clean$P_value)

  # We then add the pi values, subtracting a small epsilon from the P values when they are
  # equal to 1, as to avoid ties (which are therefore separated via the associated log
  # fold-change).
  ranking_clean$pi_value <- -log10(ranking_clean$P_value) * abs(ranking_clean$log_fold_change)

  # which() drops the NA comparisons, so missing values can neither poison max() nor end up
  # as NA subscripts in the assignment below (which would be an error).
  tied <- which(ranking_clean$pi_value == 0)
  p_below_one <- ranking_clean$P_value[which(ranking_clean$P_value < 1)]

  # Without any P value below one there is no reference for the pseudo P value (max() of an
  # empty set is -Inf), so the ties are left untouched in that case.
  if (length(tied) > 0 && length(p_below_one) > 0) {
    pseudo_pval_ref <- 0.1 * (9 + max(p_below_one))
    ranking_clean$pi_value[tied] <-
      -log10(pseudo_pval_ref) * abs(ranking_clean$log_fold_change[tied])
  }
  if (negative) {
    ranking_clean$pi_value <- -ranking_clean$pi_value
  }

  # We map the gene symbols to the EntrezGene identifiers. Symbols without a mapping are
  # kept (all.x = TRUE) and get a missing EGene.
  mapped_egenes <- bitr(ranking_clean$Gene,
                        fromType = "SYMBOL",
                        toType   = c("ENTREZID"),
                        OrgDb    = org.Hs.eg.db) #nolint
  ranking_final <- merge(x     = ranking_clean,
                         y     = mapped_egenes,
                         by.x  = "Gene",
                         by.y  = "SYMBOL",
                         all.x = TRUE) %>%
    mutate(EGene = ENTREZID) %>%
    select(Gene, EGene, log_fold_change, P_value, pi_value) %>%
    arrange(desc(pi_value))
  return(ranking_final)
}
# ================================================================================================
# Main
# ================================================================================================
......@@ -316,139 +388,91 @@ for (i in seq_len(length(config$integrations))) {
if (integration$use_for_enrichment == "FALSE") {
next
}
# We do all limma comparisons one by one.
for (j in seq_len(length(config$limma_analyses))) {