Commit 67e7de53 authored by Leon-Charles Tranchevent's avatar Leon-Charles Tranchevent
Browse files

New combined scores to integrate both the pi values and the gender-specificity scores.

parent fdf6b966
......@@ -363,10 +363,8 @@ rm(m)
#for (gene in config$genes_to_plot) {#}
# DEBUG
gene <- "RFC5"
##gene_datasets <- c("GSE20159", "GSE20163", "GSE20292", "GSE26927", "GSE49036", "GSE8397", "NBB")
gene_datasets <- c("GSE20163")
gene <- "XIST"
gene_datasets <- c("GSE20159", "GSE20163", "GSE20292", "GSE26927", "GSE49036", "GSE8397", "NBB")
plot_gene(gene = gene,
gene_datasets = gene_datasets,
......
......@@ -302,36 +302,22 @@ plot_top_gsea_hits <- function(gsea,
}
}
# ================================================================================================
# Main
# ================================================================================================
# We do all integration schemes one by one.
for (i in seq_len(length(config$integrations))) {
# We get the integration name for that scheme.
integration <- config$integrations[[i]]
# We only do the enrichment if it is necessary.
if (integration$use_for_enrichment == "FALSE") {
next
}
# We do all limma comparisons one by one.
for (j in seq_len(length(config$limma_analyses))) {
# We extract the Limma parameters.
limma_parameters <- config$limma_analyses[[j]]
# We only do the enrichment if it is necessary.
if (limma_parameters$use_for_enrichment == "FALSE") {
next
}
for (tag in limma_parameters$enrichment_file_tags) {
#' @title Reads a file with LFC and P values and computes the pi values.
#'
#' @description This function reads a results file given an integration scheme, a limma
#' tag and a file tag. It then computes the pi values and maps the official symbols to
#' EntrezGene identifiers.
#'
#' @param integration The integration scheme to use (SN, DA, SNage).
#' @param limma_tag The name of the limma analysis to use.
#' @param file_tag The name of the file tag to use.
#' @param negative A boolean indicating whether the sign of the pi values should be inverted.
#' @return A dataframe with the gene official symbols, the EntrezGene identifiers (can be NA),
#' the log fold-change, P values and pi values.
get_pi_values <- function(integration, limma_tag, file_tag, negative = FALSE) {
# We define the I/Os.
analysis_prefix <- paste0(integration$name, "_VSN_", limma_parameters$name, "_max-avg_", tag)
analysis_prefix <- paste0(integration$name, "_VSN_", limma_tag, "_max-avg_", file_tag)
ofile_prefix <- paste0("CP_", analysis_prefix, "_")
ranking_file <- paste0(output_data_dir, analysis_prefix, "_rankings.tsv")
......@@ -364,6 +350,9 @@ for (i in seq_len(length(config$integrations))) {
pseudo_pval_ref <- 0.1 * (9 + max(ranking_clean$P_value[ranking_clean$P_value < 1]))
pseudo_pi_ref <- -log10(pseudo_pval_ref) * abs(ranking_clean$log_fold_change[ranking_clean$pi_value == 0])
ranking_clean$pi_value[ranking_clean$pi_value == 0] <- pseudo_pi_ref
if (negative) {
ranking_clean$pi_value <- -ranking_clean$pi_value
}
rm(pseudo_pval_ref, pseudo_pi_ref)
# We map the gene symbols to the EntrezGene identifiers.
......@@ -381,10 +370,47 @@ for (i in seq_len(length(config$integrations))) {
ranking_final$P_value <- as.numeric(ranking_final$P_value)
ranking_final$pi_value <- as.numeric(ranking_final$pi_value)
ranking_final <- ranking_final %>% arrange(desc(pi_value))
ranking_final_eg <- ranking_final %>% filter(!is.na(EGene))
rm(mapped_egenes, ranking_clean)
return(ranking_final)
}
# ================================================================================================
# Main
# ================================================================================================
# We do all integration schemes one by one.
for (i in seq_len(length(config$integrations))) {
# We get the integration name for that scheme.
integration <- config$integrations[[i]]
# We only do the enrichment if it is necessary.
if (integration$use_for_enrichment == "FALSE") {
next
}
# For each defined strategy
for (j in seq_len(length(config$enrichment_strategies))) {
# We get the enrichment strategy for that iteration.
enrichment_strategy <- config$enrichment_strategies[[j]]
# I/Os
analysis_prefix <- paste0(integration$name, "_VSN_", enrichment_strategy$limmas[1],
"_max-avg_", enrichment_strategy$file_tags[1])
ofile_prefix <- paste0("CP_", analysis_prefix, "_")
# We get the pi values of the two files
ranking_final <- get_pi_values(integration, enrichment_strategy$limmas[1],
enrichment_strategy$file_tags[1], negative = FALSE)
if (!is.na(enrichment_strategy$limmas[2])) {
ranking_final_neg <- get_pi_values(integration, enrichment_strategy$limmas[2],
enrichment_strategy$file_tags[2], negative = TRUE)
ranking_final <- rbind(ranking_final, ranking_final_neg)
}
# We prepare the gene id specific rankings (using either gene names or entrezgene ids).
ranking_final_eg <- ranking_final %>% filter(!is.na(EGene))
ranking_eg <- ranking_final_eg$pi_value
ranking_sy <- ranking_final$pi_value
logFC_eg <- ranking_final_eg$log_fold_change
......@@ -426,7 +452,7 @@ for (i in seq_len(length(config$integrations))) {
file_prefix = ofile_prefix,
simplify = FALSE)
# We plot all figures for the gsea (PVAL * FC).
# We plot all figures for the gsea (PVAL and FC).
plot_enrichment(gsea_pi,
sign(logFC),
ont_tag = func_source$cp_name,
......@@ -446,9 +472,7 @@ for (i in seq_len(length(config$integrations))) {
rm(func_source, gsea_pi, logFC, ranking)
} # End for each functional source.
rm(k, ranking_eg, ranking_sy, logFC_eg, logFC_sy, analysis_prefix, ofile_prefix)
} # End for each enrichment file tag.
rm(tag, limma_parameters)
} # End for each limma comparison.
} # End for each enrichment strategy.
rm(j, integration)
} # End for each integration.
rm(i)
......
......@@ -30,56 +30,42 @@ limma_analyses:
coefficient: "F - M"
name: "FemaleVsMale"
clinical_factor: "Gender"
use_for_enrichment: "FALSE"
enrichment_file_tags: []
use_for_network: "FALSE"
-
factor: Disease.status
coefficient: "PD - Control"
name: "PDVsControl"
clinical_factor: "Disease_status"
use_for_enrichment: "FALSE"
enrichment_file_tags: []
use_for_network: "TRUE"
-
factor: gender_disease_status
coefficient: "F.PD - M.PD"
name: "FemaleVsMale_PD"
clinical_factor: "Gender_PD"
use_for_enrichment: "FALSE"
enrichment_file_tags: []
use_for_network: "FALSE"
-
factor: gender_disease_status
coefficient: "F.Control - M.Control"
name: "FemaleVsMale_control"
clinical_factor: "Gender_Control"
use_for_enrichment: "FALSE"
enrichment_file_tags: []
use_for_network: "FALSE"
-
factor: gender_disease_status
coefficient: "F.PD - F.Control"
name: "PDVsControl_females"
clinical_factor: "Disease_status_females"
use_for_enrichment: "TRUE"
enrichment_file_tags: ["genderspecific", "genderspecific_genderdimorphic"]
use_for_network: "TRUE"
-
factor: gender_disease_status
coefficient: "M.PD - M.Control"
name: "PDVsControl_males"
clinical_factor: "Disease_status_males"
use_for_enrichment: "TRUE"
enrichment_file_tags: ["genderspecific", "genderspecific_genderdimorphic"]
use_for_network: "TRUE"
-
factor: gender_disease_status
coefficient: "(F.PD - F.Control) - (M.PD - M.Control)"
name: "Gender_disease_status"
clinical_factor: "Gender_disease_status"
use_for_enrichment: "TRUE"
enrichment_file_tags: ["genderdimorphic"]
use_for_network: "TRUE"
# Integration schemes
nb_min_pval: 2
......@@ -89,12 +75,12 @@ integrations:
-
name: SN
criteria: tissue;SN
use_for_enrichment: "FALSE"
use_for_enrichment: "TRUE"
use_for_network: "FALSE"
-
name: DA
criteria: tissue;DA
use_for_enrichment: "FALSE"
use_for_enrichment: "TRUE"
use_for_network: "FALSE"
-
name: SNage
......@@ -128,6 +114,22 @@ p_val_combinations:
short_name: zt
method: "z.transform"
# Enrichment parameters
enrichment_strategies:
-
limmas: ["PDVsControl_females", "PDVsControl_males"]
file_tags: ["genderspecific", "genderspecific"]
-
limmas: ["PDVsControl_males", "PDVsControl_females"]
file_tags: ["genderspecific", "genderspecific"]
-
limmas: ["PDVsControl_females", "PDVsControl_males"]
file_tags: ["genderspecific_genderdimorphic", "genderspecific"]
-
limmas: ["PDVsControl_males", "PDVsControl_females"]
file_tags: ["genderspecific_genderdimorphic", "genderspecific"]
-
limmas: ["Gender_disease_status"]
file_tags: ["genderdimorphic"]
nb_permutations: 150000
min_gs_size: 10
max_gs_size: 300
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment