Commit 3c790cda authored by Leon-Charles Tranchevent
Browse files

New GD rankings based on signals from both male and female analyses.

parent 6e905134
......@@ -63,7 +63,6 @@ for (i in seq_len(length(config$integrations))) {
# We get the integration name for that scheme.
integration <- config$integrations[[i]]
int_criteria <- str_split(integration$criteria, ",")[[1]]
# Default parameters for the VSN and probe selection method.
j <- 1
......@@ -672,7 +671,7 @@ for (i in seq_len(length(config$integrations))) {
rm(res_males_fn, k, limma_analysis, n)
rm(GS_females, GS_males)
rm(potGD, res_females, res_males, spec_females, spec_males)
rm(l, selection, vsn, j, integration, int_criteria)
rm(l, selection, vsn, j, integration)
} # End for each integration scheme.
# We clean the workspace and log the session details for reproducibility.
......
......@@ -28,8 +28,9 @@ module load lang/R/3.6.0-foss-2019a-bare
Rscript --vanilla ${CODE_FOLDER}/compute_gender_specificity.R > ${OUTPUT_FOLDER}gdr_spec_log.out 2> ${OUTPUT_FOLDER}gdr_spec_log.err
Rscript --vanilla ${CODE_FOLDER}/compute_dataset_overlap.R > ${OUTPUT_FOLDER}dataset_overlap_log.out 2> ${OUTPUT_FOLDER}dataset_overlap_log.err
Rscript --vanilla ${CODE_FOLDER}/merge_and_filter_rankings.R > ${OUTPUT_FOLDER}merge_filter_log.out 2> ${OUTPUT_FOLDER}merge_filter_log.err
Rscript --vanilla ${CODE_FOLDER}/refine_GD_rankings.R > ${OUTPUT_FOLDER}refine_rankings_log.out 2> ${OUTPUT_FOLDER}refine_rankings_log.err
# Additional file that contains the genes that have different probe mappings between the male and female analyses.
# Additional file that contains the genes that have different probe mappings between the male and female analyses.
diff <(sort -k2,2 ${OUTPUT_FOLDER}SNage_VSN_PDVsControl_females_matching_data.tsv | cut -f 2,12,15,18,21,24) <(sort -k2,2 ${OUTPUT_FOLDER}SNage_VSN_PDVsControl_males_matching_data.tsv | cut -f 2,12,15,21,24,27) | grep '>' | cut -f 1 | sed -r "s/> //g" > ${OUTPUT_FOLDER}SNage_VSN_PDVsControl_females_vs_males_matching_data_problems.tsv
# Moving the slurm log file to data
......
#!/usr/bin/env Rscript
# ================================================================================================
# Libraries
# ================================================================================================
library("yaml")
library("tidyverse")
library("stringr")
source("../libs/conf/confR.R")
source("../libs/utils/utils.R")
message(paste0("[", Sys.time(), "] Libraries loaded."))
# ================================================================================================
# Configuration
# ================================================================================================
# Use cairo as the default type for the bitmap graphics devices.
options(bitmapType = "cairo")
# Load the configuration, looking into the shared and the local folders. read_config is not
# defined in this script (presumably provided by the sourced confR.R -- to be confirmed).
config <- read_config(config_dirs = c("../Confs/", "./"))
# Folder from which the gender-specific rankings are read and to which the refined rankings
# are written.
output_data_dir <- paste0(config$global_data_dir, config$local_data_dir)
# The three input folders below are not used by the rest of this script, apart from being
# removed during the final cleanup.
input_data_dir <- paste0(config$global_data_dir, config$local_input_data_dir)
input_edata_dir <- paste0(config$global_data_dir, config$local_exp_data_dir)
input_rsedata_dir <- paste0(config$global_data_dir, config$local_rs_exp_data_dir)
message(paste0("[", Sys.time(), "] Configuration done."))
# ================================================================================================
# Main
# ================================================================================================
# The aim of this script is to refine the GD rankings that are present for males and females and
# create a single GD ranking based on both by summing up the male and female signals.
# We focus on one configuration, our main configuration (SNage, VSN, max-avg) but we also
# run the other integration schemes (SN and DA) with the same default parameters.

# Positions, in config$limma_analyses, of the analyses whose rankings are refined here
# (presumably the female and male analyses respectively -- to be confirmed against the
# configuration file).
females_idx <- 5
males_idx <- 6

# Builds the full path of a ranking file in the output data folder.
# - integration, vsn, limma_analysis, selection: configuration entries (only $name is used).
# - suffix: end of the file name, including the extension.
gd_ranking_path <- function(integration, vsn, limma_analysis, selection, suffix) {
  paste0(output_data_dir, integration$name, "_", vsn$name, "_", limma_analysis$name,
         "_", selection$name, suffix)
}

# Reads a tab-separated ranking file (with a header line) as a data frame.
read_gd_ranking <- function(path) {
  read.table(path,
             sep = "\t",
             header = TRUE, stringsAsFactors = FALSE)
}

# Writes a ranking as a tab-separated file, without quotes and without row names.
write_gd_ranking <- function(ranking, path) {
  write.table(ranking, path,
              quote = FALSE,
              sep = "\t",
              row.names = FALSE)
}

# Merges two rankings by gene and sums their ranking values.
# The statistics (log fold change, P values, pi value) that are kept are the ones of
# `reference`; from `other`, only the ranking value is used. The result is sorted by
# decreasing summed ranking value.
# NOTE(review): the merge keeps all genes (all = TRUE), so a gene that is present in only
# one of the two rankings gets a NA sum (and NA statistics when it is absent from
# `reference`); such genes end up at the bottom of the ranking. Please confirm that this
# is the intended behaviour.
combine_gd_rankings <- function(reference, other) {
  merge(x = reference, y = other, by = "Gene", all = TRUE, suffixes = c("", ".other")) %>%
    mutate(ranking_value = ranking_value + ranking_value.other) %>%
    select(Gene, log_fold_change, P_value, adj_P_value, pi_value, ranking_value) %>%
    arrange(desc(ranking_value))
}

for (i in seq_along(config$integrations)) {

  # We get the integration name for that scheme.
  integration <- config$integrations[[i]]

  # Default parameters for the VSN and probe selection method.
  vsn <- config$variance_methods[[1]]
  selection <- config$selections[[1]]
  limma_females <- config$limma_analyses[[females_idx]]
  limma_males <- config$limma_analyses[[males_idx]]

  #################################################################################################
  # A: we start with the rankings based on pi values.
  #################################################################################################

  # We read the existing gender-specific rankings.
  GD_females <- read_gd_ranking(gd_ranking_path(integration, vsn, limma_females, selection,
                                                "_gd_pivalue_rankings.tsv"))
  GD_males <- read_gd_ranking(gd_ranking_path(integration, vsn, limma_males, selection,
                                              "_gd_pivalue_rankings.tsv"))

  # We then merge across the two genders, once with the females as reference and once
  # with the males as reference, and we save each refined ranking.
  write_gd_ranking(combine_gd_rankings(GD_females, GD_males),
                   gd_ranking_path(integration, vsn, limma_females, selection,
                                   "_gd_pivalueref_rankings.tsv"))
  write_gd_ranking(combine_gd_rankings(GD_males, GD_females),
                   gd_ranking_path(integration, vsn, limma_males, selection,
                                   "_gd_pivalueref_rankings.tsv"))

  rm(GD_females, GD_males, limma_females, limma_males)
  rm(selection, vsn, integration)
} # End for each integration scheme.

# We clean the workspace and log the session details for reproducibility.
rm(i)
rm(females_idx, males_idx)
rm(gd_ranking_path, read_gd_ranking, write_gd_ranking, combine_gd_rankings)
rm(config, output_data_dir, input_data_dir, input_edata_dir, input_rsedata_dir)
sessionInfo()
......@@ -52,8 +52,8 @@ duplicate_row_first_cell <- function(row, sep = "|") {
log_fold_change = val_1,
P_value = val_2,
adj_P_value = val_3,
pi_value = val_3,
ranking_value = val_4)
pi_value = val_4,
ranking_value = val_5)
return(to_ret)
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment