Commit 21dcd288 authored by Leon-Charles Tranchevent's avatar Leon-Charles Tranchevent
Browse files

Adapted the scripts to identify the gender specific pathways as to include all...

Adapted the scripts to identify the gender specific pathways as to include all selected configurations.
parent 2406de9c
......@@ -106,13 +106,13 @@ for (i in seq_len(length(config$integrations))) {
# We run the Network based ORA.
output_dir <- paste0(output_data_dir, ofile_prefix, "tmp")
output_df <- tryCatch({
pathfindR::run_pathfindR(input = ranking_final,
gene_sets = func_source$pf_name,
enrichment_threshold = 1,
iterations = 10,
output_dir = output_dir,
visualize_enriched_terms = FALSE,
pin_name_path = "Biogrid")
suppressWarnings(pathfindR::run_pathfindR(input = ranking_final,
gene_sets = func_source$pf_name,
enrichment_threshold = 1,
iterations = 10,
output_dir = output_dir,
visualize_enriched_terms = FALSE,
pin_name_path = "Biogrid"))
}, warning = function(w) {}, error = function(e) {
# In case of a problem (most probably because there is not enough gene(s)
# to start with).
......
......@@ -70,116 +70,132 @@ compute_delta <- function(DB_ref, DB_ctrl, pval_thres = 0.1) {
# order to select the pathways that are only enriched for one of the two genders (gender specific
# pathways).
#
gsp_data <- c()
#
# KEGG by CP
#
M_filename <- paste0(output_data_dir, "CP_Male_gsea_KEGG.tsv")
F_filename <- paste0(output_data_dir, "CP_Female_gsea_KEGG.tsv")
M_data <- read.delim(M_filename, row.names = 1, stringsAsFactors = FALSE) %>%
select(ID, Description, p.adjust)
F_data <- read.delim(F_filename, row.names = 1, stringsAsFactors = FALSE) %>%
select(ID, Description, p.adjust)
gsp_data <- rbind(gsp_data, compute_delta(M_data, F_data) %>%
mutate(tool = "CP", source = "KEGG"))
rm(M_filename, F_filename, M_data, F_data)
#
# MSIGDB by CP
#
M_filename <- paste0(output_data_dir, "CP_Male_gsea_MSIGDB.tsv")
F_filename <- paste0(output_data_dir, "CP_Female_gsea_MSIGDB.tsv")
M_data <- read.delim(M_filename, row.names = 1, stringsAsFactors = FALSE) %>%
select(ID, Description, p.adjust)
F_data <- read.delim(F_filename, row.names = 1, stringsAsFactors = FALSE) %>%
select(ID, Description, p.adjust)
gsp_data <- rbind(gsp_data, compute_delta(M_data, F_data) %>%
mutate(tool = "CP", source = "MSIGDB"))
rm(M_filename, F_filename, M_data, F_data)
# We do all integration schemes one by one.
for (i in seq_len(length(config$integrations))) {
#
# REACTOME by CP
#
M_filename <- paste0(output_data_dir, "CP_Male_gsea_REACTOME.tsv")
F_filename <- paste0(output_data_dir, "CP_Female_gsea_REACTOME.tsv")
M_data <- read.delim(M_filename, row.names = 1, stringsAsFactors = FALSE) %>%
select(ID, Description, p.adjust)
F_data <- read.delim(F_filename, row.names = 1, stringsAsFactors = FALSE) %>%
select(ID, Description, p.adjust)
gsp_data <- rbind(gsp_data, compute_delta(M_data, F_data) %>%
mutate(tool = "CP", source = "REACTOME"))
rm(M_filename, F_filename, M_data, F_data)
#
# GO-BP by CP
#
M_filename <- paste0(output_data_dir, "CP_Male_gsea_GOBP.tsv")
F_filename <- paste0(output_data_dir, "CP_Female_gsea_GOBP.tsv")
M_data <- read.delim(M_filename, row.names = 1, stringsAsFactors = FALSE) %>%
select(ID, Description, p.adjust)
F_data <- read.delim(F_filename, row.names = 1, stringsAsFactors = FALSE) %>%
select(ID, Description, p.adjust)
gsp_data <- rbind(gsp_data, compute_delta(M_data, F_data) %>%
mutate(tool = "CP", source = "GOBP"))
rm(M_filename, F_filename, M_data, F_data)
# We get the integration name for that scheme.
integration <- config$integrations[[i]]
#
# GO-CC by CP
#
M_filename <- paste0(output_data_dir, "CP_Male_gsea_GOCC.tsv")
F_filename <- paste0(output_data_dir, "CP_Female_gsea_GOCC.tsv")
M_data <- read.delim(M_filename, row.names = 1, stringsAsFactors = FALSE) %>%
select(ID, Description, p.adjust)
F_data <- read.delim(F_filename, row.names = 1, stringsAsFactors = FALSE) %>%
select(ID, Description, p.adjust)
gsp_data <- rbind(gsp_data, compute_delta(M_data, F_data) %>%
mutate(tool = "CP", source = "GOCC"))
rm(M_filename, F_filename, M_data, F_data)
#
# GO-MF by CP
#
M_filename <- paste0(output_data_dir, "CP_Male_gsea_GOMF.tsv")
F_filename <- paste0(output_data_dir, "CP_Female_gsea_GOMF.tsv")
M_data <- read.delim(M_filename, row.names = 1, stringsAsFactors = FALSE) %>%
select(ID, Description, p.adjust)
F_data <- read.delim(F_filename, row.names = 1, stringsAsFactors = FALSE) %>%
select(ID, Description, p.adjust)
gsp_data <- rbind(gsp_data, compute_delta(M_data, F_data) %>%
mutate(tool = "CP", source = "GOMF"))
rm(M_filename, F_filename, M_data, F_data)
#
# MSIGDB by G2P
#
M_filename <- paste0(output_data_dir, "G2P_Male_MSIGDB.tsv")
F_filename <- paste0(output_data_dir, "G2P_Female_MSIGDB.tsv")
M_data <- read.delim(M_filename, row.names = 1, stringsAsFactors = FALSE)
M_data$adj_pv = M_data$pv * dim(M_data)[1]
M_data$adj_pv[M_data$adj_pv > 1] <- 1
M_data <- M_data %>%
select(pid, pname, adj_pv) %>% filter(!is.na(adj_pv)) %>%
rename(ID = pid, Description = pname, p.adjust = adj_pv)
minp_M <- min(M_data$p.adjust[M_data$p.adjust > 0])
M_data$p.adjust[M_data$p.adjust == 0] <- minp_M / 100
F_data <- read.delim(F_filename, row.names = 1, stringsAsFactors = FALSE)
F_data$adj_pv = F_data$pv * dim(F_data)[1]
F_data$adj_pv[F_data$adj_pv > 1] <- 1
F_data <- F_data %>%
select(pid, pname, adj_pv) %>% filter(!is.na(adj_pv)) %>%
rename(ID = pid, Description = pname, p.adjust = adj_pv)
minp_F <- min(F_data$p.adjust[F_data$p.adjust > 0])
F_data$p.adjust[F_data$p.adjust == 0] <- minp_F / 100
gsp_data <- rbind(gsp_data, compute_delta(M_data, F_data) %>%
mutate(tool = "G2P", source = "MSIGDB"))
rm(M_filename, F_filename, M_data, F_data, minp_M, minp_F)
# Save file with all specific pathways.
ofile <- paste0(output_data_dir, "Gender_specific_pathways.tsv")
write.table(gsp_data, file = ofile, sep = "\t", quote = FALSE, col.names = NA)
rm(ofile, gsp_data)
message(paste0("[", Sys.time(), "] Gender specific pathways identified."))
# We create a structure for all results.
gsp_data <- c()
male_prefix <- paste0(integration$name, "_PDVsControl_males_")
female_prefix <- paste0(integration$name, "_PDVsControl_females_")
#
# KEGG by CP
#
M_filename <- paste0(output_data_dir, "CP_", male_prefix, "gsea_KEGG.tsv")
F_filename <- paste0(output_data_dir, "CP_", female_prefix, "gsea_KEGG.tsv")
M_data <- read.delim(M_filename, row.names = 1, stringsAsFactors = FALSE) %>%
select(ID, Description, p.adjust)
F_data <- read.delim(F_filename, row.names = 1, stringsAsFactors = FALSE) %>%
select(ID, Description, p.adjust)
gsp_data <- rbind(gsp_data, compute_delta(M_data, F_data) %>%
mutate(tool = "CP", source = "KEGG"))
rm(M_filename, F_filename, M_data, F_data)
#
# MSIGDB by CP
#
M_filename <- paste0(output_data_dir, "CP_", male_prefix, "gsea_MSIGDB.tsv")
F_filename <- paste0(output_data_dir, "CP_", female_prefix, "gsea_MSIGDB.tsv")
M_data <- read.delim(M_filename, row.names = 1, stringsAsFactors = FALSE) %>%
select(ID, Description, p.adjust)
F_data <- read.delim(F_filename, row.names = 1, stringsAsFactors = FALSE) %>%
select(ID, Description, p.adjust)
gsp_data <- rbind(gsp_data, compute_delta(M_data, F_data) %>%
mutate(tool = "CP", source = "MSIGDB"))
rm(M_filename, F_filename, M_data, F_data)
#
# REACTOME by CP
#
M_filename <- paste0(output_data_dir, "CP_", male_prefix, "gsea_REACTOME.tsv")
F_filename <- paste0(output_data_dir, "CP_", female_prefix, "gsea_REACTOME.tsv")
M_data <- read.delim(M_filename, row.names = 1, stringsAsFactors = FALSE) %>%
select(ID, Description, p.adjust)
F_data <- read.delim(F_filename, row.names = 1, stringsAsFactors = FALSE) %>%
select(ID, Description, p.adjust)
gsp_data <- rbind(gsp_data, compute_delta(M_data, F_data) %>%
mutate(tool = "CP", source = "REACTOME"))
rm(M_filename, F_filename, M_data, F_data)
#
# GO-BP by CP
#
M_filename <- paste0(output_data_dir, "CP_", male_prefix, "gsea_GOBP.tsv")
F_filename <- paste0(output_data_dir, "CP_", female_prefix, "gsea_GOBP.tsv")
M_data <- read.delim(M_filename, row.names = 1, stringsAsFactors = FALSE) %>%
select(ID, Description, p.adjust)
F_data <- read.delim(F_filename, row.names = 1, stringsAsFactors = FALSE) %>%
select(ID, Description, p.adjust)
gsp_data <- rbind(gsp_data, compute_delta(M_data, F_data) %>%
mutate(tool = "CP", source = "GOBP"))
rm(M_filename, F_filename, M_data, F_data)
#
# GO-CC by CP
#
M_filename <- paste0(output_data_dir, "CP_", male_prefix, "gsea_GOCC.tsv")
F_filename <- paste0(output_data_dir, "CP_", female_prefix, "gsea_GOCC.tsv")
M_data <- read.delim(M_filename, row.names = 1, stringsAsFactors = FALSE) %>%
select(ID, Description, p.adjust)
F_data <- read.delim(F_filename, row.names = 1, stringsAsFactors = FALSE) %>%
select(ID, Description, p.adjust)
gsp_data <- rbind(gsp_data, compute_delta(M_data, F_data) %>%
mutate(tool = "CP", source = "GOCC"))
rm(M_filename, F_filename, M_data, F_data)
#
# GO-MF by CP
#
M_filename <- paste0(output_data_dir, "CP_", male_prefix, "gsea_GOMF.tsv")
F_filename <- paste0(output_data_dir, "CP_", female_prefix, "gsea_GOMF.tsv")
M_data <- read.delim(M_filename, row.names = 1, stringsAsFactors = FALSE) %>%
select(ID, Description, p.adjust)
F_data <- read.delim(F_filename, row.names = 1, stringsAsFactors = FALSE) %>%
select(ID, Description, p.adjust)
gsp_data <- rbind(gsp_data, compute_delta(M_data, F_data) %>%
mutate(tool = "CP", source = "GOMF"))
rm(M_filename, F_filename, M_data, F_data)
#
# MSIGDB by G2P
#
M_filename <- paste0(output_data_dir, "G2P_", male_prefix, "MSIGDB.tsv")
F_filename <- paste0(output_data_dir, "G2P_", female_prefix, "MSIGDB.tsv")
M_data <- read.delim(M_filename, row.names = 1, stringsAsFactors = FALSE)
M_data$adj_pv = M_data$pv * dim(M_data)[1]
M_data$adj_pv[M_data$adj_pv > 1] <- 1
M_data <- M_data %>%
select(pid, pname, adj_pv) %>% filter(!is.na(adj_pv)) %>%
rename(ID = pid, Description = pname, p.adjust = adj_pv)
minp_M <- min(M_data$p.adjust[M_data$p.adjust > 0])
M_data$p.adjust[M_data$p.adjust == 0] <- minp_M / 100
F_data <- read.delim(F_filename, row.names = 1, stringsAsFactors = FALSE)
F_data$adj_pv = F_data$pv * dim(F_data)[1]
F_data$adj_pv[F_data$adj_pv > 1] <- 1
F_data <- F_data %>%
select(pid, pname, adj_pv) %>% filter(!is.na(adj_pv)) %>%
rename(ID = pid, Description = pname, p.adjust = adj_pv)
minp_F <- min(F_data$p.adjust[F_data$p.adjust > 0])
F_data$p.adjust[F_data$p.adjust == 0] <- minp_F / 100
gsp_data <- rbind(gsp_data, compute_delta(M_data, F_data) %>%
mutate(tool = "G2P", source = "MSIGDB"))
rm(M_filename, F_filename, M_data, F_data, minp_M, minp_F)
# We do not run it yet for the ROT and PF tools since there is no obvious p-value to use (there are several).
# We can later adapt this script for these two tools as well.
# Save file with all specific pathways.
ofile <- paste0(output_data_dir, integration$name, "_gender_specific_pathways.tsv")
write.table(gsp_data, file = ofile, sep = "\t", quote = FALSE, col.names = NA)
rm(ofile, gsp_data)
message(paste0("[", Sys.time(), "] [", integration$name, "] Gender specific pathways identified."))
rm(integration, male_prefix, female_prefix)
} # End for each integration.
rm(i)
# Final cleaning.
rm(config, input_data_dir, output_data_dir)
......
......@@ -4,7 +4,7 @@
#SBATCH --mail-user=leon-charles.tranchevent@uni.lu
#SBATCH -N 1
#SBATCH -n 8
#SBATCH --time=0-00:30:00
#SBATCH --time=0-00:01:00
#SBATCH -p batch
#SBATCH --qos=qos-batch
......
......@@ -61,56 +61,81 @@ for (i in seq_len(length(config$integrations))) {
cp_filename <- paste0(output_data_dir, "CP_", analysis_prefix, "gsea_GOBP.tsv")
pf_filename <- paste0(output_data_dir, "PF_", analysis_prefix, "GO-BP.tsv")
combined_fn <- paste0(output_data_dir, "COMB_", analysis_prefix, "GO-BP.tsv")
cp_data <- read.delim(cp_filename, row.names = 1, stringsAsFactors = FALSE)
pf_data <- read.delim(pf_filename, row.names = 1, stringsAsFactors = FALSE)
combined <- merge(x = cp_data, y = pf_data, by = "ID")
write.table(combined, file = combined_fn, sep = "\t", quote = FALSE)
rm(cp_filename, pf_filename, combined_fn, cp_data, pf_data, combined)
if (file.exists(pf_filename)) {
cp_data <- read.delim(cp_filename, row.names = 1, stringsAsFactors = FALSE)
pf_data <- read.delim(pf_filename, row.names = 1, stringsAsFactors = FALSE)
combined <- merge(x = cp_data, y = pf_data, by = "ID")
write.table(combined, file = combined_fn, sep = "\t", quote = FALSE)
rm(cp_data, pf_data, combined)
}
rm(cp_filename, pf_filename, combined_fn)
# III- We merge the GO-CC results by CP and PF.
cp_filename <- paste0(output_data_dir, "CP_", analysis_prefix, "gsea_GOCC.tsv")
pf_filename <- paste0(output_data_dir, "PF_", analysis_prefix, "GO-CC.tsv")
combined_fn <- paste0(output_data_dir, "COMB_", analysis_prefix, "GO-CC.tsv")
cp_data <- read.delim(cp_filename, row.names = 1, stringsAsFactors = FALSE)
pf_data <- read.delim(pf_filename, row.names = 1, stringsAsFactors = FALSE)
combined <- merge(x = cp_data, y = pf_data, by = "ID")
write.table(combined, file = combined_fn, sep = "\t", quote = FALSE)
rm(cp_filename, pf_filename, combined_fn, cp_data, pf_data, combined)
if (file.exists(pf_filename)) {
cp_data <- read.delim(cp_filename, row.names = 1, stringsAsFactors = FALSE)
pf_data <- read.delim(pf_filename, row.names = 1, stringsAsFactors = FALSE)
combined <- merge(x = cp_data, y = pf_data, by = "ID")
write.table(combined, file = combined_fn, sep = "\t", quote = FALSE)
rm(cp_data, pf_data, combined)
}
rm(cp_filename, pf_filename, combined_fn)
# IV- We merge the GO-MF results by CP and PF.
cp_filename <- paste0(output_data_dir, "CP_", analysis_prefix, "gsea_GOMF.tsv")
pf_filename <- paste0(output_data_dir, "PF_", analysis_prefix, "GO-MF.tsv")
combined_fn <- paste0(output_data_dir, "COMB_", analysis_prefix, "GO-MF.tsv")
cp_data <- read.delim(cp_filename, row.names = 1, stringsAsFactors = FALSE)
pf_data <- read.delim(pf_filename, row.names = 1, stringsAsFactors = FALSE)
combined <- merge(x = cp_data, y = pf_data, by = "ID")
write.table(combined, file = combined_fn, sep = "\t", quote = FALSE)
rm(cp_filename, pf_filename, combined_fn, cp_data, pf_data, combined)
if (file.exists(pf_filename)) {
cp_data <- read.delim(cp_filename, row.names = 1, stringsAsFactors = FALSE)
pf_data <- read.delim(pf_filename, row.names = 1, stringsAsFactors = FALSE)
combined <- merge(x = cp_data, y = pf_data, by = "ID")
write.table(combined, file = combined_fn, sep = "\t", quote = FALSE)
rm(cp_data, pf_data, combined)
}
rm(cp_filename, pf_filename, combined_fn)
# V- We merge the REACTOME results by CP and PF.
cp_filename <- paste0(output_data_dir, "CP_", analysis_prefix, "gsea_REACTOME.tsv")
pf_filename <- paste0(output_data_dir, "PF_", analysis_prefix, "Reactome.tsv")
combined_fn <- paste0(output_data_dir, "COMB_", analysis_prefix, "REACTOME.tsv")
cp_data <- read.delim(cp_filename, row.names = 1, stringsAsFactors = FALSE)
pf_data <- read.delim(pf_filename, row.names = 1, stringsAsFactors = FALSE)
combined <- merge(x = cp_data, y = pf_data, by = "ID")
write.table(combined, file = combined_fn, sep = "\t", quote = FALSE)
rm(cp_filename, pf_filename, combined_fn, cp_data, pf_data, combined)
if (file.exists(pf_filename)) {
cp_data <- read.delim(cp_filename, row.names = 1, stringsAsFactors = FALSE)
pf_data <- read.delim(pf_filename, row.names = 1, stringsAsFactors = FALSE)
combined <- merge(x = cp_data, y = pf_data, by = "ID")
write.table(combined, file = combined_fn, sep = "\t", quote = FALSE)
rm(cp_data, pf_data, combined)
}
rm(cp_filename, pf_filename, combined_fn)
# VI- We merge the KEGG results by CP, ROT and PF.
cp_filename <- paste0(output_data_dir, "CP_", analysis_prefix, "gsea_KEGG.tsv")
rot_filename <- paste0(output_data_dir, "ROT_", analysis_prefix, "KEGG.tsv")
pf_filename <- paste0(output_data_dir, "PF_", analysis_prefix, "KEGG.tsv")
combined_fn <- paste0(output_data_dir, "COMB_", analysis_prefix, "KEGG.tsv")
cp_data <- read.delim(cp_filename, row.names = 1, stringsAsFactors = FALSE)
rot_data <- read.delim(rot_filename, stringsAsFactors = FALSE) %>%
mutate(ID = str_replace(X, "path:", ""))
pf_data <- read.delim(pf_filename, row.names = 1, stringsAsFactors = FALSE)
combined <- merge(x = merge(x = cp_data, y = rot_data, by = "ID"),
y = pf_data, by = "ID")
write.table(combined, file = combined_fn, sep = "\t", quote = FALSE)
rm(cp_filename, rot_filename, pf_filename, combined_fn, cp_data, rot_data, pf_data, combined)
if (file.exists(pf_filename)) {
cp_data <- read.delim(cp_filename, row.names = 1, stringsAsFactors = FALSE)
rot_data <- read.delim(rot_filename, stringsAsFactors = FALSE) %>%
mutate(ID = str_replace(X, "path:", ""))
pf_data <- read.delim(pf_filename, row.names = 1, stringsAsFactors = FALSE)
combined <- merge(x = merge(x = cp_data, y = rot_data, by = "ID"),
y = pf_data, by = "ID")
write.table(combined, file = combined_fn, sep = "\t", quote = FALSE)
rm(cp_data, rot_data, pf_data, combined)
} else {
cp_data <- read.delim(cp_filename, row.names = 1, stringsAsFactors = FALSE)
rot_data <- read.delim(rot_filename, stringsAsFactors = FALSE) %>%
mutate(ID = str_replace(X, "path:", ""))
combined <- merge(x = cp_data, y = rot_data, by = "ID")
write.table(combined, file = combined_fn, sep = "\t", quote = FALSE)
rm(cp_data, rot_data, combined)
}
rm(cp_filename, rot_filename, pf_filename, combined_fn)
message(paste0("[", Sys.time(), "][", analysis_prefix, "] Combining enrichment results is done."))
rm(analysis_prefix)
# We only have PF results if we have a specific file for that limma comparison,
# (e.g., PDVsControl_male -> PDVsControl_male_specific).
if (limma_parameters$can_be_specific == "TRUE") {
......@@ -139,13 +164,14 @@ for (i in seq_len(length(config$integrations))) {
combined <- merge(x = cp_data, y = rot_data, by = "ID")
write.table(combined, file = combined_fn, sep = "\t", quote = FALSE)
rm(cp_filename, rot_filename, combined_fn, cp_data, rot_data, combined)
message(paste0("[", Sys.time(), "][", analysis_prefix, "] Combining enrichment results is done."))
rm(analysis_prefix)
} # End of depending on the input files, we have PF results or not.
message(paste0("[", Sys.time(), "][", analysis_prefix, "] Combining enrichment results is done."))
rm(analysis_prefix, limma_parameters)
rm(limma_parameters)
} # End for each limma comparison.
rm(j, integration)
rm(j, integration)
} # End for each integration.
rm(i)
......
......@@ -4,7 +4,7 @@
#SBATCH --mail-user=leon-charles.tranchevent@uni.lu
#SBATCH -N 1
#SBATCH -n 8
#SBATCH --time=0-00:30:00
#SBATCH --time=0-00:01:00
#SBATCH -p batch
#SBATCH --qos=qos-batch
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment