Commit d7b41601 authored by Leon-Charles Tranchevent's avatar Leon-Charles Tranchevent
Browse files

Added code to select the most interesting interactions from given regulatory...

Added code to select the most interesting interactions from given regulatory networks (based on differential expression data and consistency with knowledge databases).
parent c9b5b26b
......@@ -33,13 +33,16 @@ refineGRN1:
@echo "* LOCATED IN THE DATA FOLDER. MISSING GENES SHOULD BE DOUBLE CHECKED IN *"
@echo "* GENEGO. *"
@echo "********************************************************************************"
# End of "From now on, we need GeneGo results".
refineGRN2:
@sbatch ${CODE_FOLDER}refine_GRN.sh
refineEnrich:
@sbatch ${CODE_FOLDER}refine_enrich.sh
prepCarnival:
@sbatch ${CODE_FOLDER}prepare_carnival.sh
runCarnival:
@sbatch ${CODE_FOLDER}run_carnival.sh
refineCarnival:
@sbatch ${CODE_FOLDER}refine_carnival.sh
#prepCarnival:
# @sbatch ${CODE_FOLDER}prepare_carnival.sh
#runCarnival:
# @sbatch ${CODE_FOLDER}run_carnival.sh
#refineCarnival:
# @sbatch ${CODE_FOLDER}refine_carnival.sh
selectInts:
@sbatch ${CODE_FOLDER}select_interactions.sh
#!/usr/bin/env Rscript
# ================================================================================================
# Libraries
# ================================================================================================
library("yaml")
library("stringr")
library("tidyverse")
library("viper")
source("../libs/conf/confR.R")
source("../libs/utils/utils.R")
message(paste0("[", Sys.time(), "] Libraries loaded."))
# ================================================================================================
# Configuration
# ================================================================================================
options(bitmapType = "cairo")
config <- read_config(config_dirs = c("../Confs/", "./"))
output_data_dir <- paste0(config$global_data_dir, config$local_data_dir)
input_data_dir <- paste0(config$global_data_dir, config$local_input_data_dir)
message(paste0("[", Sys.time(), "] Configuration done."))
# ================================================================================================
# Functions
# ================================================================================================
#' @title Duplicate rows by splitting on the first cell.
#'
#' @description This function takes one row and split its first cell.
#' It then creates the necessary extra rows while copying the second cell.
#' Ex: "a|b", "whatever" --> "a", "whatever"
#' "b", "whatever"
#'
#' @param row A vector of two values consisting of a set of values (possibly only one) and
#' some other value on the second cell (Not to be splitted).
#' @param sep The string separator for the split.
#' @return A matrix of rows (possibly only one row).
duplicate_row_first_cell <- function(row, sep = "|") {
keys <- strsplit(row[1], sep, fixed = TRUE)[[1]]
val_1 <- rep(row[2], length(keys))
val_2 <- rep(row[3], length(keys))
val_3 <- rep(row[4], length(keys))
to_ret <- cbind(Gene = keys, LFC = val_1, P_value = val_2, adj_P_value = val_3)
return(to_ret)
}
# ================================================================================================
# Main
# ================================================================================================
# Read the GeneGo and DoRothEA data (regulatory networks extracted from their databases).
dr_data_fn <- paste0(output_data_dir, "SNage_VSN_PDVsControl_all_lists_dorothea_GRN_internal_refined.tsv")
dr_data <- read.delim(dr_data_fn, header = TRUE, stringsAsFactors = FALSE)
gg_data_fn <- paste0(output_data_dir, "SNage_VSN_PDVsControl_all_lists_genego_GRN_internal_refined.tsv")
gg_data <- read.delim(gg_data_fn, header = TRUE, stringsAsFactors = FALSE)
rm(dr_data_fn, gg_data_fn)
# For the computations, we simply put all unknown log FC values to 0 and the adjusted P values to 1.
# This simplification is useful for Cytoscape.
dr_data$Source.F.logFC[is.na(dr_data$Source.F.logFC)] <- 0
dr_data$Target.F.logFC[is.na(dr_data$Target.F.logFC)] <- 0
dr_data$Source.M.logFC[is.na(dr_data$Source.M.logFC)] <- 0
dr_data$Target.M.logFC[is.na(dr_data$Target.M.logFC)] <- 0
dr_data$Source.F.adj.P.value[is.na(dr_data$Source.F.adj.P.value)] <- 1
dr_data$Target.F.adj.P.value[is.na(dr_data$Target.F.adj.P.value)] <- 1
dr_data$Source.M.adj.P.value[is.na(dr_data$Source.M.adj.P.value)] <- 1
dr_data$Target.M.adj.P.value[is.na(dr_data$Target.M.adj.P.value)] <- 1
gg_data$Source.F.logFC[is.na(gg_data$Source.F.logFC)] <- 0
gg_data$Target.F.logFC[is.na(gg_data$Target.F.logFC)] <- 0
gg_data$Source.M.logFC[is.na(gg_data$Source.M.logFC)] <- 0
gg_data$Target.M.logFC[is.na(gg_data$Target.M.logFC)] <- 0
gg_data$Source.F.adj.P.value[is.na(gg_data$Source.F.adj.P.value)] <- 1
gg_data$Target.F.adj.P.value[is.na(gg_data$Target.F.adj.P.value)] <- 1
gg_data$Source.M.adj.P.value[is.na(gg_data$Source.M.adj.P.value)] <- 1
gg_data$Target.M.adj.P.value[is.na(gg_data$Target.M.adj.P.value)] <- 1
# We read the list of selected TFs, we will only keep the interactions for these TFs (modulo some other
# conditions - see below).
sel_tfs_fn <- paste0(output_data_dir, "SelectedTFs_fulllist.tsv")
sel_tfs <- read.delim(sel_tfs_fn, header = FALSE, stringsAsFactors = FALSE)$V1
rm(sel_tfs_fn)
# We start by filtering the interactions that are about the selected TFs (either as TF or as target of
# another TF).
dr_data_sel <- rbind(dr_data %>% filter(Source %in% sel_tfs),
dr_data %>% filter(Target %in% sel_tfs))
gg_data_sel <- rbind(gg_data %>% filter(Source %in% sel_tfs),
gg_data %>% filter(Target %in% sel_tfs))
rm(dr_data, gg_data)
# We then filter the links based on the differential expression and the known regulation.
# The idea is to keep an interaction if and only if the experimental signal agrees with
# the known regulation. We start by adding some more fields as to ease the retrieval of the
# suitable interactions.
dr_data_sel <- dr_data_sel %>%
mutate(Source_F_best = abs(Source.F.logFC) > abs(Source.M.logFC)) %>%
mutate(Target_F_best = abs(Target.F.logFC) > abs(Target.M.logFC)) %>%
mutate(Source_best_sign = ifelse(abs(Source.F.logFC) > abs(Source.M.logFC),
sign(Source.F.logFC), sign(Source.M.logFC))) %>%
mutate(Target_best_sign = ifelse(abs(Target.F.logFC) > abs(Target.M.logFC),
sign(Target.F.logFC), sign(Target.M.logFC)))
gg_data_sel <- gg_data_sel %>%
mutate(Source_F_best = abs(Source.F.logFC) > abs(Source.M.logFC)) %>%
mutate(Target_F_best = abs(Target.F.logFC) > abs(Target.M.logFC)) %>%
mutate(Source_best_sign = ifelse(abs(Source.F.logFC) > abs(Source.M.logFC),
sign(Source.F.logFC), sign(Source.M.logFC))) %>%
mutate(Target_best_sign = ifelse(abs(Target.F.logFC) > abs(Target.M.logFC),
sign(Target.F.logFC), sign(Target.M.logFC)))
# We then first select the suitable activations.
dr_data_sel_act <- dr_data_sel %>% filter(Regulation == "Activation") %>%
filter(Source_F_best == Target_F_best,
Source_best_sign == Target_best_sign) %>%
mutate(Obs_regulation = "Activation")
gg_data_sel_act <- gg_data_sel %>% filter(Regulation == "Activation") %>%
filter(Source_F_best == Target_F_best,
Source_best_sign == Target_best_sign) %>%
mutate(Obs_regulation = "Activation")
# Then, the inhibitions.
dr_data_sel_inh <- dr_data_sel %>% filter(Regulation == "Inhibition") %>%
filter(Source_F_best == Target_F_best,
Source_best_sign != Target_best_sign) %>%
mutate(Obs_regulation = "Inhibition")
gg_data_sel_inh <- gg_data_sel %>% filter(Regulation == "Inhibition") %>%
filter(Source_F_best == Target_F_best,
Source_best_sign != Target_best_sign) %>%
mutate(Obs_regulation = "Inhibition")
# Then, the unspecified. There we only check whether these could be activations or inhibitions or
# whether they do not make sense (in case we discard them).
dr_data_sel_unspe_act <- dr_data_sel %>% filter(Regulation == "Unspecified") %>%
filter(Source_F_best == Target_F_best,
Source_best_sign == Target_best_sign) %>%
mutate(Obs_regulation = "Activation")
gg_data_sel_unspe_act <- gg_data_sel %>% filter(Regulation == "Unspecified") %>%
filter(Source_F_best == Target_F_best,
Source_best_sign == Target_best_sign) %>%
mutate(Obs_regulation = "Activation")
dr_data_sel_unspe_inh <- dr_data_sel %>% filter(Regulation == "Unspecified") %>%
filter(Source_F_best == Target_F_best,
Source_best_sign != Target_best_sign) %>%
mutate(Obs_regulation = "Inhibition")
gg_data_sel_unspe_inh <- gg_data_sel %>% filter(Regulation == "Unspecified") %>%
filter(Source_F_best == Target_F_best,
Source_best_sign != Target_best_sign) %>%
mutate(Obs_regulation = "Inhibition")
# We concatenate all selected interactions and save the files.
dr_data_sel_final <- rbind(dr_data_sel_act, dr_data_sel_inh, dr_data_sel_unspe_act, dr_data_sel_unspe_inh) %>% unique()
gg_data_sel_final <- rbind(gg_data_sel_act, gg_data_sel_inh, gg_data_sel_unspe_act, gg_data_sel_unspe_inh) %>% unique()
rm(dr_data_sel, dr_data_sel_act, dr_data_sel_inh, dr_data_sel_unspe_act, dr_data_sel_unspe_inh)
rm(gg_data_sel, gg_data_sel_act, gg_data_sel_inh, gg_data_sel_unspe_act, gg_data_sel_unspe_inh)
dr_data_fn <- paste0(output_data_dir, "SNage_VSN_PDVsControl_all_lists_dorothea_GRN_internal_refined_selected.tsv")
gg_data_fn <- paste0(output_data_dir, "SNage_VSN_PDVsControl_all_lists_genego_GRN_internal_refined_selected.tsv")
write.table(dr_data_sel_final, file = dr_data_fn, sep = "\t", quote = FALSE, row.names = FALSE)
write.table(gg_data_sel_final, file = gg_data_fn, sep = "\t", quote = FALSE, row.names = FALSE)
rm(dr_data_fn, gg_data_fn)
# We prepare the pseudo SIF files (for Cytoscape).
dr_data_assif <- dr_data_sel_final %>% mutate(sif = paste(Source, Obs_regulation, Target)) %>% select(sif)
gg_data_assif <- gg_data_sel_final %>% mutate(sif = paste(Source, Obs_regulation, Target)) %>% select(sif)
# We prepare the annotations (for Cytoscape again).
dr_data_annots <- rbind(dr_data_sel_final %>%
select(Source, Source.F.logFC, Source.F.adj.P.value, Source.M.logFC, Source.M.adj.P.value) %>%
unique() %>%
rename(Gene = Source,
F.logFC = Source.F.logFC,
F.adj.P.value = Source.F.adj.P.value,
M.logFC = Source.M.logFC,
M.adj.P.value = Source.M.adj.P.value),
dr_data_sel_final %>%
select(Target, Target.F.logFC, Target.F.adj.P.value, Target.M.logFC, Target.M.adj.P.value) %>%
unique() %>%
rename(Gene = Target,
F.logFC = Target.F.logFC,
F.adj.P.value = Target.F.adj.P.value,
M.logFC = Target.M.logFC,
M.adj.P.value = Target.M.adj.P.value)) %>%
mutate(is_TF = case_when(Gene %in% sel_tfs ~ 1, TRUE ~ 0)) %>% unique() %>%
mutate(core_set = (is_TF | abs(F.logFC - M.logFC) >= 1 | sign(F.logFC) == -sign(M.logFC)))
gg_data_annots <- rbind(gg_data_sel_final %>%
select(Source, Source.F.logFC, Source.F.adj.P.value, Source.M.logFC, Source.M.adj.P.value) %>%
unique() %>%
rename(Gene = Source,
F.logFC = Source.F.logFC,
F.adj.P.value = Source.F.adj.P.value,
M.logFC = Source.M.logFC,
M.adj.P.value = Source.M.adj.P.value),
gg_data_sel_final %>%
select(Target, Target.F.logFC, Target.F.adj.P.value, Target.M.logFC, Target.M.adj.P.value) %>%
unique() %>%
rename(Gene = Target,
F.logFC = Target.F.logFC,
F.adj.P.value = Target.F.adj.P.value,
M.logFC = Target.M.logFC,
M.adj.P.value = Target.M.adj.P.value)) %>%
mutate(is_TF = case_when(Gene %in% sel_tfs ~ 1, TRUE ~ 0)) %>% unique() %>%
mutate(core_set = (is_TF | abs(F.logFC - M.logFC) >= 1 | sign(F.logFC) == -sign(M.logFC)))
rm(dr_data_sel_final, gg_data_sel_final)
# Save the network and annot files.
dr_data_sif_fn <- paste0(output_data_dir, "SNage_VSN_PDVsControl_all_lists_dorothea_GRN_internal_refined_selected_network.sif")
write.table(dr_data_assif, file = dr_data_sif_fn, sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE)
gg_data_sif_fn <- paste0(output_data_dir, "SNage_VSN_PDVsControl_all_lists_genego_GRN_internal_refined_selected_network.sif")
write.table(gg_data_assif, file = gg_data_sif_fn, sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE)
rm(dr_data_sif_fn, dr_data_assif, gg_data_sif_fn, gg_data_assif)
dr_data_annots_fn <- paste0(output_data_dir, "SNage_VSN_PDVsControl_all_lists_dorothea_GRN_internal_refined_selected_annotations.tsv")
write.table(dr_data_annots, file = dr_data_annots_fn, sep = "\t", quote = FALSE, row.names = FALSE)
gg_data_annots_fn <- paste0(output_data_dir, "SNage_VSN_PDVsControl_all_lists_genego_GRN_internal_refined_selected_annotations.tsv")
write.table(gg_data_annots, file = gg_data_annots_fn, sep = "\t", quote = FALSE, row.names = FALSE)
rm(dr_data_annots_fn, dr_data_annots, gg_data_annots_fn, gg_data_annots)
# In a second step, we now take care of the regulons interactions that are obtained via the R package.
# These are the interactions that are being used by DoRothEA to compute the enrichment and they can be
# different than the ones DoRothEA retrieves from OmniPath.
# There is more work in this case as the annotations (logFC and adjusted P values) are missing.
# Load TF regulon genesets in VIPER format (as in DoRothEA manual).
regulon_path <- paste0(config$global_raw_data_dir, "Else/DoRothEA/data/TFregulons/consensus/",
"Robjects_VIPERformat/normal/TOP10score_viperRegulon.rdata")
load(regulon_path)
rm(regulon_path)
# We update the regulon object here based on the DoRothEA_unmapped_mapped_clean.tsv file.
update_info_fn <- paste0(output_data_dir, "Dorothea_geneids_unmapped_mapped_clean.tsv")
update_info <- read.delim(update_info_fn, stringsAsFactors = FALSE, header = FALSE)
rm(update_info_fn)
# We get the viper names (also has evidence level, aka EGFR_A).
vp_fullnames <- unlist(str_split(names(viper_regulon), "_"))
dim(vp_fullnames) <- c(2, length(names(viper_regulon)))
vp_names <- vp_fullnames[1, ]
# We udpate the names and then the object.
for (ii in seq_len(dim(update_info)[1])) {
vp_names[vp_names == update_info[ii, 1]] <- update_info[ii, 2]
}
names(viper_regulon) <- paste(vp_names, vp_fullnames[2, ], sep = "_")
rm(update_info, vp_fullnames, vp_names, ii)
# Update the list of all known TFs in viper_regulon.
vp_fullnames <- unlist(str_split(names(viper_regulon), "_"))
dim(vp_fullnames) <- c(2, length(names(viper_regulon)))
# We get the relevant lists of DEGs for the PDVSControl_males and PDVSControl_females, both for
# the third integration scheme (SNage).
integration <- config$integrations[[3]]
limma_parameters <- config$limma_analyses[[5]]
analysis_prefix_F <- paste0(integration$name, "_VSN_", limma_parameters$name, "_max-avg")
ranking_file_F <- paste0(input_data_dir, analysis_prefix_F, "_all_pivalue_rankings.tsv")
limma_parameters <- config$limma_analyses[[6]]
analysis_prefix_M <- paste0(integration$name, "_VSN_", limma_parameters$name, "_max-avg")
ranking_file_M <- paste0(input_data_dir, analysis_prefix_M, "_all_pivalue_rankings.tsv")
rm(integration, limma_parameters)
# We read the prepared rankings and keep only the values we will need for the annotations.
ranking_F <- read.delim(ranking_file_F, stringsAsFactors = FALSE) %>%
select(Gene, log_fold_change, P_value, adj_P_value)
ranking_M <- read.delim(ranking_file_M, stringsAsFactors = FALSE) %>%
select(Gene, log_fold_change, P_value, adj_P_value)
rm(ranking_file_F, analysis_prefix_F, ranking_file_M, analysis_prefix_M)
# We clean genes (no duplicate entries with pipe separated gene symbols).
if (sum(grepl("\\|", ranking_F$Gene)) > 0) {
tmp_res <- t(apply(ranking_F, 1, duplicate_row_first_cell))
ranking_clean_F <- data.frame(do.call(rbind, c(tmp_res)),
row.names = NULL,
stringsAsFactors = FALSE)
ranking_clean_F$LFC <- as.numeric(ranking_clean_F$LFC)
ranking_clean_F$P_value <- as.numeric(ranking_clean_F$P_value)
ranking_clean_F$adj_P_value <- as.numeric(ranking_clean_F$adj_P_value)
ranking_clean_F <- ranking_clean_F %>% arrange(adj_P_value)
rm(tmp_res)
} else {
ranking_clean_F <- ranking_F
ranking_clean_F$LFC <- as.numeric(ranking_clean_F$ref_logFC)
ranking_clean_F$P_value <- as.numeric(ranking_clean_F$P_value)
ranking_clean_F$adj_P_value <- as.numeric(ranking_clean_F$adj_P_value)
ranking_clean_F <- ranking_clean_F %>% arrange(adj_P_value)
}
row.names(ranking_clean_F) <- ranking_clean_F$Gene
rm(ranking_F)
if (sum(grepl("\\|", ranking_M$Gene)) > 0) {
tmp_res <- t(apply(ranking_M, 1, duplicate_row_first_cell))
ranking_clean_M <- data.frame(do.call(rbind, c(tmp_res)),
row.names = NULL,
stringsAsFactors = FALSE)
ranking_clean_M$LFC <- as.numeric(ranking_clean_M$LFC)
ranking_clean_M$P_value <- as.numeric(ranking_clean_M$P_value)
ranking_clean_M$adj_P_value <- as.numeric(ranking_clean_M$adj_P_value)
ranking_clean_M <- ranking_clean_M %>% arrange(adj_P_value)
rm(tmp_res)
} else {
ranking_clean_M <- ranking_M
ranking_clean_M$LFC <- as.numeric(ranking_clean_M$ref_logFC)
ranking_clean_M$P_value <- as.numeric(ranking_clean_M$P_value)
ranking_clean_M$adj_P_value <- as.numeric(ranking_clean_M$adj_P_value)
ranking_clean_M <- ranking_clean_M %>% arrange(adj_P_value)
}
row.names(ranking_clean_M) <- ranking_clean_M$Gene
rm(ranking_M)
# We merge both lists to get the list of all significantly expressed genes.
all_diff_exp_genes <- c((ranking_clean_F %>% filter(adj_P_value < 0.05) %>% select(Gene))$Gene,
(ranking_clean_M %>% filter(adj_P_value < 0.05) %>% select(Gene))$Gene)
# We gather the evidence level of the seletedTFs as the keys in the viper_regulon object
# is the transcription factor name followed by its evidence level.
names <- vp_fullnames[1, ]
evidence_levels <- vp_fullnames[2, ]
sel_tfs_keys <- paste(names[vp_fullnames[1, ] %in% sel_tfs],
evidence_levels[vp_fullnames[1, ] %in% sel_tfs], sep = "_")
rm(vp_fullnames, names, evidence_levels)
nmax <- 1000
vr_data <- data.frame(Source = rep(NA, nmax),
Target = rep(NA, nmax),
Regulation = rep(NA, nmax),
Source.F.logFC = rep(NA, nmax),
Source.F.adj.P.value = rep(NA, nmax),
Source.M.logFC = rep(NA, nmax),
Source.M.adj.P.value = rep(NA, nmax),
Target.F.logFC = rep(NA, nmax),
Target.F.adj.P.value = rep(NA, nmax),
Target.M.logFC = rep(NA, nmax),
Target.M.adj.P.value = rep(NA, nmax)
)
rm(nmax)
idf <- 1
for (key in sel_tfs_keys) {
raw_links <- viper_regulon[[key]]$tfmode
key_name <- unlist(str_split(key, "_"))[1]
# We keep only the links for which the target is differentially expressed.
links <- raw_links[names(raw_links) %in% all_diff_exp_genes]
# We update the dataframe.
for (target in names(links)) {
vr_data[idf, ]$Source <- key_name
vr_data[idf, ]$Target <- target
vr_data[idf, ]$Regulation <- "Activation"
if (links[target] == -1) {
vr_data[idf, ]$Regulation <- "Inhibition"
}
vr_data[idf, ]$Source.F.logFC <- ranking_clean_F[key_name, ]$LFC
vr_data[idf, ]$Source.F.adj.P.value <- ranking_clean_F[key_name, ]$adj_P_value
vr_data[idf, ]$Source.M.logFC <- ranking_clean_M[key_name, ]$LFC
vr_data[idf, ]$Source.M.adj.P.value <- ranking_clean_M[key_name, ]$adj_P_value
vr_data[idf, ]$Target.F.logFC <- ranking_clean_F[target, ]$LFC
vr_data[idf, ]$Target.F.adj.P.value <- ranking_clean_F[target, ]$adj_P_value
vr_data[idf, ]$Target.M.logFC <- ranking_clean_M[target, ]$LFC
vr_data[idf, ]$Target.M.adj.P.value <- ranking_clean_M[target, ]$adj_P_value
idf <- idf + 1
} # End of for target in names(links).
rm(target)
rm(raw_links, key_name, links)
} # End of for key in sel_tfs_keys.
rm(key)
vr_data <- vr_data[1:(idf-1),]
rm(viper_regulon, idf, sel_tfs_keys)
rm(all_diff_exp_genes, ranking_clean_F, ranking_clean_M)
# We now have approxiamtelly the same format as for DR and GG so we follow the same procedure.
# Code have been copied from above.
vr_data$Source.F.logFC[is.na(vr_data$Source.F.logFC)] <- 0
vr_data$Target.F.logFC[is.na(vr_data$Target.F.logFC)] <- 0
vr_data$Source.M.logFC[is.na(vr_data$Source.M.logFC)] <- 0
vr_data$Target.M.logFC[is.na(vr_data$Target.M.logFC)] <- 0
vr_data$Source.F.adj.P.value[is.na(vr_data$Source.F.adj.P.value)] <- 1
vr_data$Target.F.adj.P.value[is.na(vr_data$Target.F.adj.P.value)] <- 1
vr_data$Source.M.adj.P.value[is.na(vr_data$Source.M.adj.P.value)] <- 1
vr_data$Target.M.adj.P.value[is.na(vr_data$Target.M.adj.P.value)] <- 1
# No filtering in this case as the filtering already took place while building the dataframe.
vr_data_sel <-vr_data
rm(vr_data)
# We then filter the links based on the differential expression and the known regulation.
# The idea is to keep an interaction if and only if the experimental signal agrees with
# the known regulation. We start by adding some more fields as to ease the retrieval of the
# suitable interactions.
vr_data_sel <- vr_data_sel %>%
mutate(Source_F_best = abs(Source.F.logFC) > abs(Source.M.logFC)) %>%
mutate(Target_F_best = abs(Target.F.logFC) > abs(Target.M.logFC)) %>%
mutate(Source_best_sign = ifelse(abs(Source.F.logFC) > abs(Source.M.logFC),
sign(Source.F.logFC), sign(Source.M.logFC))) %>%
mutate(Target_best_sign = ifelse(abs(Target.F.logFC) > abs(Target.M.logFC),
sign(Target.F.logFC), sign(Target.M.logFC)))
# We then first select the suitable activations.
vr_data_sel_act <- vr_data_sel %>% filter(Regulation == "Activation") %>%
filter(Source_F_best == Target_F_best,
Source_best_sign == Target_best_sign) %>%
mutate(Obs_regulation = "Activation")
# Then, the inhibitions.
vr_data_sel_inh <- vr_data_sel %>% filter(Regulation == "Inhibition") %>%
filter(Source_F_best == Target_F_best,
Source_best_sign != Target_best_sign) %>%
mutate(Obs_regulation = "Inhibition")
# No unspecified for viper_regulon.
# We concatenate all selected interactions and save the files.
vr_data_sel_final <- rbind(vr_data_sel_act, vr_data_sel_inh) %>% unique()
rm(vr_data_sel, vr_data_sel_act, vr_data_sel_inh)
vr_data_fn <- paste0(output_data_dir, "SNage_VSN_PDVsControl_all_lists_regulon_GRN_internal_refined_selected.tsv")
write.table(vr_data_sel_final, file = vr_data_fn, sep = "\t", quote = FALSE, row.names = FALSE)
rm(vr_data_fn)
# We prepare the pseudo SIF files (for Cytoscape).
vr_data_assif <- vr_data_sel_final %>% mutate(sif = paste(Source, Obs_regulation, Target)) %>% select(sif)
# We prepare the annotations (for Cytoscape again).
vr_data_annots <- rbind(vr_data_sel_final %>%
select(Source, Source.F.logFC, Source.F.adj.P.value, Source.M.logFC, Source.M.adj.P.value) %>%
unique() %>%
rename(Gene = Source,
F.logFC = Source.F.logFC,
F.adj.P.value = Source.F.adj.P.value,
M.logFC = Source.M.logFC,
M.adj.P.value = Source.M.adj.P.value),
vr_data_sel_final %>%
select(Target, Target.F.logFC, Target.F.adj.P.value, Target.M.logFC, Target.M.adj.P.value) %>%
unique() %>%
rename(Gene = Target,
F.logFC = Target.F.logFC,
F.adj.P.value = Target.F.adj.P.value,
M.logFC = Target.M.logFC,
M.adj.P.value = Target.M.adj.P.value)) %>%
mutate(is_TF = case_when(Gene %in% sel_tfs ~ 1, TRUE ~ 0)) %>% unique() %>%
mutate(core_set = (is_TF | abs(F.logFC - M.logFC) >= 1 | sign(F.logFC) == -sign(M.logFC)))
rm(vr_data_sel_final, sel_tfs)
# Save the network and annot files.
vr_data_sif_fn <- paste0(output_data_dir, "SNage_VSN_PDVsControl_all_lists_regulon_GRN_internal_refined_selected_network.sif")
write.table(vr_data_assif, file = vr_data_sif_fn, sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE)
rm(vr_data_sif_fn, vr_data_assif)
vr_data_annots_fn <- paste0(output_data_dir, "SNage_VSN_PDVsControl_all_lists_regulon_GRN_internal_refined_selected_annotations.tsv")
write.table(vr_data_annots, file = vr_data_annots_fn, sep = "\t", quote = FALSE, row.names = FALSE)
rm(vr_data_annots_fn, vr_data_annots)
# We log the session details for reproducibility.
rm(config, output_data_dir, input_data_dir)
sessionInfo()
#!/bin/bash -l
#SBATCH -J geneder:18:carnival
#SBATCH --mail-type=all
#SBATCH --mail-user=leon-charles.tranchevent@uni.lu
#SBATCH -N 1
#SBATCH -n 10
#SBATCH --time=0-01:00:00
#SBATCH -p batch
#SBATCH --qos=qos-batch
echo "== Starting run at $(date)"
echo "== Job ID: ${SLURM_JOBID}"
echo "== Node list: ${SLURM_NODELIST}"
echo "== Submit dir. : ${SLURM_SUBMIT_DIR}"
echo ""
# Loading modules.
module load lang/R/3.6.0-foss-2019a-bare
module load math/CPLEX/12.9-foss-2019a
# Defining global parameters.
INPUT_FOLDER=/home/leon/Projects/GeneDER/Results/Regulatory_v05/Refined/
OUTPUT_FOLDER=/home/users/ltranchevent/Data/GeneDER/Analysis/18/
CODE_FOLDER=/home/users/ltranchevent/Projects/GeneDER/Analysis/18-RegulatoryNetwork_all/
# Copy the data locally (teh manually refined GeneGO and DoRothEA files).
cp ${INPUT_FOLDER}SNage_VSN_PDVsControl_all_lists_*_GRN_internal_refined.tsv ${OUTPUT_FOLDER}
cp ${INPUT_FOLDER}SelectedTFs_* ${OUTPUT_FOLDER}
# Actual job in R.
Rscript --vanilla ${CODE_FOLDER}select_interactions.R > ${OUTPUT_FOLDER}select_ints_log.out 2> ${OUTPUT_FOLDER}select_ints_log.err
# We concatenate the individual files to create the global files.
head -n 1 ${OUTPUT_FOLDER}*dorothea*annotations.tsv > ${OUTPUT_FOLDER}SNage_VSN_PDVsControl_all_lists_global_GRN_internal_refined_selected_annotations.tsv
cat ${OUTPUT_FOLDER}*annotations* | grep -v Gene | sort -u >> ${OUTPUT_FOLDER}SNage_VSN_PDVsControl_all_lists_global_GRN_internal_refined_selected_annotations.tsv
cat ${OUTPUT_FOLDER}*_network.sif | sort -u > ${OUTPUT_FOLDER}SNage_VSN_PDVsControl_all_lists_global_GRN_internal_refined_selected_network.sif
# Moving the slurm log file to data
mv ${CODE_FOLDER}/slurm-*out ${OUTPUT_FOLDER}/
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment