Commit a07ae9cd authored by Leon-Charles Tranchevent's avatar Leon-Charles Tranchevent
Browse files

Added the code to create gene expression heatmaps and modified the code that...

Added the code to create gene expression heatmaps and modified the code that creates the pathway visualizations.
parent d7b41601
......@@ -26,3 +26,5 @@ doc:
@sbatch ${CODE_FOLDER}doc.sh
rankings:
@sbatch ${CODE_FOLDER}/prepare_rankings.sh
heatmaps:
@sbatch ${CODE_FOLDER}/create_heatmaps.sh
#!/usr/bin/env Rscript
# ================================================================================================
# Libraries
# ================================================================================================
library("yaml")
library("tidyverse")
library("RColorBrewer")
source("../libs/conf/confR.R")
source("../libs/utils/utils.R")
message(paste0("[", Sys.time(), "] Libraries loaded."))
# ================================================================================================
# Configuration
# ================================================================================================
options(bitmapType = "cairo")
config <- read_config(config_dirs = c("../Confs/", "./"))
output_data_dir <- paste0(config$global_data_dir, config$local_data_dir)
input_data_dir <- paste0(config$global_data_dir, config$local_input_data_dir)
exp_data_dir <- paste0(config$global_data_dir, config$local_exp_data_dir)
rsexp_data_dir <- paste0(config$global_data_dir, config$local_rs_exp_data_dir)
clinical_data_dir <- paste0(config$global_data_dir, config$local_clinical_data_dir)
message(paste0("[", Sys.time(), "] Configuration done."))
# ================================================================================================
# Functions
# ================================================================================================
#' @title Function to create and save a heatmap of a data matrix.
#'
#' @description This function takes a data matrix, creates a configurable heatmap and then save
#' it as an image (PNG). Only the column dendrogram is displayed although rows are also
#' re-ordered. Side colors are used to tage the columns.
#'
#' @param data_mat The data-frame or matrix that contains the data to be plotted.
#' @param tag_colors The array that contains the colors to be used to tag the columns.
#' @param file_name The name of the file to be used to save the image of the heatmap.
#' @param col_dendro A Boolean indicating whether the column dendrogram should be plotted.
#' Default to TRUE. When set to FALSE, the columns are ordered based on their labels.
#' @param main_label A string representing the main label of the figure. Default to ''.
create_heatmap <- function(data_mat, tag_colors, file_name, col_dendro = TRUE, main_label = "") {
# We configure the color palette from Rbrewer.
col_palette <- colorRampPalette(brewer.pal(8, "PuOr"))(25)
# We build the dendrogram outside of heatmap to be able to re-order the rows without
# actually plotting the dendrogram.
myDendro <- as.dendrogram(hclust(dist(data_mat)))
rowInd <- order.dendrogram(myDendro)
# We then either re-order the columns before hand based on labels or let the
# dendrogram do it.
if (col_dendro) {
# We plot the heatmap as a PNG image.
png(file_name)
heatmap(x = as.matrix(data_mat)[rowInd,],
Rowv = NA,
scale = "row",
na.rm = TRUE,
ColSideColors = tag_colors,
col = col_palette,
labRow = NA,
labCol = NA,
main = main_label
)
dev.off()
} else {
# We first need to re-order the rows based on their labels.
names(tag_colors) <- colnames(data_mat)
tag_colors <- tag_colors[sort(colnames(data_mat))]
data_mat <- data_mat[, sort(colnames(data_mat)) ]
# We plot the heatmap as a PNG image.
png(file_name)
heatmap(x = as.matrix(data_mat)[rowInd,],
Rowv = NA,
Colv = NA,
scale = "row",
na.rm = TRUE,
ColSideColors = tag_colors,
col = col_palette,
labRow = NA,
labCol = NA,
main = main_label
)
dev.off()
rm(col_palette, myDendro, rowInd)
}
} # End function create_heatmap.
# ================================================================================================
# Main
# ================================================================================================
# We read the results of the integration (after classification of the genes as GS / GD).
# in order to get the list of genes that will go in the heatmap.
# These genes are only the GS/GD genes (purely PD genes without any gender effect excluded).
# Contrary to create_individual_heatmaps.R, we now only focus on the genes
# found to be differentially expressed across all datasets (after integration).
M_GS_genes_fn <- paste0(output_data_dir, "SNage_VSN_PDVsControl_males_max-avg_gs_pivalue_rankings.tsv")
M_GS_genes <- read.table(file = M_GS_genes_fn,
sep = "\t",
header = TRUE,
row.names = 1,
as.is = TRUE) %>%
filter(adj_P_value < 0.05)
F_GS_genes_fn <- paste0(output_data_dir, "SNage_VSN_PDVsControl_females_max-avg_gs_pivalue_rankings.tsv")
F_GS_genes <- read.table(file = F_GS_genes_fn,
sep = "\t",
header = TRUE,
row.names = 1,
as.is = TRUE) %>%
filter(adj_P_value < 0.05)
M_GD_genes_fn <- paste0(output_data_dir, "SNage_VSN_PDVsControl_males_max-avg_gd_pivalue_rankings.tsv")
M_GD_genes <- read.table(file = M_GD_genes_fn,
sep = "\t",
header = TRUE,
row.names = 1,
as.is = TRUE) %>%
filter(adj_P_value < 0.05)
F_GD_genes_fn <- paste0(output_data_dir, "SNage_VSN_PDVsControl_females_max-avg_gd_pivalue_rankings.tsv")
F_GD_genes <- read.table(file = F_GD_genes_fn,
sep = "\t",
header = TRUE,
row.names = 1,
as.is = TRUE) %>%
filter(adj_P_value < 0.05)
rm(M_GS_genes_fn, F_GS_genes_fn, M_GD_genes_fn, F_GD_genes_fn)
short_gsgd_genes <- c(row.names(M_GS_genes %>% arrange(adj_P_value) %>% head(50)),
row.names(F_GS_genes %>% arrange(adj_P_value) %>% head(50)),
row.names(M_GD_genes %>% arrange(adj_P_value) %>% head(50)),
row.names(F_GD_genes %>% arrange(adj_P_value) %>% head(50)))
full_gsgd_genes <- c(row.names(M_GS_genes),
row.names(F_GS_genes),
row.names(M_GD_genes),
row.names(F_GD_genes))
rm(M_GS_genes, F_GS_genes, M_GD_genes, F_GD_genes)
# We create once for all the list of all datasets (microarray and RNA-seq).
all_datasets <- c(config$datasets, config$rs_datasets)
# These objects will contain the data data across all datasets.
exp_data_full_all <- data.frame()
zsc_data_full_all <- data.frame()
exp_data_short_all <- data.frame()
zsc_data_short_all <- data.frame()
# We only run the 3rd integration scheme.
integration <- config$integrations[[3]]
int_criteria <- str_split(integration$criteria, ",")[[1]]
# We do all datasets one by one.
for (i in seq_len(length(all_datasets))) {
# We get the dataset details.
dataset <- all_datasets[[i]]
dataset_name <- dataset$dataset_name
dataset_arraytype <- dataset$array_type
# We only do the dataset if it corresponds to the integration criteria.
satisfy_all <- TRUE
for (c in seq_len(length(int_criteria))) {
int_criterium <- str_split(int_criteria[c], ";")[[1]]
if (dataset[int_criterium[1]] != int_criterium[2]) {
satisfy_all <- FALSE
}
rm(int_criterium)
}
if (satisfy_all != TRUE) {
rm(satisfy_all, dataset, dataset_name, dataset_arraytype)
next
}
rm(c, satisfy_all)
# We read the expression data (at the gene level) that was previously created.
exp_data_fn <- paste0(output_data_dir, dataset_name, "_VSN_gene_expr_data.tsv")
exp_data <- read.table(file = exp_data_fn,
sep = "\t",
header = TRUE,
as.is = TRUE)
rm(exp_data_fn)
# We compute the Z-scores across all genes.
exp_zdata <- data.frame(as.matrix(scale(exp_data[, 2:ncol(exp_data)])))
exp_zdata$genes <- exp_data$genes
# We then filter the data (processed and Z-scored).
exp_zdata_full <- exp_zdata %>% filter(genes %in% full_gsgd_genes)
exp_data_full <- exp_data %>% filter(genes %in% full_gsgd_genes)
exp_zdata_short <- exp_zdata %>% filter(genes %in% short_gsgd_genes)
exp_data_short <- exp_data %>% filter(genes %in% short_gsgd_genes)
rm(exp_data, exp_zdata)
# We then read the clinical data as well.
local_exp_data_dir <- exp_data_dir
if (dataset$platform == "RNAseq") {
local_exp_data_dir <- rsexp_data_dir
}
clin_data_fn <- paste0(local_exp_data_dir, dataset_name, "_clinical.tsv")
clin_data <- read.table(file = clin_data_fn,
sep = "\t",
header = TRUE,
as.is = TRUE) %>%
mutate(tag = paste(Gender, Disease.status, sep = ".")) %>%
select(X, tag)
rm(clin_data_fn, local_exp_data_dir)
row.names(clin_data) <- str_replace(clin_data$X, ".txt", "")
clin_data <- clin_data %>% select(-X)
# We change the column labels with TAGs (checking order) for easy visualization.
# Order should be the same for data / zdata and full/short except for the Gene column
# that we do not map anyway.
current_labels <- colnames(exp_data_full)[2:ncol(exp_data_full)]
new_labels <- clin_data$tag
names(new_labels) <- row.names(clin_data)
tags <- new_labels[current_labels]
colnames(exp_data_full) [2:ncol(exp_data_full)] <- tags
colnames(exp_zdata_full) [1:ncol(exp_zdata_full) - 1] <- tags
colnames(exp_data_short) [2:ncol(exp_data_short)] <- tags
colnames(exp_zdata_short)[1:ncol(exp_zdata_short) - 1] <- tags
rm(clin_data, current_labels, new_labels)
# Prepare the tag colors
tag_colors <- rep("grey", length(tags))
tag_colors[tags == "F.Control"] <- "#7da7de"
tag_colors[tags == "F.PD"] <- "#00599d"
tag_colors[tags == "M.Control"] <- "#c2e0ae"
tag_colors[tags == "M.PD"] <- "#62a73b"
rm(tags)
# Build and save the heatmaps.
# First, the full expression data.
data_mat <- exp_data_full[2:ncol(exp_data_full)]
hm_fn <- paste0(output_data_dir, dataset_name, "_exp_full_heatmap.png")
create_heatmap(data_mat = data_mat, tag_colors = tag_colors, file_name = hm_fn, main_label = dataset_name)
rm(data_mat, hm_fn)
# Second, the full z-score data.
data_mat <- exp_zdata_full[1:ncol(exp_zdata_full) - 1]
hm_fn <- paste0(output_data_dir, dataset_name, "_zsc_full_heatmap.png")
create_heatmap(data_mat = data_mat, tag_colors = tag_colors, file_name = hm_fn, main_label = dataset_name)
rm(data_mat, hm_fn)
# Third, the short expression data.
data_mat <- exp_data_short[2:ncol(exp_data_short)]
hm_fn <- paste0(output_data_dir, dataset_name, "_exp_short_heatmap.png")
create_heatmap(data_mat = data_mat, tag_colors = tag_colors, file_name = hm_fn, main_label = dataset_name)
rm(data_mat, hm_fn)
# Last, the short z-score data.
data_mat <- exp_zdata_short[1:ncol(exp_zdata_short) - 1]
hm_fn <- paste0(output_data_dir, dataset_name, "_zsc_short_heatmap.png")
create_heatmap(data_mat = data_mat, tag_colors = tag_colors, file_name = hm_fn, main_label = dataset_name)
rm(data_mat, hm_fn, tag_colors)
# Then, we also concatenate the data across datasets to run one single later.
if (dim(exp_data_full_all)[1] == 0) {
exp_data_full_all <- exp_data_full
zsc_data_full_all <- exp_zdata_full
exp_data_short_all <- exp_data_short
zsc_data_short_all <- exp_zdata_short
} else {
exp_data_full_all <- merge(x = exp_data_full_all,
y = exp_data_full,
by = "genes", all = TRUE, suffixes = c("", ""))
zsc_data_full_all <- merge(x = zsc_data_full_all,
y = exp_zdata_full,
by = "genes", all = TRUE, suffixes = c("", ""))
exp_data_short_all <- merge(x = exp_data_short_all,
y = exp_data_short,
by = "genes", all = TRUE, suffixes = c("", ""))
zsc_data_short_all <- merge(x = zsc_data_short_all,
y = exp_zdata_short,
by = "genes", all = TRUE, suffixes = c("", ""))
}
rm(exp_data_full, exp_zdata_full, exp_data_short, exp_zdata_short)
message(paste0("[", Sys.time(), "][", dataset_name, "] Heatmaps created."))
rm(dataset_arraytype, dataset_name, dataset)
} # End for each dataset.
rm(i, all_datasets, integration, int_criteria)
rm(full_gsgd_genes, short_gsgd_genes)
# We then plot giant heatmaps for the merged data.
# Prepare the tag colors
tags <- colnames(exp_data_full_all)[2:ncol(exp_data_full_all)]
tag_colors <- rep("grey", length(tags))
tag_colors[tags == "F.Control"] <- "#7da7de"
tag_colors[tags == "F.PD"] <- "#00599d"
tag_colors[tags == "M.Control"] <- "#c2e0ae"
tag_colors[tags == "M.PD"] <- "#62a73b"
rm(tags)
# Build and save the heatmaps.
# First, the full expression data.
data_mat <- exp_data_full_all[2:ncol(exp_data_full_all)]
hm_fn <- paste0(output_data_dir, "All_exp_full_heatmap.png")
create_heatmap(data_mat = data_mat, tag_colors = tag_colors, file_name = hm_fn, main_label = "All")
rm(hm_fn)
hm_fn <- paste0(output_data_dir, "All_exp_full_heatmap_sorted.png")
create_heatmap(data_mat = data_mat, tag_colors = tag_colors, file_name = hm_fn, col_dendro = FALSE, main_label = "All")
rm(data_mat, hm_fn)
# Second, the full z-score data.
data_mat <- zsc_data_full_all[2:ncol(zsc_data_full_all)]
hm_fn <- paste0(output_data_dir, "All_zsc_full_heatmap.png")
create_heatmap(data_mat = data_mat, tag_colors = tag_colors, file_name = hm_fn, main_label = "All")
rm(hm_fn)
hm_fn <- paste0(output_data_dir, "All_zsc_full_heatmap_sorted.png")
create_heatmap(data_mat = data_mat, tag_colors = tag_colors, file_name = hm_fn, col_dendro = FALSE, main_label = "All")
rm(data_mat, hm_fn)
# Third, the short expression data.
data_mat <- exp_data_short_all[2:ncol(exp_data_short_all)]
hm_fn <- paste0(output_data_dir, "All_exp_short_heatmap.png")
create_heatmap(data_mat = data_mat, tag_colors = tag_colors, file_name = hm_fn, main_label = "All")
rm(hm_fn)
hm_fn <- paste0(output_data_dir, "All_exp_short_heatmap_sorted.png")
create_heatmap(data_mat = data_mat, tag_colors = tag_colors, file_name = hm_fn, col_dendro = FALSE, main_label = "All")
rm(data_mat, hm_fn)
# Last, the short z-score data.
data_mat <- zsc_data_short_all[2:ncol(zsc_data_short_all)]
hm_fn <- paste0(output_data_dir, "All_zsc_short_heatmap.png")
create_heatmap(data_mat = data_mat, tag_colors = tag_colors, file_name = hm_fn, main_label = "All")
rm(hm_fn)
hm_fn <- paste0(output_data_dir, "All_zsc_short_heatmap_sorted.png")
create_heatmap(data_mat = data_mat, tag_colors = tag_colors, file_name = hm_fn, col_dendro = FALSE, main_label = "All")
rm(data_mat, hm_fn, tag_colors)
rm(exp_data_full_all, exp_data_short_all, zsc_data_full_all, zsc_data_short_all)
message(paste0("[", Sys.time(), "] Global heatmaps created."))
# We log the session details for reproducibility.
rm(config, output_data_dir, exp_data_dir, input_data_dir, clinical_data_dir, rsexp_data_dir)
sessionInfo()
#!/bin/bash -l
#SBATCH -J geneder:16:heatmaps
#SBATCH --mail-type=all
#SBATCH --mail-user=leon-charles.tranchevent@uni.lu
#SBATCH -N 1
#SBATCH -n 1
#SBATCH --time=0-0:05:00
#SBATCH -p batch
#SBATCH --qos=qos-batch
echo "== Starting run at $(date)"
echo "== Job ID: ${SLURM_JOBID}"
echo "== Node list: ${SLURM_NODELIST}"
echo "== Submit dir. : ${SLURM_SUBMIT_DIR}"
echo ""
# Defining global parameters.
OUTPUT_FOLDER=/home/users/ltranchevent/Data/GeneDER/Analysis/16/
CODE_FOLDER=/home/users/ltranchevent/Projects/GeneDER/Analysis/16-Data_integration_all/
# Loading modules.
module load lang/R/3.6.0-foss-2019a-bare
# Actual jobs
Rscript --vanilla ${CODE_FOLDER}create_heatmaps.R > ${OUTPUT_FOLDER}heatmaps_log.out 2> ${OUTPUT_FOLDER}heatmaps_log.err
# Moving the slurm log file to data
mv ${CODE_FOLDER}slurm-*out ${OUTPUT_FOLDER}
\ No newline at end of file
#!/usr/bin/env Rscript
# ================================================================================================
# Libraries
# ================================================================================================
library("yaml")
library("tidyverse")
library("RColorBrewer")
source("../libs/conf/confR.R")
source("../libs/utils/utils.R")
message(paste0("[", Sys.time(), "] Libraries loaded."))
# ================================================================================================
# Configuration
# ================================================================================================
options(bitmapType = "cairo")
config <- read_config(config_dirs = c("../Confs/", "./"))
output_data_dir <- paste0(config$global_data_dir, config$local_data_dir)
input_data_dir <- paste0(config$global_data_dir, config$local_input_data_dir)
exp_data_dir <- paste0(config$global_data_dir, config$local_exp_data_dir)
rsexp_data_dir <- paste0(config$global_data_dir, config$local_rs_exp_data_dir)
clinical_data_dir <- paste0(config$global_data_dir, config$local_clinical_data_dir)
message(paste0("[", Sys.time(), "] Configuration done."))
# ================================================================================================
# Functions
# ================================================================================================
#' @title Function to create and save a heatmap of a data matrix.
#'
#' @description This function takes a data matrix, creates a configurable heatmap and then save
#' it as an image (PNG). Only the column dendrogram is displayed although rows are also
#' re-ordered. Side colors are used to tage the columns.
#'
#' @param data_mat The data-frame or matrix that contains the data to be plotted.
#' @param tag_colors The array that contains the colors to be used to tag the columns.
#' @param file_name The name of the file to be used to save the image of the heatmap.
#' @param col_dendro A Boolean indicating whether the column dendrogram should be plotted.
#' Default to TRUE. When set to FALSE, the columns are ordered based on their labels.
#' @param main_label A string representing the main label of the figure. Default to ''.
create_heatmap <- function(data_mat, tag_colors, file_name, col_dendro = TRUE, main_label = "") {
# We configure the color palette from Rbrewer.
col_palette <- colorRampPalette(brewer.pal(8, "PuOr"))(25)
# We build the dendrogram outside of heatmap to be able to re-order the rows without
# actually plotting the dendrogram.
myDendro <- as.dendrogram(hclust(dist(data_mat)))
rowInd <- order.dendrogram(myDendro)
# We then either re-order the columns before hand based on labels or let the
# dendrogram do it.
if (col_dendro) {
# We plot the heatmap as a PNG image.
png(file_name)
heatmap(x = as.matrix(data_mat)[rowInd,],
Rowv = NA,
scale = "row",
na.rm = TRUE,
ColSideColors = tag_colors,
col = col_palette,
labRow = NA,
labCol = NA,
main = main_label
)
dev.off()
} else {
# We first need to re-order the rows based on their labels.
names(tag_colors) <- colnames(data_mat)
tag_colors <- tag_colors[sort(colnames(data_mat))]
data_mat <- data_mat[, sort(colnames(data_mat)) ]
# We plot the heatmap as a PNG image.
png(file_name)
heatmap(x = as.matrix(data_mat)[rowInd,],
Rowv = NA,
Colv = NA,
scale = "row",
na.rm = TRUE,
ColSideColors = tag_colors,
col = col_palette,
labRow = NA,
labCol = NA,
main = main_label
)
dev.off()
rm(col_palette, myDendro, rowInd)
}
} # End function create_heatmap.
# ================================================================================================
# Main
# ================================================================================================
# We read the results of the integration (after classification of the genes as GS / GD).
# in order to get the list of genes that will go in the heatmap.
# These genes are only the GS/GD genes (purely PD genes without any gender effect excluded).
# Contrary to create_heatmaps.R, these are used not as a reference but as a filter
# to filter the differentially expressed genes from each dataset.
M_GS_genes_fn <- paste0(output_data_dir, "SNage_VSN_PDVsControl_males_max-avg_gs_pivalue_rankings.tsv")
M_GS_genes <- read.table(file = M_GS_genes_fn,
sep = "\t",
header = TRUE,
row.names = 1,
as.is = TRUE) %>%
filter(adj_P_value < 0.05)
F_GS_genes_fn <- paste0(output_data_dir, "SNage_VSN_PDVsControl_females_max-avg_gs_pivalue_rankings.tsv")
F_GS_genes <- read.table(file = F_GS_genes_fn,
sep = "\t",
header = TRUE,
row.names = 1,
as.is = TRUE) %>%
filter(adj_P_value < 0.05)
M_GD_genes_fn <- paste0(output_data_dir, "SNage_VSN_PDVsControl_males_max-avg_gd_pivalue_rankings.tsv")
M_GD_genes <- read.table(file = M_GD_genes_fn,
sep = "\t",
header = TRUE,
row.names = 1,
as.is = TRUE) %>%
filter(adj_P_value < 0.05)
F_GD_genes_fn <- paste0(output_data_dir, "SNage_VSN_PDVsControl_females_max-avg_gd_pivalue_rankings.tsv")
F_GD_genes <- read.table(file = F_GD_genes_fn,
sep = "\t",
header = TRUE,
row.names = 1,
as.is = TRUE) %>%
filter(adj_P_value < 0.05)
rm(M_GS_genes_fn, F_GS_genes_fn, M_GD_genes_fn, F_GD_genes_fn)
gsgd_genes <- c(row.names(M_GS_genes),
row.names(F_GS_genes),
row.names(M_GD_genes),
row.names(F_GD_genes))
rm(M_GS_genes, F_GS_genes, M_GD_genes, F_GD_genes)
# We create once for all the list of all datasets (microarray and RNA-seq).
all_datasets <- c(config$datasets, config$rs_datasets)
# We only run the 3rd integration scheme.
integration <- config$integrations[[3]]
int_criteria <- str_split(integration$criteria, ",")[[1]]
# We do all datasets one by one.
for (i in seq_len(length(all_datasets))) {
# We get the dataset details.
dataset <- all_datasets[[i]]
dataset_name <- dataset$dataset_name
dataset_arraytype <- dataset$array_type
# We only do the dataset if it corresponds to the integration criteria.
satisfy_all <- TRUE
for (c in seq_len(length(int_criteria))) {
int_criterium <- str_split(int_criteria[c], ";")[[1]]
if (dataset[int_criterium[1]] != int_criterium[2]) {
satisfy_all <- FALSE
}
rm(int_criterium)
}
if (satisfy_all != TRUE) {
rm(satisfy_all, dataset, dataset_name, dataset_arraytype)
next
}
rm(c, satisfy_all)
# We read the results of the differnetial analysis in order to get the list of genes that will
# go in the heatmap. This time (and contrary to create_heatmaps.R), we focus on the genes
# found to be differentially expressed in this dataset but still we filter out the genes
# that do not make it in the final GS/GD list.
needed_cols <- c("SYMBOL", paste0("P_", dataset_name))
M_genes_fn <- paste0(output_data_dir, "SNage_VSN_PDVsControl_males_max-avg_integration.tsv")
M_genes <- tryCatch({
read.table(file = M_genes_fn,
sep = "\t",
header = TRUE,
row.names = 1,
as.is = TRUE) %>%
select(one_of(needed_cols)) %>% arrange(across(starts_with("P_"))) %>% head(100) %>% select(SYMBOL)
}, warning = function(warning_condition) {}, error = function(error_condition) {}, finally = {})
rm(M_genes_fn)
F_genes_fn <- paste0(output_data_dir, "SNage_VSN_PDVsControl_females_max-avg_integration.tsv")
F_genes <- tryCatch({
read.table(file = F_genes_fn,
sep = "\t",
header = TRUE,
row.names = 1,
as.is = TRUE) %>%
select(one_of(needed_cols)) %>% arrange(across(starts_with("P_"))) %>% head(100) %>% select(SYMBOL)
}, warning = function(warning_condition) {}, error = function(error_condition) {}, finally = {})
rm(F_genes_fn, needed_cols)
# We filter out the genes that are not in GS/GD
diff_exp_genes <- intersect(unique(c(F_genes$SYMBOL, M_genes$SYMBOL)), gsgd_genes)