Commit 6ede0b4c authored by Leon-Charles Tranchevent's avatar Leon-Charles Tranchevent
Browse files

Modified enrichment procedure to take into account gene sets with very few genes.

parent 57624928
......@@ -204,7 +204,8 @@ for (i in seq_len(length(config$scdatasets))) {
analysis_prefix <- strsplit(ranking_file, ".tsv")[[1]]
ofile_prefix <- paste0("CP_", analysis_prefix, "_")
input_filename <- paste0(output_data_subdir, ranking_file)
message(paste0("[", Sys.time(), "][", analysis_prefix, "] CP analysis started."))
# We read the prepared ranking and keep only the value used to rank.
ranking <- read.delim(input_filename, stringsAsFactors = FALSE) %>%
select(Gene, adj_P_value)
......@@ -212,12 +213,26 @@ for (i in seq_len(length(config$scdatasets))) {
ranking_clean <- ranking
ranking_clean$adj_P_value <- as.numeric(ranking_clean$adj_P_value)
rm(ranking)
message(paste0("[", Sys.time(), "][", analysis_prefix, "] Data loaded (",
dim(ranking_clean)[1], " genes)."))
# We map the gene symbols to the EntrezGene identifiers.
mapped_egenes <- bitr(ranking_clean$Gene,
fromType = "SYMBOL",
toType = c("ENTREZID"),
OrgDb = org.Hs.eg.db) #nolint
mapped_egenes <- tryCatch (
expr = {
bitr(ranking_clean$Gene,
fromType = "SYMBOL",
toType = c("ENTREZID"),
OrgDb = org.Hs.eg.db) #nolint
},
error = function(e) {
message(paste0("[", Sys.time(), "][", analysis_prefix, "] Error in gene identifier mapping:"))
message(e)
}
)
if (is.null(mapped_egenes)) {
rm(analysis_prefix, ofile_prefix, ranking_clean, mapped_egenes)
next
}
ranking_final <- merge(x = ranking_clean,
y = mapped_egenes,
by.x = "Gene",
......@@ -229,12 +244,16 @@ for (i in seq_len(length(config$scdatasets))) {
ranking_final <- ranking_final %>% arrange(adj_P_value)
ranking_final_eg <- ranking_final %>% filter(!is.na(EGene))
rm(mapped_egenes, ranking_clean)
message(paste0("[", Sys.time(), "][", analysis_prefix, "] Data mapped (",
dim(ranking_final_eg)[1], "/", dim(ranking_final)[1], " genes)."))
# We then prepare the gene lists for Fisher-based enrichment. These are just gene lists based on
# the adjusted P values.
selection_sy <- ranking_final %>% filter(adj_P_value < 0.05)
selection_eg <- selection_sy %>% filter(!is.na(EGene))
rm(ranking_final, ranking_final_eg)
message(paste0("[", Sys.time(), "][", analysis_prefix, "] Data filtered (",
dim(selection_eg)[1], "/", dim(selection_sy)[1], " genes)."))
# We loop over the functional sources.
for (k in seq_len(length(config$functional_sources))) {
......@@ -273,6 +292,7 @@ for (i in seq_len(length(config$scdatasets))) {
func_source$cp_name, ") done."))
rm(func_source, fisher_adjP, selection, background)
} # End for each functional source.
message(paste0("[", Sys.time(), "][", analysis_prefix, "] CP analysis done."))
rm(k, selection_sy, selection_eg)
rm(analysis_prefix, ofile_prefix)
} # End for each integration.
......
......@@ -3,7 +3,7 @@
#SBATCH --mail-type=all
#SBATCH --mail-user=leon-charles.tranchevent@uni.lu
#SBATCH -N 1
#SBATCH --time=0-08:00:00
#SBATCH --time=0-09:00:00
#SBATCH --mem=10GB
#SBATCH -p batch
#SBATCH --qos=normal
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment