Commit af9a48ef authored by Leon-Charles Tranchevent's avatar Leon-Charles Tranchevent
Browse files

Updated method to select the best probe per gene (no more checking on the LFC).

parent 0b393707
......@@ -281,8 +281,8 @@ vsn <- config$variance_methods[[j]]
# We rely on the last limma analyses (gender_disease_status) but this is only to get the
# 4 categories of interest.
k <- 5
limma_analysis <- config$limma_analyses[[k]]
##k <- 5
##limma_analysis <- config$limma_analyses[[k]]
# We use the first gene-probe selection method (max-avg).
l <- 1
......@@ -362,6 +362,43 @@ rm(m)
#gene_datasets <- c("ALL", "GSE20159", "GSE26927", "GSE49036", "NBB")
#for (gene in config$genes_to_plot) {#}
# DEBUG
gene <- "RFC5"
##gene_datasets <- c("GSE20159", "GSE20163", "GSE20292", "GSE26927", "GSE49036", "GSE8397", "NBB")
gene_datasets <- c("GSE20163")
plot_gene(gene = gene,
gene_datasets = gene_datasets,
output_data_dir = output_data_dir,
integration_name = integration$name,
limma_analysis = config$limma_analyses[[5]],
selection_name = selection$name,
palette = current_plt,
clin_data_all, exp_data_all, zexp_data_all, m_indx_map, config,
plot_all = FALSE, plot_datasets = TRUE)
plot_gene(gene = gene,
gene_datasets = gene_datasets,
output_data_dir = output_data_dir,
integration_name = integration$name,
limma_analysis = config$limma_analyses[[6]],
selection_name = selection$name,
palette = current_plt,
clin_data_all, exp_data_all, zexp_data_all, m_indx_map, config,
plot_all = FALSE, plot_datasets = TRUE)
plot_gene(gene = gene,
gene_datasets = gene_datasets,
output_data_dir = output_data_dir,
integration_name = integration$name,
limma_analysis = config$limma_analyses[[7]],
selection_name = selection$name,
palette = current_plt,
clin_data_all, exp_data_all, zexp_data_all, m_indx_map, config,
plot_all = FALSE, plot_datasets = TRUE)
# FEMALE ANALYSIS
k <- 5
limma_analysis <- config$limma_analyses[[k]]
......@@ -379,9 +416,10 @@ limma_analysis <- config$limma_analyses[[k]]
# gene_datasets <- c("GSE26927", "GSE49036", "NBB")
# gene <- "SOD2"
# gene_datasets <- c("GSE26927", "GSE49036", "NBB")
gene <- "CA2"
gene_datasets <- c("GSE20163", "GSE20164", "GSE20292", "GSE26927", "GSE49036", "GSE8397", "NBB")
plot_gene(gene = gene,
gene_datasets = gene_datasets,
output_data_dir = output_data_dir,
......@@ -434,8 +472,8 @@ plot_gene(gene = gene,
selection_name = selection$name,
palette = current_plt,
clin_data_all, exp_data_all, zexp_data_all, m_indx_map, config,
#plot_all = FALSE, plot_datasets = TRUE)
plot_all = TRUE, plot_datasets = FALSE)
plot_all = FALSE, plot_datasets = TRUE)
#plot_all = TRUE, plot_datasets = FALSE)
gene_datasets <- c("GSE20292", "GSE8397", "GSE49036")
plot_gene(gene = gene,
gene_datasets = gene_datasets,
......
#!/bin/bash -l
#SBATCH -J geneder:17:subtract
#SBATCH -J geneder:16:rankings
#SBATCH --mail-type=all
#SBATCH --mail-user=leon-charles.tranchevent@uni.lu
#SBATCH -N 1
......
......@@ -42,19 +42,6 @@ message(paste0("[", Sys.time(), "] Configuration done."))
select_best <- function(row, array_indexes) {
to_return <- character(3 * length(array_indexes))
# We compute the median log fold change over all datasets.
# For one dataset, if there are multiple probes, we also take the median.
# This is then used to keep only the probes that have a log FC that corresponds
# to the majority (otherwise we might select a probe that won't be used).
# For microarray data, we used row$genes, now we use row$SYMBOL.
datasets_df_gene <- datasets_data_df %>%
filter(SYMBOL == row$SYMBOL) %>%
select(dataset, logFC) %>%
group_by(dataset) %>%
summarise(medLogFC = median(logFC))
gene_medlfc <- median(datasets_df_gene$medLogFC)
rm(datasets_df_gene)
# We then loop over all array types to identify the most relevant probes.
for (i in seq_len(length(array_indexes))) {
array_index <- array_indexes[i]
......@@ -77,7 +64,7 @@ select_best <- function(row, array_indexes) {
tmp <- datasets_data_df %>%
dplyr::filter(X == probe, array_type == marray_type) %>%
dplyr::select(AveExpr, P.Value, logFC) # nolint
# The probe can be associated with no signal at all if some datasets were used
# to create the mapping but not for the actual integration.
if (dim(tmp)[1] == 0) {
......@@ -86,13 +73,6 @@ select_best <- function(row, array_indexes) {
} else {
all_avgs[j] <- mean(tmp$AveExpr) # nolint
all_pvals[j] <- mean(-log10(tmp$P.Value)) # nolint
# Special case of a probe that does not qualify because its log FC is
# not in the right direction (wrt to the median logFC across all datasets).
if (sign(mean(tmp$logFC)) != sign(gene_medlfc)) {
all_avgs[j] <- 0
all_pvals[j] <- 0
}
}
rm(probe, tmp)
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment