Commit 7d886d78 authored by Leon-Charles Tranchevent's avatar Leon-Charles Tranchevent
Browse files

Updated analysis and PDF generation for the integration step.

parent 3707a2d0
......@@ -111,7 +111,7 @@ for (i in seq_len(length(config$datasets))) {
gene_annots <- gene_annots[order(match(gene_annots$PROBEID, rownames(exp_eset))), ]
# Just in case, for RNA-seq data, we use the following alternative to the above statement.
# This is only placed there for information since it was not used for miicroarray data.
# This is only placed there for information since it was not used for microarray data.
# The current solution is equivalent.
#gene_annots <- merge(data.frame(PROBEID = rownames(exp_eset)),
# gene_annots, by = "PROBEID", all.x = TRUE, sort = FALSE)
......
......@@ -158,7 +158,7 @@ for (i in seq_len(length(config$integrations))) {
use_count_full <- dim(results_core_matrix_full)[1] - nb_nas
names(use_count_full)[1] <- "Total"
use_counts <- data.frame(Dataset = names(use_count_full), Count = use_count_full)
rm(results_core_matrix_full, nb_nas, use_count_full)
rm(results_core_matrix_full, nb_nas, use_count_full, results_csss_lfc)
# Plotting of the use counts (aka the number of P values that were combined via the
# integration).
......@@ -196,7 +196,7 @@ for (i in seq_len(length(config$integrations))) {
pval_comb$name)]] < config$p_val_sign_thres)
# Update the global structures (for comparison).
local_index <- 3 * (l - 1)
local_index <- length(config$p_val_combinations) * (l - 1)
sign_genes_all[[local_index + o]] <- sign_genes
all_local_counts <- c(all_local_counts, dim(sign_genes)[1])
rm(local_index)
......
......@@ -245,7 +245,7 @@ for (i in seq_len(length(config$integrations))) {
# We get the integration name for that scheme.
integration <- config$integrations[[i]]
int_criteria <- str_split(integration$criteria, ";")[[1]]
int_criteria <- str_split(integration$criteria, ",")[[1]]
# We repeat the analysis for all VSN usages.
for (j in seq_len(length(config$variance_methods))) {
......@@ -291,8 +291,17 @@ for (i in seq_len(length(config$integrations))) {
dataset_arraytype <- dataset$array_type
# We only read the dataset if it corresponds to the integration criteria.
if (dataset[int_criteria[1]] != int_criteria[2]) {
rm(dataset, dataset_name, dataset_arraytype)
satisfy_all <- TRUE
for (c in seq_len(length(int_criteria))) {
int_criterium <- str_split(int_criteria[c], ";")[[1]]
if (dataset[int_criterium[1]] != int_criterium[2]) {
satisfy_all <- FALSE
}
rm(int_criterium)
}
rm(c)
if (satisfy_all != TRUE) {
rm(satisfy_all, dataset, dataset_name, dataset_arraytype)
next
}
......
......@@ -106,10 +106,14 @@ do
# Prepare tables for LaTeX.
sed -r 's/\_//g' ${OUTPUT_FOLDER}${integrationName}_${varName}_${analysisName}_max-avg_fc_global_correlations.tsv | sed -r 's/\tNA/\t1/g' > ${OUTPUT_FOLDER}${integrationName}_${varName}_${analysisName}_max-avg_fc_global_correlations_4latex.tsv
sed -r 's/\_//g' ${OUTPUT_FOLDER}${integrationName}_${varName}_${analysisName}_max-avg_pval_global_correlations.tsv | sed -r 's/\tNA/\t1/g' > ${OUTPUT_FOLDER}${integrationName}_${varName}_${analysisName}_max-avg_pval_global_correlations_4latex.tsv
sed -r 's/\_//g' ${OUTPUT_FOLDER}${integrationName}_${varName}_${analysisName}_max-avg_fc_top_global_correlations.tsv | sed -r 's/\tNA/\t1/g' > ${OUTPUT_FOLDER}${integrationName}_${varName}_${analysisName}_max-avg_fc_top_global_correlations_4latex.tsv
sed -r 's/\_//g' ${OUTPUT_FOLDER}${integrationName}_${varName}_${analysisName}_max-avg_pval_top_global_correlations.tsv | sed -r 's/\tNA/\t1/g' > ${OUTPUT_FOLDER}${integrationName}_${varName}_${analysisName}_max-avg_pval_top_global_correlations_4latex.tsv
sed -r 's/\_//g' ${OUTPUT_FOLDER}${integrationName}_${varName}_${analysisName}_best-pval_fc_global_correlations.tsv | sed -r 's/\tNA/\t1/g' > ${OUTPUT_FOLDER}${integrationName}_${varName}_${analysisName}_best-pval_fc_global_correlations_4latex.tsv
sed -r 's/\_//g' ${OUTPUT_FOLDER}${integrationName}_${varName}_${analysisName}_best-pval_pval_global_correlations.tsv | sed -r 's/\tNA/\t1/g' > ${OUTPUT_FOLDER}${integrationName}_${varName}_${analysisName}_best-pval_pval_global_correlations_4latex.tsv
sed -r 's/\_//g' ${OUTPUT_FOLDER}${integrationName}_${varName}_${analysisName}_best-pval_fc_top_global_correlations.tsv | sed -r 's/\tNA/\t1/g' > ${OUTPUT_FOLDER}${integrationName}_${varName}_${analysisName}_best-pval_fc_top_global_correlations_4latex.tsv
sed -r 's/\_//g' ${OUTPUT_FOLDER}${integrationName}_${varName}_${analysisName}_best-pval_pval_top_global_correlations.tsv | sed -r 's/\tNA/\t1/g' > ${OUTPUT_FOLDER}${integrationName}_${varName}_${analysisName}_best-pval_pval_top_global_correlations_4latex.tsv
# AVG - fc
# T1: AVG - fc.
echo '\begin{table}' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' \begin{center}' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' \setlength{\tabcolsep}{0.5\tabcolsep}' >> ${OUTPUT_FOLDER}results_summary.tex
......@@ -129,7 +133,8 @@ do
echo ' and Limma analysis \textit{'"${analysisName}"'}. Pearson correlations are in the upper triangle, Spearman rank' | sed -r 's/_/\\_/g' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' correlations are in the lower triangle. NOTE: Gene probes selected based on the highest average expression (AVG).}' >> ${OUTPUT_FOLDER}results_summary.tex
echo '' >> ${OUTPUT_FOLDER}results_summary.tex
# AVG - pval
# T1: AVG - pval.
echo ' \resizebox{\linewidth}{!}{\pgfplotstabletypeset[' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' col sep=tab,' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' every head row/.style={' >> ${OUTPUT_FOLDER}results_summary.tex
......@@ -149,7 +154,48 @@ do
echo '\end{table}' >> ${OUTPUT_FOLDER}results_summary.tex
echo '' >> ${OUTPUT_FOLDER}results_summary.tex
# PVAL - fc
# T2: AVG - fc (top only).
echo '\begin{table}' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' \begin{center}' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' \setlength{\tabcolsep}{0.5\tabcolsep}' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' \resizebox{\linewidth}{!}{\pgfplotstabletypeset[' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' col sep=tab,' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' every head row/.style={' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' before row={\toprule},' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' after row={\midrule}' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' },' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' every last row/.style={after row=\bottomrule},' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' display columns/0/.style={' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' string type,' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' column type={l}' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' },' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' ]{'"$OUTPUT_FOLDER"''"${integrationName}"'_'"${varName}"'_'"${analysisName}"'_max-avg_fc_top_global_correlations_4latex.tsv}}' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' \caption{Fold change correlations among the datasets for integration scheme \textit{'"${integrationName}"'}, variance configuration \textit{'"${varName}"'}' | sed -r 's/_/\\_/g' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' and Limma analysis \textit{'"${analysisName}"'} and restricted to the genes with the best P values only. Pearson correlations are in the upper triangle, Spearman rank' | sed -r 's/_/\\_/g' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' correlations are in the lower triangle. NOTE: Gene probes selected based on the highest average expression (AVG).}' >> ${OUTPUT_FOLDER}results_summary.tex
echo '' >> ${OUTPUT_FOLDER}results_summary.tex
# T2: AVG - pval (top only).
echo ' \resizebox{\linewidth}{!}{\pgfplotstabletypeset[' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' col sep=tab,' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' every head row/.style={' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' before row={\toprule},' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' after row={\midrule}' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' },' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' every last row/.style={after row=\bottomrule},' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' display columns/0/.style={' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' string type,' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' column type={l}' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' },' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' ]{'"$OUTPUT_FOLDER"''"${integrationName}"'_'"${varName}"'_'"${analysisName}"'_max-avg_pval_top_global_correlations_4latex.tsv}}' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' \caption{P value correlations among the datasets for integration scheme \textit{'"${integrationName}"'}, variance configuration \textit{'"${varName}"'} ' | sed -r 's/_/\\_/g' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' and Limma analysis \textit{'"${analysisName}"'} and restricted to the genes with the best P values only. P values are transformed using -log10(). Pearson correlations are in the upper triangle, Spearman rank' | sed -r 's/_/\\_/g' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' correlations are in the lower triangle. NOTE: Gene probes selected based on the highest average expression (AVG).}' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' \end{center}' >> ${OUTPUT_FOLDER}results_summary.tex
echo '\end{table}' >> ${OUTPUT_FOLDER}results_summary.tex
echo '' >> ${OUTPUT_FOLDER}results_summary.tex
# T3: PVAL - fc.
echo '\begin{table}' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' \begin{center}' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' \setlength{\tabcolsep}{0.5\tabcolsep}' >> ${OUTPUT_FOLDER}results_summary.tex
......@@ -169,7 +215,8 @@ do
echo ' and Limma analysis \textit{'"${analysisName}"'}. Pearson correlations are in the upper triangle, Spearman rank' | sed -r 's/_/\\_/g' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' correlations are in the lower triangle. NOTE: Gene probes selected based on the best P value (PVAL).}' >> ${OUTPUT_FOLDER}results_summary.tex
echo '' >> ${OUTPUT_FOLDER}results_summary.tex
# PVAL - pval
# T3: PVAL - pval.
echo ' \resizebox{\linewidth}{!}{\pgfplotstabletypeset[' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' col sep=tab,' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' every head row/.style={' >> ${OUTPUT_FOLDER}results_summary.tex
......@@ -189,6 +236,47 @@ do
echo '\end{table}' >> ${OUTPUT_FOLDER}results_summary.tex
echo '' >> ${OUTPUT_FOLDER}results_summary.tex
# T4: PVAL - fc (top only).
echo '\begin{table}' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' \begin{center}' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' \setlength{\tabcolsep}{0.5\tabcolsep}' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' \resizebox{\linewidth}{!}{\pgfplotstabletypeset[' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' col sep=tab,' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' every head row/.style={' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' before row={\toprule},' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' after row={\midrule}' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' },' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' every last row/.style={after row=\bottomrule},' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' display columns/0/.style={' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' string type,' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' column type={l}' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' },' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' ]{'"$OUTPUT_FOLDER"''"${integrationName}"'_'"${varName}"'_'"${analysisName}"'_best-pval_fc_top_global_correlations_4latex.tsv}}' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' \caption{Fold change correlations among the datasets for integration scheme \textit{'"${integrationName}"'}, variance configuration \textit{'"${varName}"'}' | sed -r 's/_/\\_/g' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' and Limma analysis \textit{'"${analysisName}"'} and restricted to the genes with the best P values only. Pearson correlations are in the upper triangle, Spearman rank' | sed -r 's/_/\\_/g' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' correlations are in the lower triangle. NOTE: Gene probes selected based on the best P value (PVAL).}' >> ${OUTPUT_FOLDER}results_summary.tex
echo '' >> ${OUTPUT_FOLDER}results_summary.tex
# T4: PVAL - pval (top only).
echo ' \resizebox{\linewidth}{!}{\pgfplotstabletypeset[' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' col sep=tab,' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' every head row/.style={' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' before row={\toprule},' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' after row={\midrule}' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' },' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' every last row/.style={after row=\bottomrule},' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' display columns/0/.style={' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' string type,' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' column type={l}' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' },' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' ]{'"$OUTPUT_FOLDER"''"${integrationName}"'_'"${varName}"'_'"${analysisName}"'_best-pval_pval_top_global_correlations_4latex.tsv}}' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' \caption{P value correlations among the datasets for integration scheme \textit{'"${integrationName}"'}, variance configuration \textit{'"${varName}"'}' | sed -r 's/_/\\_/g' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' and Limma analysis \textit{'"${analysisName}"'} and restricted to the genes with the best P values only. P values are transformed using -log10(). Pearson correlations are in the upper triangle, Spearman rank' | sed -r 's/_/\\_/g' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' correlations are in the lower triangle. NOTE: Gene probes selected based on the best P value (PVAL).}' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' \end{center}' >> ${OUTPUT_FOLDER}results_summary.tex
echo '\end{table}' >> ${OUTPUT_FOLDER}results_summary.tex
echo '' >> ${OUTPUT_FOLDER}results_summary.tex
# Comparing AVG and PVAL.
echo '\begin{figure}[ht]' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' \centering' >> ${OUTPUT_FOLDER}results_summary.tex
......@@ -279,8 +367,27 @@ echo '\clearpage' >> ${OUTPUT_FOLDER}results_summary.tex
echo '' >> ${OUTPUT_FOLDER}results_summary.tex
# Prepare tables for LaTeX.
grep -v '^DA' ${OUTPUT_FOLDER}all_counts.tsv > ${OUTPUT_FOLDER}all_counts_SN_4latex.tsv
grep -v '^SN' ${OUTPUT_FOLDER}all_counts.tsv > ${OUTPUT_FOLDER}all_counts_DA_4latex.tsv
grep -v '^SN-' ${OUTPUT_FOLDER}all_counts.tsv | grep -v '^DA-' > ${OUTPUT_FOLDER}all_counts_SNage_4latex.tsv
grep -v '^SNage-' ${OUTPUT_FOLDER}all_counts.tsv | grep -v '^DA-' > ${OUTPUT_FOLDER}all_counts_SN_4latex.tsv
grep -v '^SN-' ${OUTPUT_FOLDER}all_counts.tsv | grep -v '^SNage-' > ${OUTPUT_FOLDER}all_counts_DA_4latex.tsv
# All count table for SNage.
echo '\begin{table}' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' \begin{center}' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' \setlength{\tabcolsep}{0.5\tabcolsep}' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' \resizebox{\linewidth}{!}{\pgfplotstabletypeset[' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' col sep=tab,' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' every head row/.style={' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' before row={\toprule},' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' after row={\midrule}' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' },' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' every last row/.style={after row=\bottomrule},' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' display columns/0/.style={string type,column type={l}},' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' ]{'"$OUTPUT_FOLDER"'all_counts_SNage_4latex.tsv}}' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' \caption{Numbers of significant genes for each configuration (tissue = SN, age = TRUE).}' | sed -r 's/_/\\_/g' >> ${OUTPUT_FOLDER}results_summary.tex
echo ' \end{center}' >> ${OUTPUT_FOLDER}results_summary.tex
echo '\end{table}' >> ${OUTPUT_FOLDER}results_summary.tex
echo '' >> ${OUTPUT_FOLDER}results_summary.tex
# All count table for SN.
echo '\begin{table}' >> ${OUTPUT_FOLDER}results_summary.tex
......
......@@ -174,19 +174,19 @@ for (i in seq_len(length(config$integrations))) {
dataset_nb_samples <- sqrt(nb_samples_analysis_min / sum(nb_samples_analysis_min))
# We collect the dataset qualities.
gemma_qc_allscores <- get_dataset_properties(config,
"gemma_qc_score",
"dataset_name")[datasets]
gemma_qc_scores <- as.numeric(gemma_qc_allscores)
names(gemma_qc_scores) <- names(gemma_qc_allscores)
gemma_qc_scores[is.na(gemma_qc_scores)] <- median(gemma_qc_scores, na.rm = TRUE)
quality_allscores <- get_dataset_properties(config,
"quality_score",
"dataset_name")[datasets]
quality_scores <- as.numeric(quality_allscores)
names(quality_scores) <- names(quality_allscores)
quality_scores[is.na(quality_scores)] <- median(quality_scores, na.rm = TRUE)
# We compute the final dataset weights.
g <- config$gamma
dataset_weights <- (g + (1 - g) * gemma_qc_scores) * dataset_nb_samples
dataset_weights <- (g + (1 - g) * quality_scores) * dataset_nb_samples
rm(nb_samples_analysis, relevant_categories, dataset_nb_samples)
rm(g, gemma_qc_scores, gemma_qc_allscores)
rm(g, quality_scores, quality_allscores)
# We derive a consensus from the log fold changes and compute associated metrics
# like the weighted average of consensus and conflicting values.
relevant_fc_data <- apply(foldchanges_values, 1, identify_consensus,
......@@ -262,7 +262,7 @@ for (i in seq_len(length(config$integrations))) {
# Third, we then try to restrict the correlation analysis to the top P values only.
vplot_ds_fc <- vplot_ds_fc %>% mutate(id = row.names(vplot_ds_fc))
vplot_ds_pv <- vplot_ds_pv %>% mutate(id = row.names(vplot_ds_pv))
vplot_ds_pv_top <- vplot_ds_pv %>% mutate(best = pmin(v1, v2)) %>% filter(best < 0.05)
vplot_ds_pv_top <- vplot_ds_pv %>% mutate(best = pmin(v1, v2)) %>% filter(best < config$p_val_sign_thres)
vplot_ds_fc_top <- merge(x = vplot_ds_fc, y = vplot_ds_pv_top, by = "id")
# P value correlation restricted (we use -log10 for P values).
......
......@@ -207,6 +207,8 @@ compute_delta <- function(FM, has_ctrl = TRUE) {
# combine them in such a way that the genes that are differentially expressed in both males and
# females should be ranked lower in the gender specific rankings than in the original rankings.
#TODO: include a for loop over the integrations here and until the end.
# Inputs. The results of the male and female comparisons (from the integration).
B_fn <- paste0(input_data_dir, "SN_VSN_PDVsControl_max-avg_integration.tsv")
G_fn <- paste0(input_data_dir, "SN_VSN_Gender_disease_status_max-avg_integration.tsv")
......@@ -295,8 +297,8 @@ rm(FM, MF, G, F, M, B)
# We save the data.
B_ofile <- paste0(output_data_dir, "PDvsControl_rankings.tsv")
G_ofile <- paste0(output_data_dir, "Gender_disease_status_rankings.tsv")
F_ofile <- paste0(output_data_dir, "Female_rankings.tsv")
M_ofile <- paste0(output_data_dir, "Male_rankings.tsv")
F_ofile <- paste0(output_data_dir, "Female_rankings.tsv")
M_ofile <- paste0(output_data_dir, "Male_rankings.tsv")
FM_ofile <- paste0(output_data_dir, "Female_specific_rankings.tsv")
MF_ofile <- paste0(output_data_dir, "Male_specific_rankings.tsv")
write.table(B_enriched, file = B_ofile, sep = "\t", quote = FALSE, col.names = NA)
......
......@@ -9,7 +9,7 @@ datasets:
cleaning: 'FALSE'
has_age: 'TRUE'
tissue: DA
gemma_qc_score: 0.8
quality_score: 1.0 # Was 0.8
-
dataset_name: GSE20163
platform: Affymetrix
......@@ -20,7 +20,7 @@ datasets:
cleaning: 'FALSE'
has_age: 'TRUE'
tissue: SN
gemma_qc_score: 0.8
quality_score: 1.0 # Was 0.8
-
dataset_name: GSE20164
platform: Affymetrix
......@@ -31,7 +31,7 @@ datasets:
cleaning: 'FALSE'
has_age: 'TRUE'
tissue: SN
gemma_qc_score: 0.5
quality_score: 1.0 # Was 0.5
-
dataset_name: GSE20292
platform: Affymetrix
......@@ -42,7 +42,7 @@ datasets:
cleaning: 'FALSE'
has_age: 'TRUE'
tissue: SN
gemma_qc_score: 1
quality_score: 1.0 # Was 1.0
-
dataset_name: GSE7307
platform: Affymetrix
......@@ -53,7 +53,7 @@ datasets:
cleaning: 'FALSE'
has_age: 'FALSE'
tissue: SN
gemma_qc_score: 0.1
quality_score: 1.0 # Was 0.1
-
dataset_name: GSE7621
platform: Affymetrix
......@@ -64,7 +64,7 @@ datasets:
cleaning: 'FALSE'
has_age: 'FALSE'
tissue: SN
gemma_qc_score: 0.7
quality_score: 1.0 # Was 0.7
-
dataset_name: GSE8397
platform: Affymetrix
......@@ -75,7 +75,7 @@ datasets:
cleaning: 'FALSE'
has_age: 'TRUE'
tissue: SN
gemma_qc_score: 1
quality_score: 1.0 # Was 1.0
-
dataset_name: Simunovic
platform: Affymetrix
......@@ -86,7 +86,7 @@ datasets:
cleaning: 'FALSE'
has_age: 'TRUE'
tissue: DA
gemma_qc_score: NA
quality_score: 1.0 # Was NA
-
dataset_name: GSE24378
platform: Affymetrix
......@@ -97,7 +97,7 @@ datasets:
cleaning: 'FALSE'
has_age: 'TRUE'
tissue: DA
gemma_qc_score: 0.9
quality_score: 1.0 # Was 0.9
-
dataset_name: GSE20333
platform: Affymetrix
......@@ -108,7 +108,7 @@ datasets:
cleaning: 'FALSE'
has_age: 'TRUE'
tissue: SN
gemma_qc_score: 0.7
quality_score: 1.0 # Was 0.7
-
dataset_name: Moreira
platform: Agilent
......@@ -119,7 +119,7 @@ datasets:
cleaning: 'FALSE'
has_age: 'TRUE'
tissue: SN
gemma_qc_score: 0.2
quality_score: 1.0 # Was 0.2
-
dataset_name: GSE20159
platform: Illumina
......@@ -130,7 +130,7 @@ datasets:
cleaning: 'FALSE'
has_age: 'TRUE'
tissue: SN
gemma_qc_score: 1
quality_score: 1.0 # Was 1.0
-
dataset_name: GSE26927
platform: Illumina
......@@ -141,7 +141,7 @@ datasets:
cleaning: 'TRUE'
has_age: 'TRUE'
tissue: SN
gemma_qc_score: 0.2
quality_score: 1.0 # Was 0.2
-
dataset_name: GSE49036
platform: Affymetrix
......@@ -152,7 +152,7 @@ datasets:
cleaning: 'FALSE'
has_age: 'TRUE'
tissue: SN
gemma_qc_score: 0.6
quality_score: 1.0 # Was 0.6
rs_datasets:
-
dataset_name: NBB
......@@ -164,4 +164,4 @@ rs_datasets:
cleaning: 'FALSE'
has_age: 'TRUE'
tissue: SN
gemma_qc_score: NA
quality_score: 1.0 # Was NA
......@@ -19,7 +19,7 @@ collapser <- function(df) {
paste(collapse = "|")
}
#' @title Reduces a set to a given size (only to make ti smaller).
#' @title Reduces a set to a given size (only to make it smaller).
#'
#' @description This function accepts a set of numeric values and a given desired size.
#' It then creates a new set of the desired size that can be considered as a reduction
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment