Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
BDS
GeneDER
GeneDER_core
Commits
faa593c1
Commit
faa593c1
authored
May 12, 2020
by
Leon-Charles Tranchevent
Browse files
New method to compute the gene specificity (use raw results of the integration as control).
parent
1aae2498
Changes
2
Hide whitespace changes
Inline
Side-by-side
17-Enrichment_all/subtract_rankings.R
View file @
faa593c1
...
...
@@ -35,7 +35,7 @@ message(paste0("[", Sys.time(), "] Configuration done."))
#' no delta values are returned.
#' @return An enriched data-frame that contains additional columns, in particular one that
#' contains the delta values, representing the specificity to the "ref" comparison (unless
#' has_ctrl was s
r
t to FALSE).
#' has_ctrl was set to FALSE).
compute_delta
<-
function
(
FM
,
has_ctrl
=
TRUE
)
{
# First, we compute the Pi values for both comparisons ("ref" and "control"). For this, we use
...
...
@@ -217,25 +217,40 @@ for (i in seq_len(length(config$integrations))) {
# We otherwise use default parameters VSN = TRUE, probe selection = max-avg
# integration = Marot-Mayer.
# We use four of the seven limma comparisons.
# Note: we use the filtered results of the integration ("integration") as well
# as the non-filtered results ("integration_raw"), the latter only as a control.
# The idea is that, to identify, say, female-specific genes, we focus on the
# female DEGs (filtered results), since we want the genes to be at least DEG in
# females, but we control for the behaviour in males using the raw results: it
# does not matter whether the gene is a DEG in males (it is actually even better
# if it is not, and the raw results contain more of these non-expressed or
# non-differentially-expressed genes).
limmas
<-
config
$
limma_analyses
B_fn
<-
paste0
(
input_data_dir
,
integration
$
name
,
"_VSN_"
,
limmas
[[
2
]]
$
name
,
"_max-avg_integration.tsv"
)
F_fn
<-
paste0
(
input_data_dir
,
integration
$
name
,
"_VSN_"
,
limmas
[[
5
]]
$
name
,
"_max-avg_integration.tsv"
)
M_fn
<-
paste0
(
input_data_dir
,
integration
$
name
,
"_VSN_"
,
limmas
[[
6
]]
$
name
,
"_max-avg_integration.tsv"
)
G_fn
<-
paste0
(
input_data_dir
,
integration
$
name
,
"_VSN_"
,
limmas
[[
7
]]
$
name
,
"_max-avg_integration.tsv"
)
B
<-
read.delim
(
B_fn
,
stringsAsFactors
=
FALSE
,
row.names
=
1
)
F
<-
read.delim
(
F_fn
,
stringsAsFactors
=
FALSE
,
row.names
=
1
)
M
<-
read.delim
(
M_fn
,
stringsAsFactors
=
FALSE
,
row.names
=
1
)
G
<-
read.delim
(
G_fn
,
stringsAsFactors
=
FALSE
,
row.names
=
1
)
rm
(
B_fn
,
F_fn
,
M_fn
,
G_fn
)
B_fn
<-
paste0
(
input_data_dir
,
integration
$
name
,
"_VSN_"
,
limmas
[[
2
]]
$
name
,
"_max-avg_integration.tsv"
)
F_fn
<-
paste0
(
input_data_dir
,
integration
$
name
,
"_VSN_"
,
limmas
[[
5
]]
$
name
,
"_max-avg_integration.tsv"
)
Fr_fn
<-
paste0
(
input_data_dir
,
integration
$
name
,
"_VSN_"
,
limmas
[[
5
]]
$
name
,
"_max-avg_integration_raw.tsv"
)
M_fn
<-
paste0
(
input_data_dir
,
integration
$
name
,
"_VSN_"
,
limmas
[[
6
]]
$
name
,
"_max-avg_integration.tsv"
)
Mr_fn
<-
paste0
(
input_data_dir
,
integration
$
name
,
"_VSN_"
,
limmas
[[
6
]]
$
name
,
"_max-avg_integration_raw.tsv"
)
G_fn
<-
paste0
(
input_data_dir
,
integration
$
name
,
"_VSN_"
,
limmas
[[
7
]]
$
name
,
"_max-avg_integration.tsv"
)
B
<-
read.delim
(
B_fn
,
stringsAsFactors
=
FALSE
,
row.names
=
1
)
F
<-
read.delim
(
F_fn
,
stringsAsFactors
=
FALSE
,
row.names
=
1
)
Fr
<-
read.delim
(
Fr_fn
,
stringsAsFactors
=
FALSE
,
row.names
=
1
)
M
<-
read.delim
(
M_fn
,
stringsAsFactors
=
FALSE
,
row.names
=
1
)
Mr
<-
read.delim
(
Mr_fn
,
stringsAsFactors
=
FALSE
,
row.names
=
1
)
G
<-
read.delim
(
G_fn
,
stringsAsFactors
=
FALSE
,
row.names
=
1
)
rm
(
B_fn
,
F_fn
,
Fr_fn
,
M_fn
,
Mr_fn
,
G_fn
)
# We start by merging the male and female rankings. Most genes are present in both, and the few
# that are not are removed (since we cannot say whether they are gender-specific or not).
FM
<-
merge
(
x
=
F
,
y
=
M
,
by
=
"SYMBOL"
,
all
=
FALSE
)
MF
<-
merge
(
x
=
M
,
y
=
F
,
by
=
"SYMBOL"
,
all
=
FALSE
)
FM
<-
merge
(
x
=
F
,
y
=
Mr
,
by
=
"SYMBOL"
,
all
=
FALSE
)
MF
<-
merge
(
x
=
M
,
y
=
Fr
,
by
=
"SYMBOL"
,
all
=
FALSE
)
rm
(
Fr
,
Mr
)
# We select the fields we need:
# Gene:
...
...
@@ -364,7 +379,7 @@ for (i in seq_len(length(config$integrations))) {
rm
(
FM_deltaplot_ofile
,
MF_deltaplot_ofile
)
# Another way to look at it.
# The gender DEGs and the gender specific DEGs are different if the correction
s
is effective.
# The gender DEGs and the gender specific DEGs are different if the correction is effective.
# Thus the mean/median change (correction effect) in ranks can be computed (it should not
# be null).
FM_rank_diff
<-
FM_enriched
$
ref_pivalue_rank
-
rank
(
FM_enriched
$
Delta
)
...
...
Confs/project_config.yml
View file @
faa593c1
...
...
@@ -32,6 +32,7 @@ limma_analyses:
clinical_factor
:
"
Gender"
use_for_enrichment
:
"
FALSE"
can_be_specific
:
"
FALSE"
use_for_network
:
"
FALSE"
-
factor
:
Disease.status
coefficient
:
"
PD
-
Control"
...
...
@@ -39,6 +40,7 @@ limma_analyses:
clinical_factor
:
"
Disease_status"
use_for_enrichment
:
"
FALSE"
can_be_specific
:
"
FALSE"
use_for_network
:
"
TRUE"
-
factor
:
gender_disease_status
coefficient
:
"
F.PD
-
M.PD"
...
...
@@ -46,6 +48,7 @@ limma_analyses:
clinical_factor
:
"
Gender_PD"
use_for_enrichment
:
"
FALSE"
can_be_specific
:
"
FALSE"
use_for_network
:
"
FALSE"
-
factor
:
gender_disease_status
coefficient
:
"
F.Control
-
M.Control"
...
...
@@ -53,6 +56,7 @@ limma_analyses:
clinical_factor
:
"
Gender_Control"
use_for_enrichment
:
"
FALSE"
can_be_specific
:
"
FALSE"
use_for_network
:
"
FALSE"
-
factor
:
gender_disease_status
coefficient
:
"
F.PD
-
F.Control"
...
...
@@ -60,6 +64,7 @@ limma_analyses:
clinical_factor
:
"
Disease_status_females"
use_for_enrichment
:
"
TRUE"
can_be_specific
:
"
TRUE"
use_for_network
:
"
TRUE"
-
factor
:
gender_disease_status
coefficient
:
"
M.PD
-
M.Control"
...
...
@@ -67,6 +72,7 @@ limma_analyses:
clinical_factor
:
"
Disease_status_males"
use_for_enrichment
:
"
TRUE"
can_be_specific
:
"
TRUE"
use_for_network
:
"
TRUE"
-
factor
:
gender_disease_status
coefficient
:
"
(F.PD
-
F.Control)
-
(M.PD
-
M.Control)"
...
...
@@ -74,6 +80,7 @@ limma_analyses:
clinical_factor
:
"
Gender_disease_status"
use_for_enrichment
:
"
TRUE"
can_be_specific
:
"
FALSE"
use_for_network
:
"
TRUE"
# Integration schemes
nb_min_pval
:
2
perc_min_pval
:
0.33334
...
...
@@ -82,13 +89,15 @@ integrations:
-
name
:
SN
criteria
:
tissue;SN
use_for_network
:
"
FALSE"
-
name
:
DA
criteria
:
tissue;DA
use_for_network
:
"
FALSE"
-
name
:
SNage
criteria
:
tissue;SN,has_age;TRUE
use_for_network
:
"
TRUE"
# Probe selection methods
selections
:
-
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment