Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
BDS
GeneDER
GeneDER_core
Commits
c9b5b26b
Commit
c9b5b26b
authored
Sep 23, 2020
by
Leon-Charles Tranchevent
Browse files
Improved DoRothEA enrichment procedure (enriched outputs and figure generation).
parent
62ce0a0b
Changes
1
Hide whitespace changes
Inline
Side-by-side
18-RegulatoryNetwork_all/run_dorothea_enrichment.R
View file @
c9b5b26b
...
...
@@ -44,7 +44,8 @@ duplicate_row_first_cell <- function(row, sep = "|") {
keys
<-
strsplit
(
row
[
1
],
sep
,
fixed
=
TRUE
)[[
1
]]
val_1
<-
rep
(
row
[
2
],
length
(
keys
))
val_2
<-
rep
(
row
[
3
],
length
(
keys
))
to_ret
<-
cbind
(
Gene
=
keys
,
LFC
=
val_1
,
ranking_value
=
val_2
)
val_3
<-
rep
(
row
[
4
],
length
(
keys
))
to_ret
<-
cbind
(
Gene
=
keys
,
LFC
=
val_1
,
P_value
=
val_2
,
adj_P_value
=
val_3
)
return
(
to_ret
)
}
...
...
@@ -75,6 +76,36 @@ for (ii in seq_len(dim(update_info)[1])) {
names
(
viper_regulon
)
<-
paste
(
vp_names
,
vp_fullnames
[
2
,
],
sep
=
"_"
)
rm
(
update_info
,
vp_fullnames
,
vp_names
,
ii
)
# We compute the number of percentages of activations / inhibitions
# from the pre-defined regulons as to better understand the actual meaning
# of the enrichment scores (NES).
# An inhibitor whose targets are depleted is associated with a positive score.
# An activator whose targets are depleted is associated with a negative score.
# An inhibitor whose targets are enriched is associated with a negaive score.
# An activator whose targets are enriched is associated with a positive score.
all_npos
<-
array
(
dim
=
length
(
names
(
viper_regulon
)))
all_nneg
<-
array
(
dim
=
length
(
names
(
viper_regulon
)))
all_ppos
<-
array
(
dim
=
length
(
names
(
viper_regulon
)))
all_pneg
<-
array
(
dim
=
length
(
names
(
viper_regulon
)))
i
<-
1
for
(
gene
in
names
(
viper_regulon
))
{
data
<-
viper_regulon
[[
gene
]]
$
tfmode
n
<-
length
(
data
)
all_npos
[
i
]
<-
length
(
data
[
data
==
1
])
all_nneg
[
i
]
<-
length
(
data
[
data
==
-1
])
all_ppos
[
i
]
<-
100
*
all_npos
[
i
]
/
n
all_pneg
[
i
]
<-
100
*
all_nneg
[
i
]
/
n
i
<-
i
+
1
rm
(
data
,
n
)
}
rm
(
gene
,
i
)
viper_regulon_infos
<-
data.frame
(
nb_pos
=
all_npos
,
nb_neg
=
all_nneg
,
pc_pos
=
all_ppos
,
pc_neg
=
all_pneg
)
rownames
(
viper_regulon_infos
)
<-
names
(
viper_regulon
)
rm
(
all_npos
,
all_nneg
,
all_ppos
,
all_pneg
)
# We do all integration schemes one by one.
for
(
i
in
seq_len
(
length
(
config
$
integrations
)))
{
...
...
@@ -83,6 +114,7 @@ for (i in seq_len(length(config$integrations))) {
# We only do the enrichment if it is necessary.
if
(
integration
$
use_for_network
==
"FALSE"
)
{
rm
(
integration
)
next
}
...
...
@@ -94,16 +126,17 @@ for (i in seq_len(length(config$integrations))) {
# We only do the enrichment if it is necessary.
if
(
limma_parameters
$
use_for_network
==
"FALSE"
)
{
rm
(
limma_parameters
)
next
}
# We define the I/Os.
analysis_prefix
<-
paste0
(
integration
$
name
,
"_"
,
limma_parameters
$
name
)
ranking_file
<-
paste0
(
input_data_dir
,
analysis_prefix
,
"_rankings.tsv"
)
analysis_prefix
<-
paste0
(
integration
$
name
,
"_
VSN_
"
,
limma_parameters
$
name
,
"_max-avg"
)
ranking_file
<-
paste0
(
input_data_dir
,
analysis_prefix
,
"_
all_pivalue_
rankings.tsv"
)
# We read the prepared ranking and keep only the value used to rank.
ranking
<-
read.delim
(
ranking_file
,
stringsAsFactors
=
FALSE
,
row.names
=
1
)
%>%
select
(
Gene
,
ref_logFC
,
ranking
_value
)
ranking
<-
read.delim
(
ranking_file
,
stringsAsFactors
=
FALSE
)
%>%
select
(
Gene
,
log_fold_change
,
P_value
,
adj_P
_value
)
rm
(
ranking_file
)
# We clean genes (no duplicate entries with pipe separated gene symbols).
...
...
@@ -112,28 +145,28 @@ for (i in seq_len(length(config$integrations))) {
ranking_clean
<-
data.frame
(
do.call
(
rbind
,
c
(
tmp_res
)),
row.names
=
NULL
,
stringsAsFactors
=
FALSE
)
ranking_clean
$
ranking_value
<-
as.numeric
(
ranking_clean
$
ranking_value
)
ranking_clean
$
LFC
<-
as.numeric
(
ranking_clean
$
LFC
)
ranking_clean
<-
ranking_clean
%>%
arrange
(
desc
(
ranking_value
))
ranking_clean
$
LFC
<-
as.numeric
(
ranking_clean
$
LFC
)
ranking_clean
$
P_value
<-
as.numeric
(
ranking_clean
$
P_value
)
ranking_clean
$
adj_P_value
<-
as.numeric
(
ranking_clean
$
adj_P_value
)
ranking_clean
<-
ranking_clean
%>%
arrange
(
adj_P_value
)
rm
(
tmp_res
)
}
else
{
ranking_clean
<-
ranking
ranking_clean
$
ranking_value
<-
as.numeric
(
ranking_clean
$
ranking_value
)
ranking_clean
$
LFC
<-
as.numeric
(
ranking_clean
$
ref_logFC
)
ranking_clean
<-
ranking_clean
%>%
select
(
Gene
,
LFC
,
ranking_value
)
%>%
arrange
(
desc
(
ranking_value
))
ranking_clean
$
LFC
<-
as.numeric
(
ranking_clean
$
ref_logFC
)
ranking_clean
$
P_value
<-
as.numeric
(
ranking_clean
$
P_value
)
ranking_clean
$
adj_P_value
<-
as.numeric
(
ranking_clean
$
adj_P_value
)
ranking_clean
<-
ranking_clean
%>%
arrange
(
adj_P_value
)
}
rm
(
ranking
)
# Estimate Z-score values for the GES (As in DoRothEA manual, cheeck VIPER manual for details).
# In our case, we compute the pseudo P values based on the pi values with 10 ^ -pi.
# In addition, we correct through the median logFC (since pi = -log(p) * logFC).
# Estimate Z-score values for the GES (As in DoRothEA manual, check VIPER manual for details).
myStatistics
<-
matrix
(
ranking_clean
$
LFC
,
dimnames
=
list
(
ranking_clean
$
Gene
,
"logFC"
)
)
my
p
Pvalue
<-
matrix
(
10
^
(
-
ranking_clean
$
ranking_value
*
median
(
abs
(
ranking_clean
$
LFC
)))
,
dimnames
=
list
(
ranking_clean
$
Gene
,
"
p
P.Value"
)
)
mySignature
<-
(
qnorm
(
my
p
Pvalue
/
2
,
lower.tail
=
FALSE
)
*
sign
(
myStatistics
))[,
1
]
dimnames
=
list
(
ranking_clean
$
Gene
,
"logFC"
))
myPvalue
<-
matrix
(
ranking_clean
$
adj_P_value
,
dimnames
=
list
(
ranking_clean
$
Gene
,
"P.Value"
))
mySignature
<-
(
qnorm
(
myPvalue
/
2
,
lower.tail
=
FALSE
)
*
sign
(
myStatistics
))[,
1
]
mySignature
<-
mySignature
[
order
(
mySignature
,
decreasing
=
T
)]
rm
(
ranking_clean
,
my
p
Pvalue
,
myStatistics
)
rm
(
ranking_clean
,
myPvalue
,
myStatistics
)
# Estimate TF activities via Viper.
mrs
<-
msviper
(
ges
=
mySignature
,
regulon
=
viper_regulon
,
minsize
=
4
,
ges.filter
=
F
)
...
...
@@ -146,20 +179,34 @@ for (i in seq_len(length(config$integrations))) {
Size
=
mrs
$
es
$
size
[
names
(
mrs
$
es
$
nes
)
],
NES
=
mrs
$
es
$
nes
,
p.value
=
mrs
$
es
$
p.value
)
TF_activities
<-
merge
(
viper_regulon_infos
,
TF_activities
,
by
=
0
,
all.y
=
TRUE
)
TF_activities
<-
TF_activities
[
order
(
TF_activities
$
p.value
),
]
%>%
filter
(
evdc_lvl
%in%
c
(
"A"
,
"B"
,
"C"
,
"D"
))
%>%
mutate
(
FDR
=
p.adjust
(
p.value
,
method
=
"fdr"
))
rm
(
mrs
,
mySignature
,
tf_name
,
evidence_level
)
rownames
(
TF_activities
)
<-
TF_activities
$
Row.names
TF_activities
$
Row.names
<-
NULL
rm
(
mrs
,
tf_name
,
evidence_level
)
# Save results
TF_act_fn
<-
paste0
(
output_data_dir
,
analysis_prefix
,
"_TFactivities.tsv"
)
write.table
(
TF_activities
,
file
=
TF_act_fn
,
sep
=
"\t"
,
quote
=
FALSE
,
row.names
=
FALSE
)
rm
(
TF_act_fn
,
TF_activities
,
analysis_prefix
,
limma_parameters
)
# We create pseudo-GSEA images to check the enrichment visually as well.
for
(
gene
in
names
(
viper_regulon
))
{
targets
<-
names
(
viper_regulon
[[
gene
]]
$
tfmode
)
targets_idx
<-
match
(
targets
,
names
(
mySignature
))
png
(
paste0
(
output_data_dir
,
analysis_prefix
,
"_"
,
gene
,
".png"
))
plot
(
mySignature
,
main
=
paste
(
gene
,
"="
,
round
(
TF_activities
[
gene
,
]
$
NES
,
2
)))
abline
(
v
=
targets_idx
,
col
=
"red"
)
points
(
targets_idx
,
mySignature
[
targets
],
col
=
"red"
,
bg
=
"red"
,
pch
=
25
)
dev.off
()
rm
(
targets
,
targets_idx
)
}
rm
(
gene
,
TF_act_fn
,
TF_activities
,
analysis_prefix
,
limma_parameters
,
mySignature
)
}
# End for each limma comparison.
rm
(
j
,
integration
)
}
# End for each integration.
rm
(
i
,
viper_regulon
)
rm
(
i
,
viper_regulon
,
viper_regulon_infos
)
# We log the session details for reproducibility.
rm
(
config
,
output_data_dir
,
input_data_dir
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment