Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
BDS
GeneDER
GeneDER_core
Commits
fbfefcbd
Commit
fbfefcbd
authored
Mar 08, 2021
by
LeonCharles Tranchevent
Browse files
Cosmetic changes to the integration step.
parent
3e4e2db5
Changes
2
Hide whitespace changes
Inline
Sidebyside
06Data_integration/compute_dataset_overlap.R
View file @
fbfefcbd
...
...
@@ 47,7 +47,7 @@ compute_overlap < function(row) {
m_ds
<
unlist
(
strsplit
(
row
[
"male_used_datasets"
],
"\\"
))
overlap
<
intersect
(
f_ds
,
m_ds
)
local_min
<
min
(
length
(
f_ds
),
length
(
m_ds
))
over_perc
<
length
(
overlap
)
/
local_min
over_perc
<
length
(
overlap
)
/
local_min
over_str
<
paste
(
overlap
,
collapse
=
""
)
return
(
c
(
over_str
,
length
(
f_ds
),
length
(
m_ds
),
length
(
overlap
),
over_perc
))
}
...
...
06Data_integration/compute_gender_specificity.R
View file @
fbfefcbd
...
...
@@ 39,7 +39,7 @@ message(paste0("[", Sys.time(), "] Configuration done."))
#' contains the genderspecificity scores, representing the specificity to the "ref" comparison
#' (unless has_ctrl is set to FALSE).
compute_genderspecificity_scores
<
function
(
FM
,
has_ctrl
=
TRUE
)
{
# First, we compute the Pi values for both comparisons ("ref" and "control"). For this, we use
# the nominal P values and the log fold changes. Using the corrected P values would be more
# difficult since most values would be 1. We anyway intend to use the gender specific rankings
...
...
@@ 59,14 +59,14 @@ compute_genderspecificity_scores < function(FM, has_ctrl = TRUE) {
# create the ranking for GSEA when we analyse gender DEGs (and not gender specific
# DEGs).
#
# We start by the most obvious cases for which all values are present.
# The Pi value is simply the absolute log fold change * the P value (on a log10 scale).
FM
$
ref_pivalue
<

log10
(
FM
$
ref_Pval
)
*
abs
(
FM
$
ref_logFC
)
if
(
has_ctrl
)
{
FM
$
ctrl_pivalue
<

log10
(
FM
$
ctrl_Pval
)
*
abs
(
FM
$
ctrl_logFC
)
}
# We continue with special cases, that is when P value == 1.
# These are the genes that are definitely not differentially expressed.
# Because P value == 1, we have Pi value == 0.
...
...
@@ 83,12 +83,12 @@ compute_genderspecificity_scores < function(FM, has_ctrl = TRUE) {
rm
(
pseudo_pval_ctrl
,
pseudo_pi_ctrl
)
}
rm
(
pseudo_pval_ref
,
pseudo_pi_ref
)
# We then aim at creating a delta that represents the difference between the differential
# expression in "ref" and in "ctrl". The comparison is done based on the rankings, which
# are themseleves based on the Pi values. We can not use the Pi values directly since they
# can be so different between the two comparisons.
# Creating the ranks and rank ratios based on the Pi values for the "ref" comparison.
# Nothing special there, we simply order and rank.
#
...
...
@@ 105,13 +105,13 @@ compute_genderspecificity_scores < function(FM, has_ctrl = TRUE) {
max_rank
<
max
(
FM
$
ref_pivalue_rank
)
FM
$
ref_pivalue_rankratio
<
FM
$
ref_pivalue_rank
/
max_rank
rm
(
max_rank
)
# In case, we have no control, the job is done at this stage.
if
(
!
has_ctrl
)
{
FM
$
ranking_value
<
FM
$
ref_pivalue
return
(
FM
)
}
# Now creating the ranks and rank ratios based on the Pi values for the "ctrl" comparison.
# We will add two fields:
# ctrl_pivalue_rank:
...
...
@@ 128,7 +128,7 @@ compute_genderspecificity_scores < function(FM, has_ctrl = TRUE) {
max_rank
<
max
(
FM
$
ctrl_pivalue_rank
)
FM
$
ctrl_pivalue_rankratio
<
FM
$
ctrl_pivalue_rank
/
max_rank
rm
(
max_rank
)
# The delta is simply the difference between the "ref" and "ctrl" rank ratios, with
# rank ratios based on the respective Pi values.
# We will add one field:
...
...
@@ 140,7 +140,71 @@ compute_genderspecificity_scores < function(FM, has_ctrl = TRUE) {
# correspond to neutral genes (as differentially expressed in "ref" and
# in "ctrl").
FM
$
Delta
<
FM
$
ref_pivalue_rankratio

FM
$
ctrl_pivalue_rankratio
# We return the enriched dataframe.
FM
$
gender_specific_score
<
1

FM
$
Delta
return
(
FM
)
}
# To do the ranking stuff but based on FDR instead of pi.
compute_genderspecificity_scores_FDR
<
function
(
FM
,
has_ctrl
=
TRUE
)
{
# We then aim at creating a delta that represents the difference between the differential
# expression in "ref" and in "ctrl". The comparison is done based on the rankings, which
# are themseleves based on the FDR values.
# Creating the ranks and rank ratios based on the P values for the "ref" comparison.
# Nothing special there, we simply order and rank.
#
# We will add two fields:
# ref_pvalue_rank:
# The rank of the genes based on their P values (for the "ref" comparison).
# Values are ~[1, 22000]. Small ranks correspond to small P values (and therefore
# significant DEGs).
# ref_pvalue_rankratio:
# The rank ratios of the genes based on the P value ranks (for the "ref"
# comparison). Values are [0, 1]. Small ratios correspond to small P values
# (and therefore significant DEGs).
FM
$
ref_pvalue_rank
<
rank
(
FM
$
ref_adj_Pval
,
ties.method
=
"average"
)
max_rank
<
max
(
FM
$
ref_pvalue_rank
)
FM
$
ref_pvalue_rankratio
<
FM
$
ref_pvalue_rank
/
max_rank
rm
(
max_rank
)
# In case, we have no control, the job is done at this stage.
if
(
!
has_ctrl
)
{
FM
$
ranking_value
<
FM
$
ref_pvalue
return
(
FM
)
}
# Now creating the ranks and rank ratios based on the P values for the "ctrl" comparison.
# We will add two fields:
# ctrl_pvalue_rank:
# The rank of the genes based on their P values (for the "ctrl" comparison).
# Values are ~[1, 22000]. Small ranks correspond to small P values (and therefore
# significant DEGs). High ranks correspond to non differentially expressed
# genes (P value == 1).
# ctrl_pvalue_rankratio:
# The rank ratios of the genes based on the P value ranks (for the "ctrl"
# comparison). Values are [0, 1]. Small ratios correspond to small P values
# (and therefore significant DEGs). High ratios correspond to non
# differentially expressed genes (P value == 1).
FM
$
ctrl_pvalue_rank
<
rank
(
FM
$
ctrl_adj_Pval
,
ties.method
=
"average"
)
max_rank
<
max
(
FM
$
ctrl_pvalue_rank
)
FM
$
ctrl_pvalue_rankratio
<
FM
$
ctrl_pvalue_rank
/
max_rank
rm
(
max_rank
)
# The delta is simply the difference between the "ref" and "ctrl" rank ratios, with
# rank ratios based on the respective P values.
# We will add one field:
# Delta:
# The difference between the "ref" and "ctrl" P based rank ratios.
# Values are [1, 1]. Negative values correspond to "ref" specific genes
# Positive values correspond non "ref" specific genes, including but not
# limited to "ctrl" specific genes, values around 0 are in the middle and
# correspond to neutral genes (as differentially expressed in "ref" and
# in "ctrl").
FM
$
Delta
<
FM
$
ref_pvalue_rankratio

FM
$
ctrl_pvalue_rankratio
# We return the enriched dataframe.
FM
$
gender_specific_score
<
1

FM
$
Delta
return
(
FM
)
...
...
@@ 191,10 +255,12 @@ for (i in seq_len(length(config$integrations))) {
# We start by merging the male and female rankings. Most genes are present in both and the few
# that are not are removed (since we can not say whether they are gender specific or not).
##
FM < merge(x = F, y = Mr, by = "SYMBOL", all.x = TRUE)
##
MF < merge(x = M, y = Fr, by = "SYMBOL", all.x = TRUE)
## FM < merge(x = F, y = Mr, by = "SYMBOL", all.x = TRUE)
## MF < merge(x = M, y = Fr, by = "SYMBOL", all.x = TRUE)
FM
<
merge
(
x
=
Fr
,
y
=
Mr
,
by
=
"SYMBOL"
)
MF
<
merge
(
x
=
Mr
,
y
=
Fr
,
by
=
"SYMBOL"
)
## FM < merge(x = F, y = M, by = "SYMBOL", all.x = TRUE)
## MF < merge(x = M, y = F, by = "SYMBOL", all.x = TRUE)
rm
(
Fr
,
Mr
,
F
,
M
)
# We select the fields we need:
...
...
@@ 242,6 +308,10 @@ for (i in seq_len(length(config$integrations))) {
MF_enriched
<
compute_genderspecificity_scores
(
MF
)
rm
(
FM
,
MF
)
FM_enriched_FDR
<
compute_genderspecificity_scores_FDR
(
FM
)
MF_enriched_FDR
<
compute_genderspecificity_scores_FDR
(
MF
)
# We save the full data.
FM_ofile
<
paste0
(
output_data_dir
,
integration
$
name
,
"_"
,
limmas
[[
5
]]
$
name
,
"_maxavg_genderspecificityscore_fulldata.tsv"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment