Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
BDS
GeneDER
ArrayUtils
Commits
ebc03074
Commit
ebc03074
authored
Feb 14, 2020
by
Leon-Charles Tranchevent
Browse files
Added support for RNA-seq data analysis (differential expression / limma).
parent
f107b6ac
Changes
5
Hide whitespace changes
Inline
Side-by-side
DESCRIPTION
View file @
ebc03074
...
...
@@ -32,4 +32,4 @@ Imports:
methods,
illuminaHumanv3.db,
topconfects
RoxygenNote:
6.1.1
RoxygenNote:
7.0.2
NAMESPACE
View file @
ebc03074
...
...
@@ -7,3 +7,4 @@ importMethodsFrom("AnnotationDbi", "mget")
importFrom("stats", "model.matrix")
importFrom("utils", "read.delim")
importFrom("stats", "var")
importFrom("stats", "filter")
R/create_design.R
0 → 100644
View file @
ebc03074
#' @title Creates a design matrix for a limma analysis.
#'
#' @description This function creates a design matrix for a limma analysis (without actually
#' running the analysis. The function parameters define which factors are included and how.
#'
#' @param pheno_data The data-frame that contains the phenotypic data (aka clinical data).
#' @param limma_parameters A list of useful Limma parameters that include (1) the main clinical
#' factor used to create sample categories (i.e., Disease status) and (2) the Limma coefficients
#' provided as formulas (similar to the limma::makeContrasts function). Additional parameters are
#' ignored for the time being.
#' @param correct_for_age A boolean indicating whether we should correct for age in the limma
#' design matrix. This expects that a clinical factor termed Age exists. Default to FALSE.
#' @param correct_for_batch A boolean indicating whether we should correct for a poential batch
#' effect in the limma design matrix. This expects that the actual batch information exists in
#' the object pheno_data (as field Batch). Default to FALSE.
#' @param cofactor_name A string indicating a potential cofactor (included in the phenotypic data).
#' The default is NULL, and no cofactor is considered. This will replace the age correction if
#' both were deemed necessary.
#' @param verbose A boolean representing whether the function should display log information. This
#' is FALSE by default.
#' @return The limma fit.
create_design
<-
function
(
pheno_data
,
limma_parameters
,
correct_for_age
=
FALSE
,
correct_for_batch
=
FALSE
,
cofactor_name
=
NULL
,
verbose
=
FALSE
)
{
# We extract the limma analysis parameters
clinical_factor
<-
limma_parameters
[[
1
]]
has_block
<-
!
is.null
(
cofactor_name
)
clinical
<-
factor
(
pheno_data
[[
clinical_factor
]])
rm
(
clinical_factor
)
# We create the design matrix (regular case - no block).
# Depending on the co-factors, we have several possibilities.
# We sill decide based on (i) a block based on a co-factor (paired samples),
# (ii) the age clincal descriptor and (iii) the experimental batches.
# If we need to add a block based on a co-factor.
design
<-
NULL
if
(
has_block
==
FALSE
)
{
if
(
correct_for_age
==
FALSE
)
{
if
(
correct_for_batch
==
FALSE
)
{
# We do not have a block, we do not include age, nor batch.
design
<-
stats
::
model.matrix
(
~
0
+
clinical
)
colnames
(
design
)
<-
levels
(
clinical
)
# We log information.
if
(
verbose
==
TRUE
)
{
message
(
paste0
(
"["
,
Sys.time
(),
"] Design matrix created."
))
}
}
else
{
# Here we have correct_for_batch = TRUE
# We do not have a block, we do not include age but we do include batch.
batches
<-
factor
(
pheno_data
[[
"Batch"
]])
design
<-
stats
::
model.matrix
(
~
0
+
clinical
+
batches
)
colnames
(
design
)
<-
c
(
levels
(
clinical
),
levels
(
batches
)[
2
:
length
(
levels
(
batches
))])
# We log information.
rm
(
batches
)
if
(
verbose
==
TRUE
)
{
message
(
paste0
(
"["
,
Sys.time
(),
"] Design matrix created (batches)."
))
}
}
}
else
{
# Here we have correct_for_age = TRUE
if
(
correct_for_batch
==
FALSE
)
{
# We do not have a block, we do include age, but not batch.
ages
<-
as.numeric
(
pheno_data
[[
"Age"
]])
design
<-
stats
::
model.matrix
(
~
0
+
clinical
+
ages
)
colnames
(
design
)
<-
c
(
levels
(
clinical
),
"Age"
)
# We log information.
rm
(
ages
)
if
(
verbose
==
TRUE
)
{
message
(
paste0
(
"["
,
Sys.time
(),
"] Design matrix created (ages)."
))
}
}
else
{
# Here we have correct_for_batch = TRUE
# We do not have a block, we do include age and batch.
ages
<-
as.numeric
(
pheno_data
[[
"Age"
]])
batches
<-
factor
(
pheno_data
[[
"Batch"
]])
design
<-
stats
::
model.matrix
(
~
0
+
clinical
+
ages
+
batches
)
colnames
(
design
)
<-
c
(
levels
(
clinical
),
"Age"
,
levels
(
batches
)[
2
:
length
(
levels
(
batches
))])
# We log information.
rm
(
ages
,
batches
)
if
(
verbose
==
TRUE
)
{
message
(
paste0
(
"["
,
Sys.time
(),
"] Design matrix created (ages, batches)."
))
}
}
}
}
else
{
# Here we have has_block = TRUE
if
(
correct_for_age
==
FALSE
)
{
if
(
correct_for_batch
==
FALSE
)
{
# We do have a block, we do not include age, nor batch.
clinical_cofactor
<-
factor
(
pheno_data
[[
cofactor_name
]])
# nolint
design
<-
stats
::
model.matrix
(
~
0
+
clinical
+
clinical_cofactor
)
colnames
(
design
)
<-
c
(
levels
(
clinical
),
cofactor_name
)
# We log information.
rm
(
clinical_cofactor
)
if
(
verbose
==
TRUE
)
{
message
(
paste0
(
"["
,
Sys.time
(),
"] Design matrix created (pairs)."
))
}
}
else
{
# Here we have correct_for_batch = TRUE
# We do have a block, we do not include age but we do include batch.
clinical_cofactor
<-
factor
(
pheno_data
[[
cofactor_name
]])
# nolint
batches
<-
factor
(
pheno_data
[[
"Batch"
]])
design
<-
stats
::
model.matrix
(
~
0
+
clinical
+
batches
+
clinical_cofactor
)
colnames
(
design
)
<-
c
(
levels
(
clinical
),
levels
(
batches
)[
2
:
length
(
levels
(
batches
))],
cofactor_name
)
# We log information.
rm
(
clinical_cofactor
,
batches
)
if
(
verbose
==
TRUE
)
{
message
(
paste0
(
"["
,
Sys.time
(),
"] Design matrix created (pairs, batches)."
))
}
}
}
else
{
# Here we have correct_for_age = TRUE
if
(
correct_for_batch
==
FALSE
)
{
# We do have a block, we do include age, but not batch.
clinical_cofactor
<-
factor
(
pheno_data
[[
cofactor_name
]])
# nolint
ages
<-
as.numeric
(
pheno_data
[[
"Age"
]])
design
<-
stats
::
model.matrix
(
~
0
+
clinical
+
ages
+
clinical_cofactor
)
colnames
(
design
)
<-
c
(
levels
(
clinical
),
"Age"
,
cofactor_name
)
# We log information.
rm
(
clinical_cofactor
,
ages
)
if
(
verbose
==
TRUE
)
{
message
(
paste0
(
"["
,
Sys.time
(),
"] Design matrix created (pairs, ages)."
))
}
}
else
{
# Here we have correct_for_batch = TRUE
# We do have a block, we do include age and batch.
clinical_cofactor
<-
factor
(
pheno_data
[[
cofactor_name
]])
# nolint
ages
<-
as.numeric
(
pheno_data
[[
"Age"
]])
batches
<-
factor
(
pheno_data
[[
"Batch"
]])
design
<-
stats
::
model.matrix
(
~
0
+
clinical
+
ages
+
batches
+
clinical_cofactor
)
colnames
(
design
)
<-
c
(
levels
(
clinical
),
"Age"
,
levels
(
batches
)[
2
:
length
(
levels
(
batches
))],
cofactor_name
)
# We log information.
rm
(
clinical_cofactor
,
ages
,
batches
)
if
(
verbose
==
TRUE
)
{
message
(
paste0
(
"["
,
Sys.time
(),
"] Design matrix created (pairs, ages, batches)."
))
}
}
}
}
rm
(
clinical
)
# We return the design.
return
(
design
)
}
\ No newline at end of file
R/extract_DEGs.R
View file @
ebc03074
...
...
@@ -44,7 +44,8 @@ extract_degs <- function(fit, limma_coeffs, k, output_data_dir,
coef
=
paste0
(
"coeff"
,
k
),
n
=
Inf
,
adjust.method
=
pval_adjust_method
,
confint
=
TRUE
)
confint
=
TRUE
,
sort.by
=
"P"
)
# Another way to extract the differentially expressed genes (in parallel to topTable).
# This is only used to then create the Venn diagrams.
...
...
R/run_limma.R
View file @
ebc03074
...
...
@@ -6,7 +6,8 @@
#' be used to detect differentially expressed genes.
#'
#' @param pheno_data The data-frame that contains the phenotypic data (aka clinical data).
#' @param eset The ExpressionSet object that contains the pre-processed data.
#' @param eset The ExpressionSet object that contains the pre-processed data. Alternatively,
#' this can be an EList object (limma package), especially for RNA-seq data.
#' @param limma_parameters A list of useful Limma parameters that include (1) the main clinical
#' factor used to create sample categories (i.e., Disease status) and (2) the Limma coefficients
#' provided as formulas (similar to the limma::makeContrasts function). Additional parameters are
...
...
@@ -65,125 +66,12 @@ run_limma <- function(pheno_data, eset, limma_parameters,
# We sill decide based on (i) a block based on a co-factor (paired samples),
# (ii) the age clincal descriptor and (iii) the experimental batches.
# If we need to add a block based on a co-factor.
design
<-
NULL
if
(
has_block
==
FALSE
)
{
if
(
correct_for_age
==
FALSE
)
{
if
(
correct_for_batch
==
FALSE
)
{
# We do not have a block, we do not include age, nor batch.
design
<-
stats
::
model.matrix
(
~
0
+
clinical
)
colnames
(
design
)
<-
levels
(
clinical
)
# We log information.
if
(
verbose
==
TRUE
)
{
message
(
paste0
(
"["
,
Sys.time
(),
"] Design matrix created."
))
}
}
else
{
# Here we have correct_for_batch = TRUE
# We do not have a block, we do not include age but we do include batch.
batches
<-
factor
(
pheno_data
[[
"Batch"
]])
design
<-
stats
::
model.matrix
(
~
0
+
clinical
+
batches
)
colnames
(
design
)
<-
c
(
levels
(
clinical
),
levels
(
batches
)[
2
:
length
(
levels
(
batches
))])
# We log information.
rm
(
batches
)
if
(
verbose
==
TRUE
)
{
message
(
paste0
(
"["
,
Sys.time
(),
"] Design matrix created (batches)."
))
}
}
}
else
{
# Here we have correct_for_age = TRUE
if
(
correct_for_batch
==
FALSE
)
{
# We do not have a block, we do include age, but not batch.
ages
<-
as.numeric
(
pheno_data
[[
"Age"
]])
design
<-
stats
::
model.matrix
(
~
0
+
clinical
+
ages
)
colnames
(
design
)
<-
c
(
levels
(
clinical
),
"Age"
)
# We log information.
rm
(
ages
)
if
(
verbose
==
TRUE
)
{
message
(
paste0
(
"["
,
Sys.time
(),
"] Design matrix created (ages)."
))
}
}
else
{
# Here we have correct_for_batch = TRUE
# We do not have a block, we do include age and batch.
ages
<-
as.numeric
(
pheno_data
[[
"Age"
]])
batches
<-
factor
(
pheno_data
[[
"Batch"
]])
design
<-
stats
::
model.matrix
(
~
0
+
clinical
+
ages
+
batches
)
colnames
(
design
)
<-
c
(
levels
(
clinical
),
"Age"
,
levels
(
batches
)[
2
:
length
(
levels
(
batches
))])
# We log information.
rm
(
ages
,
batches
)
if
(
verbose
==
TRUE
)
{
message
(
paste0
(
"["
,
Sys.time
(),
"] Design matrix created (ages, batches)."
))
}
}
}
}
else
{
# Here we have has_block = TRUE
if
(
correct_for_age
==
FALSE
)
{
if
(
correct_for_batch
==
FALSE
)
{
# We do have a block, we do not include age, nor batch.
clinical_cofactor
<-
factor
(
pheno_data
[[
cofactor_name
]])
# nolint
design
<-
stats
::
model.matrix
(
~
0
+
clinical
+
clinical_cofactor
)
colnames
(
design
)
<-
c
(
levels
(
clinical
),
cofactor_name
)
# We log information.
rm
(
clinical_cofactor
)
if
(
verbose
==
TRUE
)
{
message
(
paste0
(
"["
,
Sys.time
(),
"] Design matrix created (pairs)."
))
}
}
else
{
# Here we have correct_for_batch = TRUE
# We do have a block, we do not include age but we do include batch.
clinical_cofactor
<-
factor
(
pheno_data
[[
cofactor_name
]])
# nolint
batches
<-
factor
(
pheno_data
[[
"Batch"
]])
design
<-
stats
::
model.matrix
(
~
0
+
clinical
+
batches
+
clinical_cofactor
)
colnames
(
design
)
<-
c
(
levels
(
clinical
),
levels
(
batches
)[
2
:
length
(
levels
(
batches
))],
cofactor_name
)
# We log information.
rm
(
clinical_cofactor
,
batches
)
if
(
verbose
==
TRUE
)
{
message
(
paste0
(
"["
,
Sys.time
(),
"] Design matrix created (pairs, batches)."
))
}
}
}
else
{
# Here we have correct_for_age = TRUE
if
(
correct_for_batch
==
FALSE
)
{
# We do have a block, we do include age, but not batch.
clinical_cofactor
<-
factor
(
pheno_data
[[
cofactor_name
]])
# nolint
ages
<-
as.numeric
(
pheno_data
[[
"Age"
]])
design
<-
stats
::
model.matrix
(
~
0
+
clinical
+
ages
+
clinical_cofactor
)
colnames
(
design
)
<-
c
(
levels
(
clinical
),
"Age"
,
cofactor_name
)
# We log information.
rm
(
clinical_cofactor
,
ages
)
if
(
verbose
==
TRUE
)
{
message
(
paste0
(
"["
,
Sys.time
(),
"] Design matrix created (pairs, ages)."
))
}
}
else
{
# Here we have correct_for_batch = TRUE
# We do have a block, we do include age and batch.
clinical_cofactor
<-
factor
(
pheno_data
[[
cofactor_name
]])
# nolint
ages
<-
as.numeric
(
pheno_data
[[
"Age"
]])
batches
<-
factor
(
pheno_data
[[
"Batch"
]])
design
<-
stats
::
model.matrix
(
~
0
+
clinical
+
ages
+
batches
+
clinical_cofactor
)
colnames
(
design
)
<-
c
(
levels
(
clinical
),
"Age"
,
levels
(
batches
)[
2
:
length
(
levels
(
batches
))],
cofactor_name
)
# We log information.
rm
(
clinical_cofactor
,
ages
,
batches
)
if
(
verbose
==
TRUE
)
{
message
(
paste0
(
"["
,
Sys.time
(),
"] Design matrix created (pairs, ages, batches)."
))
}
}
}
}
rm
(
clinical
)
design
<-
create_design
(
pheno_data
=
pheno_data
,
limma_parameters
=
limma_parameters
,
correct_for_age
=
correct_for_age
,
correct_for_batch
=
correct_for_batch
,
cofactor_name
=
cofactor_name
,
verbose
=
verbose
)
# We build the contrast (only one so far).
contrast
<-
eval
(
substitute
(
limma
::
makeContrasts
(
coeff1
=
c1
,
levels
=
design
),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment