Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
BDS
GeneDER
ArrayUtils
Commits
64af219d
Commit
64af219d
authored
Apr 24, 2019
by
Leon-Charles Tranchevent
Browse files
Added the functions related to Limma analyses.
parent
a0ff6216
Changes
4
Hide whitespace changes
Inline
Side-by-side
DESCRIPTION
View file @
64af219d
...
...
@@ -12,8 +12,13 @@ LazyData: true
Imports:
Biobase,
utils,
stats,
affy,
arrayQualityMetrics,
SCAN.UPC,
massiR
massiR,
limma,
stringr,
AnnotationDbi,
statmod
RoxygenNote: 6.1.1
R/extract_DEGs.R
0 → 100644
View file @
64af219d
#' @title Extract DEGs from a Limma fit.
#'
#' @description The function extracts the differentially expressed genes from an existing
#' limma fit and for a predefined analysis (aka a coefficient). It saves the data in a TSV file
#' and creates several plots in the process (Venn diagrams, MD plots).
#'
#' @param fit A fit as provided by a complete Limma analysis (for instance using
#' ArrayUtils::run_limma).
#' @param limma_coeffs A list of all limma coefficient names associated with that fit.
#' It will be used to name the output files, and to identify the results in the Venn
#' diagrams.
#' @param k The index of the current coefficient (from limma_coeffs).
#' @param output_data_dir A string representing the folder in which the output files
#' are going to be stored.
#' @param file_suffix A string used as a suffix for the file names. Default to "".
#' @param file_prefix A string used to prefix the file names. Default to "".
#' @param pval_adjust_method A string code indicating the multiple testing correction
#' method to use. Default to BH.
#' @return NULL
extract_DEGs
<-
function
(
fit
,
limma_coeffs
,
k
,
output_data_dir
,
file_suffix
=
""
,
file_prefix
=
""
,
pval_adjust_method
=
"BH"
)
{
# We create the output file names.
if
(
file_suffix
!=
""
)
{
file_suffix
<-
paste0
(
file_suffix
,
"_"
)
}
if
(
file_prefix
!=
""
)
{
file_prefix
<-
paste0
(
"_"
,
file_prefix
)
}
venn_filename
<-
paste0
(
output_data_dir
,
file_suffix
,
"venn_"
,
limma_coeffs
[
k
],
file_prefix
,
".png"
)
md_fn
<-
paste0
(
output_data_dir
,
file_suffix
,
"MD_"
,
limma_coeffs
[
k
],
file_prefix
,
".png"
)
table_fn
<-
paste0
(
output_data_dir
,
file_suffix
,
"toptable_"
,
limma_coeffs
[
k
],
file_prefix
,
".tsv"
)
# We extract the DEGs using toptable.
table
<-
limma
::
topTable
(
fit
,
coef
=
paste0
(
"coeff"
,
k
),
n
=
Inf
,
adjust.method
=
pval_adjust_method
,
confint
=
TRUE
)
# Another way to extract the differentially expressed genes (in parallel to topTable).
# This is only used to then create the Venn diagrams.
results
<-
limma
::
decideTests
(
fit
,
coefficient
=
paste0
(
"coeff"
,
k
),
adjust.method
=
pval_adjust_method
)
# We plot Venn diagrams that summarize the results and show to which
# extend the analyses overlap.
grDevices
::
png
(
venn_filename
)
limma
::
vennDiagram
(
results
,
names
=
limma_coeffs
,
include
=
c
(
"up"
,
"down"
),
circle.col
=
c
(
"darkorange"
,
"darkorange2"
,
"darkorange4"
),
counts.col
=
c
(
"blue"
,
"green"
),
lwd
=
3
,
cex
=
c
(
1.0
,
0.1
,
0.1
),
show.include
=
TRUE
)
grDevices
::
dev.off
()
# We then plot the Mean vs Deviation of the comparisons and highlight the significant genes.
grDevices
::
png
(
md_fn
)
limma
::
plotMD
(
fit
,
coef
=
paste0
(
"coeff"
,
k
),
status
=
results
[,
k
],
values
=
c
(
1
,
-1
),
hl.col
=
c
(
"red"
,
"blue"
),
hl.pch
=
c
(
16
,
16
),
hl.cex
=
c
(
0.9
,
0.9
))
grDevices
::
dev.off
()
# Save the top tables to TSV files.
utils
::
write.table
(
table
,
file
=
table_fn
,
sep
=
"\t"
,
quote
=
FALSE
,
col.names
=
NA
)
}
R/get_affy_annots.R
0 → 100644
View file @
64af219d
#' @title Get Gene annotations for affy arrays.
#'
#' @description This function reads the library defined by the array type and
#' extract the relevant information into a data-frame.
#'
#' @param array_type A string representing the array type.
#' @param entities The actual set of object ids for which we want gene annotations.
#' It can for instance be probe ids or gene ids. The parameter keytype has to be
#' modified accordingly. By default, the expected type is probe id.
#' @param columns An array of column names to be returned. By default, it includes
#' the probe id, the gene official symbol, the EnsEMBL id and the Entrez-gene id.
#' @param keytype The type of key used. It will be used to map between the annotations
#' and the provided list of entities. The default is probe id.
#' @return A data-frame of the provided entities, annotated.
get_affy_annots
<-
function
(
array_type
,
entities
,
columns
=
c
(
"PROBEID"
,
"SYMBOL"
,
"ENSEMBL"
,
"ENTREZID"
),
keytype
=
"PROBEID"
)
{
# We define the library name (removing underscores and adding .db).
affy_library_name
<-
paste0
(
stringr
::
str_to_lower
(
stringr
::
str_remove
(
array_type
,
"_"
)),
".db"
)
cmd
<-
substitute
(
gene_annots
<-
AnnotationDbi
::
select
(
x
=
local_db
,
keys
=
entities
,
columns
=
columns
,
keytype
=
keytype
),
list
(
local_db
=
as.name
(
affy_library_name
)))
eval
(
cmd
)
return
(
gene_annots
)
}
R/run_limma.R
0 → 100644
View file @
64af219d
#' @title Run a limma analysis.
#'
#' @description This function prepares and runs a limma analysis up until the fitting procedure.
#' The Limma analysis can be simple (A vs B) or more complex ((A vs B) vs (C vs D)). One single
#' cofactor can also be taken into account in the limma. It returns the final fit that can then
#' be used to detect differentially expressed genes.
#'
#' @param pheno_data The data-frame that contains the phenotypic data (aka clinical data).
#' @param eset The ExpressionSet object that contains the pre-processed data.
#' @param limma_parameters A list of useful Limma parameters that include (1) the main clinical
#' factor used to create sample categories (i.e., Disease status) and (2) the Limma coefficients
#' provided as formulas (similar to the limma::makeContrasts function). Additional parameters are
#' ignored for the time being.
#' @param cofactor_name A string indicating a potential cofactor (included in the phenotypic data).
#' The default is NULL, and no cofactor is considered.
#' @param block_key A string indicating which column in the phenotyypic data should be used to build
#' blocks if there is a cofactor to take into account. For instance, if there are several replicates
#' per patient, the block_key might be "Patient" and the cofactor might be "Replicate".
#' @return The limma fit.
run_limma
<-
function
(
pheno_data
,
eset
,
limma_parameters
,
cofactor_name
=
NULL
,
block_key
=
NULL
)
{
# We extract the limma analysis parameters
clinical_factor
<-
limma_parameters
[[
1
]]
coefficients
<-
limma_parameters
[[
2
]]
has_block
<-
!
is.null
(
cofactor_name
)
is_factorial_limma
<-
length
(
coefficients
)
==
3
# We create the design matrix (regular case - no block).
clinical
<-
factor
(
pheno_data
[[
clinical_factor
]])
design
<-
stats
::
model.matrix
(
~
0
+
clinical
)
colnames
(
design
)
<-
levels
(
clinical
)
# If necessary we add a block based on a co-factor.
# For instance, if we have paired samples.
if
(
has_block
)
{
clinical_cofactor
<-
factor
(
pheno_data
[[
cofactor_name
]])
design
<-
stats
::
model.matrix
(
~
0
+
clinical
+
clinical_cofactor
)
colnames
(
design
)
<-
c
(
levels
(
clinical
),
clinical_cofactor
)
}
# We build the contrast (default only one).
contrast
<-
eval
(
substitute
(
limma
::
makeContrasts
(
coeff1
=
c1
,
levels
=
design
),
list
(
c1
=
coefficients
[
1
])))
# For larger lists of coefficients, we add them all.
if
(
is_factorial_limma
)
{
contrast
<-
eval
(
substitute
(
limma
::
makeContrasts
(
coeff1
=
c1
,
coeff2
=
c2
,
coeff3
=
c3
,
levels
=
design
),
list
(
c1
=
coefficients
[
1
],
c2
=
coefficients
[
2
],
c3
=
coefficients
[
3
])))
}
# We fit the data using the above models and the contrats
# to prepare the differential analyses.
# Default to no block.
lmfit
<-
limma
::
lmFit
(
eset
,
design
)
# We create a block if necessary.
if
(
has_block
)
{
corfit
<-
limma
::
duplicateCorrelation
(
eset
,
design
,
block
=
pheno_data
[[
block_key
]])
lmfit_gender
<-
limma
::
lmFit
(
eset
,
design
,
block
=
pheno_data
[[
block_key
]],
correlation
=
corfit
$
consensus
)
}
fit
<-
limma
::
contrasts.fit
(
lmfit
,
contrast
)
fit
<-
limma
::
eBayes
(
fit
)
return
(
fit
)
}
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment