Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
David Hoksza
bh19-rare-diseases
Commits
079b19ff
Commit
079b19ff
authored
Dec 10, 2019
by
David Hoksza
Browse files
Correct handling of sed's in-place replacement on Mac
parent
4b0d1ac6
Changes
2
Hide whitespace changes
Inline
Side-by-side
assemble.sh
View file @
079b19ff
...
...
@@ -127,8 +127,10 @@ log "Retrieving enriched pathways..."
tr
'\r'
'\n'
<
${
ENRICHMENT_CONFIG
}
>
${
ENRICHMENT_CONFIG_TMP
}
#update paremters file
sed
-E
-i
"s/(max_areas_per_map)(.*)/
\1\t
${
ENRICH_MAX_AREAS_PER_MAP
}
/g"
${
ENRICHMENT_CONFIG_TMP
}
sed
-E
-i
"s/(max_areas_per_pathway_db)(.*)/
\1\t
${
MAX_AREAS_PER_PATHWAY_DB
}
/g"
${
ENRICHMENT_CONFIG_TMP
}
#the .bak is because MAC uses BSD version of sed which requires the backup for in-place replacements
sed
-E
-i
.bak
"s/(max_areas_per_map)(.*)/
\1\t
${
ENRICH_MAX_AREAS_PER_MAP
}
/g"
${
ENRICHMENT_CONFIG_TMP
}
sed
-E
-i
.bak
"s/(max_areas_per_pathway_db)(.*)/
\1\t
${
MAX_AREAS_PER_PATHWAY_DB
}
/g"
${
ENRICHMENT_CONFIG_TMP
}
rm
${
ENRICHMENT_CONFIG_TMP
}
.bak
Rscript
--vanilla
${
ENRICHMENT_DIR
}
/enrich_maps.R
${
genes_out_path
}
${
ENRICHMENT_CONFIG_TMP
}
check_exit_code
...
...
enrichment/enrich_maps.R
View file @
079b19ff
### Map and pathway enrichment analysis.
### Requires commandline input or file "input.txt" to be present in the script directory.
required.packages
<-
c
(
"jsonlite"
,
"httr"
,
"enrichR"
)
new.packages
<-
required.packages
[
!
(
required.packages
%in%
installed.packages
()[,
"Package"
])]
if
(
length
(
new.packages
))
install.packages
(
new.packages
,
repos
=
'http://cran.us.r-project.org'
)
library
(
jsonlite
)
library
(
httr
)
ask_GET
<-
function
(
furl
,
fask
)
{
resp
<-
httr
::
GET
(
url
=
paste0
(
furl
,
fask
),
httr
::
add_headers
(
'Content-Type'
=
"application/x-www-form-urlencoded"
))
return
(
httr
::
content
(
resp
,
as
=
"text"
))
}
args
=
commandArgs
(
trailingOnly
=
TRUE
)
infile
<-
"input.txt"
if
(
length
(
args
)
>
0
)
{
infile
<-
args
[
1
]
}
if
(
!
file.exists
(
infile
))
{
message
(
"No correct input given!"
)}
input
<-
readLines
(
infile
)
config
<-
"config.txt"
if
(
length
(
args
)
>
0
)
{
config
<-
args
[
2
]
}
if
(
!
file.exists
(
config
))
{
message
(
"No correct config given!"
)}
config
<-
read.table
(
config
,
sep
=
"\t"
,
stringsAsFactors
=
F
,
header
=
T
)
message
(
"Enrichment of disease maps in config"
)
cat
(
file
=
"enriched_disease_maps.txt"
,
"minerva_instance\tproject_id\tmodel_id\tname\tbounds.height\tbounds.width\tbounds.x\tbounds.y\n"
,
append
=
F
)
for
(
map
in
config
[
config
$
type
==
"map"
,
"resource"
])
{
message
(
paste0
(
"Querying "
,
map
))
cfg
<-
fromJSON
(
ask_GET
(
map
,
"configuration/"
))
project_id
<-
cfg
$
options
[
cfg
$
options
$
type
==
"DEFAULT_MAP"
,
"value"
]
mnv_base
<-
paste0
(
map
,
"projects/"
,
project_id
,
"/"
)
### Ask for models
message
(
paste0
(
"Asking for models: "
,
mnv_base
,
"models/"
))
models
<-
fromJSON
(
ask_GET
(
mnv_base
,
"models/"
),
flatten
=
F
)
model_elements
<-
lapply
(
models
$
idObject
,
function
(
x
)
fromJSON
(
ask_GET
(
paste0
(
mnv_base
,
"models/"
,
x
,
"/"
),
"bioEntities/elements/?columns=id,name,type,references,bounds"
),
flatten
=
F
))
all_hgncs
<-
c
()
message
(
paste0
(
"Setting up enrichment parameters..."
))
### Prep run, go through all model elements and:
for
(
me
in
model_elements
)
{
### Get all HGNCs (universe for enrichment)
all_hgncs
<-
c
(
all_hgncs
,
sapply
(
me
$
references
,
function
(
x
)
x
[
x
$
type
==
"HGNC_SYMBOL"
,
"resource"
]))
all_hgncs
<-
all_hgncs
[
sapply
(
all_hgncs
,
function
(
x
)
ifelse
(
is.character
(
x
)
&
length
(
x
)
>
0
,
TRUE
,
FALSE
))]
}
N
<-
length
(
all_hgncs
)
n
<-
length
(
input
)
message
(
paste0
(
"Going through individual models..."
))
for
(
i
in
1
:
length
(
model_elements
))
{
me
<-
model_elements
[[
i
]]
if
(
length
(
unique
(
me
[
me
$
type
==
"Pathway"
,
"name"
]))
==
0
)
{
next
}
path_pVals
<-
data.frame
(
pname
=
unique
(
me
[
me
$
type
==
"Pathway"
,
"name"
]),
pVal
=
1
)
### Calculating the inclusion of the elements per bioentity set
for
(
up
in
1
:
nrow
(
path_pVals
))
{
res
<-
apply
(
me
[
me
$
name
==
path_pVals
$
pname
[
up
],
"bounds"
],
1
,
function
(
x
)
me
$
bounds
$
x
>=
x
[
"x"
]
&
me
$
bounds
$
x
<=
(
x
[
"x"
]
+
x
[
"width"
])
&
me
$
bounds
$
y
>=
x
[
"y"
]
&
me
$
bounds
$
y
<=
(
x
[
"y"
]
+
x
[
"height"
]))
hgncs
<-
unlist
(
sapply
(
me
$
references
[
res
&
me
$
type
%in%
c
(
"Protein"
,
"RNA"
,
"Gene"
)],
function
(
x
)
x
[
x
$
type
==
"HGNC_SYMBOL"
,
"resource"
]))
m
<-
length
(
hgncs
)
k
<-
length
(
intersect
(
input
,
hgncs
))
if
(
k
>
0
)
{
path_pVals
$
pVal
[
up
]
<-
(((
m
*
(
n
-
k
))
/
(
k
*
(
N
-
m
)))
**
k
)
*
(((
n
*
(
N
-
m
))
/
(
N
*
(
n
-
k
)))
**
n
)
}
}
### Bonferroni correction
# path_pVals$pVal <- path_pVals$pVal*nrow(path_pVals)
path_pVals
<-
path_pVals
[
path_pVals
$
pVal
<
0.1
,
]
if
(
nrow
(
path_pVals
)
>
0
)
{
path_pVals
<-
path_pVals
[
order
(
path_pVals
$
pVal
),
]
if
(
!
is.null
(
config
[
config
$
resource
==
"max_areas_per_map"
,]
$
value
))
{
path_pVals
<-
path_pVals
[
1
:
config
[
config
$
resource
==
"max_areas_per_map"
,]
$
value
,
]
}
pbounds
<-
cbind
(
name
=
me
[
me
$
name
%in%
path_pVals
$
pname
,]
$
name
,
me
[
me
$
name
%in%
path_pVals
$
pname
,]
$
bounds
)
pathways_to_pull
<-
cbind
(
minerva_instance
=
map
,
project_id
=
project_id
,
model_id
=
models
$
idObject
[
i
],
pbounds
,
stringsAsFactors
=
F
)
write.table
(
pathways_to_pull
,
file
=
"enriched_disease_maps.txt"
,
sep
=
"\t"
,
quote
=
F
,
col.names
=
F
,
row.names
=
F
,
append
=
T
)
message
(
paste0
(
"Output file updated..."
))
}
}
message
(
paste0
(
"Done."
))
}
library
(
enrichR
)
message
(
"Enrichment of pathways"
)
### Header of the enrichment table, it's consecutive segments will be written
### later below
cat
(
file
=
"enriched_pathways.txt"
,
"pathway_db\tTerm\tAdjusted.P.value\n"
,
append
=
F
)
pathway_dbs
<-
config
[
config
$
type
==
"pathway_db"
,
"resource"
]
### Run Enrichr
enriched_pathways
<-
enrichr
(
input
,
databases
=
pathway_dbs
)
### Leave only the pathways with adjusted p lower than 0.1
adj_pathways
<-
lapply
(
enriched_pathways
,
function
(
x
)
{
selected
<-
which
(
x
$
Adjusted.P.value
<
0.1
)
### Cap the amount of pathways to the predefined max in config
cap
<-
min
(
length
(
selected
),
config
[
config
$
resource
==
"max_areas_per_pathway_db"
,]
$
value
)
selected
<-
head
(
selected
,
cap
)
x
[
selected
,
]
})
### Clean empty results
adj_pathways
<-
adj_pathways
[
sapply
(
adj_pathways
,
dim
)[
1
,]
>
0
]
### Write results down in the file
for
(
aep
in
names
(
adj_pathways
))
{
write.table
(
cbind
(
pathway_db
=
aep
,
adj_pathways
[[
aep
]][,
c
(
"Term"
,
"Adjusted.P.value"
)],
stringsAsFactors
=
F
),
file
=
"enriched_pathways.txt"
,
sep
=
"\t"
,
quote
=
F
,
col.names
=
F
,
row.names
=
F
,
append
=
T
)
}
message
(
"Enrichment finished."
)
### Map and pathway enrichment analysis.
### Requires commandline input or file "input.txt" to be present in the script directory.
required.packages
<-
c
(
"jsonlite"
,
"httr"
,
"enrichR"
)
new.packages
<-
required.packages
[
!
(
required.packages
%in%
installed.packages
()[,
"Package"
])]
if
(
length
(
new.packages
))
install.packages
(
new.packages
,
repos
=
'http://cran.us.r-project.org'
)
library
(
jsonlite
)
library
(
httr
)
ask_GET
<-
function
(
furl
,
fask
)
{
resp
<-
httr
::
GET
(
url
=
paste0
(
furl
,
fask
),
httr
::
add_headers
(
'Content-Type'
=
"application/x-www-form-urlencoded"
))
return
(
httr
::
content
(
resp
,
as
=
"text"
))
}
args
=
commandArgs
(
trailingOnly
=
TRUE
)
infile
<-
"input.txt"
if
(
length
(
args
)
>
0
)
{
infile
<-
args
[
1
]
}
if
(
!
file.exists
(
infile
))
{
message
(
"No correct input given!"
)}
input
<-
readLines
(
infile
)
config
<-
"config.txt"
if
(
length
(
args
)
>
0
)
{
config
<-
args
[
2
]
}
if
(
!
file.exists
(
config
))
{
message
(
"No correct config given!"
)}
config
<-
read.table
(
config
,
sep
=
"\t"
,
stringsAsFactors
=
F
,
header
=
T
)
message
(
"Enrichment of disease maps in config"
)
cat
(
file
=
"enriched_disease_maps.txt"
,
"minerva_instance\tproject_id\tmodel_id\tname\tbounds.height\tbounds.width\tbounds.x\tbounds.y\n"
,
append
=
F
)
for
(
map
in
config
[
config
$
type
==
"map"
,
"resource"
])
{
message
(
paste0
(
"Querying "
,
map
))
cfg
<-
fromJSON
(
ask_GET
(
map
,
"configuration/"
))
project_id
<-
cfg
$
options
[
cfg
$
options
$
type
==
"DEFAULT_MAP"
,
"value"
]
mnv_base
<-
paste0
(
map
,
"projects/"
,
project_id
,
"/"
)
### Ask for models
message
(
paste0
(
"Asking for models: "
,
mnv_base
,
"models/"
))
models
<-
fromJSON
(
ask_GET
(
mnv_base
,
"models/"
),
flatten
=
F
)
model_elements
<-
lapply
(
models
$
idObject
,
function
(
x
)
fromJSON
(
ask_GET
(
paste0
(
mnv_base
,
"models/"
,
x
,
"/"
),
"bioEntities/elements/?columns=id,name,type,references,bounds"
),
flatten
=
F
))
all_hgncs
<-
c
()
message
(
paste0
(
"Setting up enrichment parameters..."
))
### Prep run, go through all model elements and:
for
(
me
in
model_elements
)
{
### Get all HGNCs (universe for enrichment)
all_hgncs
<-
c
(
all_hgncs
,
sapply
(
me
$
references
,
function
(
x
)
x
[
x
$
type
==
"HGNC_SYMBOL"
,
"resource"
]))
all_hgncs
<-
all_hgncs
[
sapply
(
all_hgncs
,
function
(
x
)
ifelse
(
is.character
(
x
)
&
length
(
x
)
>
0
,
TRUE
,
FALSE
))]
}
N
<-
length
(
all_hgncs
)
n
<-
length
(
input
)
message
(
paste0
(
"Going through individual models..."
))
for
(
i
in
1
:
length
(
model_elements
))
{
me
<-
model_elements
[[
i
]]
if
(
length
(
unique
(
me
[
me
$
type
==
"Pathway"
,
"name"
]))
==
0
)
{
next
}
path_pVals
<-
data.frame
(
pname
=
unique
(
me
[
me
$
type
==
"Pathway"
,
"name"
]),
pVal
=
1
)
### Calculating the inclusion of the elements per bioentity set
for
(
up
in
1
:
nrow
(
path_pVals
))
{
res
<-
apply
(
me
[
me
$
name
==
path_pVals
$
pname
[
up
],
"bounds"
],
1
,
function
(
x
)
me
$
bounds
$
x
>=
x
[
"x"
]
&
me
$
bounds
$
x
<=
(
x
[
"x"
]
+
x
[
"width"
])
&
me
$
bounds
$
y
>=
x
[
"y"
]
&
me
$
bounds
$
y
<=
(
x
[
"y"
]
+
x
[
"height"
]))
hgncs
<-
unlist
(
sapply
(
me
$
references
[
res
&
me
$
type
%in%
c
(
"Protein"
,
"RNA"
,
"Gene"
)],
function
(
x
)
x
[
x
$
type
==
"HGNC_SYMBOL"
,
"resource"
]))
m
<-
length
(
hgncs
)
k
<-
length
(
intersect
(
input
,
hgncs
))
if
(
k
>
0
)
{
path_pVals
$
pVal
[
up
]
<-
(((
m
*
(
n
-
k
))
/
(
k
*
(
N
-
m
)))
**
k
)
*
(((
n
*
(
N
-
m
))
/
(
N
*
(
n
-
k
)))
**
n
)
}
}
### Bonferroni correction
# path_pVals$pVal <- path_pVals$pVal*nrow(path_pVals)
path_pVals
<-
path_pVals
[
path_pVals
$
pVal
<
0.1
,
]
if
(
nrow
(
path_pVals
)
>
0
)
{
path_pVals
<-
path_pVals
[
order
(
path_pVals
$
pVal
),
]
if
(
!
is.null
(
config
[
config
$
resource
==
"max_areas_per_map"
,]
$
value
))
{
path_pVals
<-
path_pVals
[
1
:
config
[
config
$
resource
==
"max_areas_per_map"
,]
$
value
,
]
}
pbounds
<-
cbind
(
name
=
me
[
me
$
name
%in%
path_pVals
$
pname
,]
$
name
,
me
[
me
$
name
%in%
path_pVals
$
pname
,]
$
bounds
)
pathways_to_pull
<-
cbind
(
minerva_instance
=
map
,
project_id
=
project_id
,
model_id
=
models
$
idObject
[
i
],
pbounds
,
stringsAsFactors
=
F
)
write.table
(
pathways_to_pull
,
file
=
"enriched_disease_maps.txt"
,
sep
=
"\t"
,
quote
=
F
,
col.names
=
F
,
row.names
=
F
,
append
=
T
)
message
(
paste0
(
"Output file updated..."
))
}
}
message
(
paste0
(
"Done."
))
}
library
(
enrichR
)
message
(
"Enrichment of pathways"
)
### Header of the enrichment table, it's consecutive segments will be written
### later below
cat
(
file
=
"enriched_pathways.txt"
,
"pathway_db\tTerm\tAdjusted.P.value\n"
,
append
=
F
)
pathway_dbs
<-
config
[
config
$
type
==
"pathway_db"
,
"resource"
]
### Run Enrichr
enriched_pathways
<-
enrichr
(
input
,
databases
=
pathway_dbs
)
### Leave only the pathways with adjusted p lower than 0.1
adj_pathways
<-
lapply
(
enriched_pathways
,
function
(
x
)
{
selected
<-
which
(
x
$
Adjusted.P.value
<
0.1
)
### Cap the amount of pathways to the predefined max in config
cap
<-
min
(
length
(
selected
),
config
[
config
$
resource
==
"max_areas_per_pathway_db"
,]
$
value
)
selected
<-
head
(
selected
,
cap
)
x
[
selected
,
]
})
### Clean empty results
adj_pathways
<-
adj_pathways
[
sapply
(
adj_pathways
,
dim
)[
1
,]
>
0
]
### Write results down in the file
for
(
aep
in
names
(
adj_pathways
))
{
write.table
(
cbind
(
pathway_db
=
aep
,
adj_pathways
[[
aep
]][,
c
(
"Term"
,
"Adjusted.P.value"
)],
stringsAsFactors
=
F
),
file
=
"enriched_pathways.txt"
,
sep
=
"\t"
,
quote
=
F
,
col.names
=
F
,
row.names
=
F
,
append
=
T
)
}
message
(
"Enrichment finished."
)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment