Gitlab is now using https://gitlab.lcsb.uni.lu as it's primary address. Please update your bookmarks. FAQ.

Commit a3acfe93 authored by cylon-x's avatar cylon-x 🤖
Browse files

Merge branch 'repo_update' of https://git-r3lab.uni.lu/covid/models into repo_update

parents 303a75c0 d5ae43b3
Pipeline #37218 passed with stages
in 2 minutes and 9 seconds
......@@ -7,16 +7,46 @@
### Helper functions to access and retrieve content from MINERVA-hosted diagrams
source("https://git-r3lab.uni.lu/covid/models/-/raw/master/Integration/MINERVA_access/minerva_access_functions.R")
### For handling WikiPathway queries
library(rWikiPathways)
### A function retrieving element identifiers from MINERVA and WikiPathway diagrams
get_grouped_refs <- function(fcomponents, fgroups = NULL) {
### For handling Reactome pathways
### Get HGNCs of UniProts
translate_up_hgnc <- function(upid, hard_filter = T) {
resp <- ask_GET(paste0("https://www.ebi.ac.uk/proteins/api/genecentric/", upid))
if(is.null(resp)) { return("") }
res <- fromJSON(resp)
if(is.null(res$relatedGene)) { return("") }
### This is a patchy reference; the correct way would be to get HGNC ID (numeric) and retrieve the symbol based on this
### but I'd like to avoid yet another API call
hgncs <- with(res$relatedGene, geneName[accession == upid & geneNameType == "Gene name"])
### Gene symbols retrieved this way may not be HGNCs; we hard filter out non-uppercased or too short ids
if(hard_filter) {
hgncs <- hgncs[(nchar(hgncs) > 2) & (toupper(hgncs) == hgncs)]
}
return(hgncs)
}
### Get participants of a pathway and translate them into HGNCs
get_reactome_pathway_contents <- function(reactome_path_id) {
res <- fromJSON(ask_GET(paste0("https://reactome.org/ContentService/data/participants/", reactome_path_id)))
ids <- sapply(res$refEntities, "[[", "identifier")
symbols <- sapply(unique(unlist(ids)), translate_up_hgnc)
return(unlist(unique(symbols)))
}
### A function retrieving element identifiers from MINERVA
get_mnv_refs <- function(fcomponents) {
### Get MINERVA HGNCs as a named list
message("Retrieving MINERVA HGNCs collection")
refs <- sapply(fcomponents$map_elements, function(x) get_annotation(x$references, "HGNC_SYMBOL"))
names(refs) <- fcomponents$models$name
### Get WikiPathways HGNCs as a named list
mnv_refs <- sapply(fcomponents$map_elements, function(x) get_annotation(x$references, "HGNC_SYMBOL"))
names(mnv_refs) <- fcomponents$models$name
return(mnv_refs)
}
### A function retrieving element identifiers from WikiPathway diagrams
get_wp_refs <- function() {
message("Retrieving WP collection")
### This file contains all the pathways in the WP COVID-19 repo
wpids <- readLines("https://raw.githubusercontent.com/wikipathways/SARS-CoV-2-WikiPathways/master/pathways.txt")
......@@ -24,12 +54,39 @@ get_grouped_refs <- function(fcomponents, fgroups = NULL) {
message("Retrieving WP HGNCs")
wp_hgncs <- sapply(wpids, getXrefList, "H")
names(wp_hgncs) <- wpids
refs <- c(refs, wp_hgncs)
### If no grouping needed, return the lists as they are
if(is.null(fgroups)) { return(refs) }
### If by-name groupings provided, group unique HGNCs and return
grefs <- sapply(fgroups, function(x) setdiff(unique(unlist(refs[x])), c("", NA)))
names(grefs) <- names(fgroups)
return(wp_hgncs)
}
### A function retrieving element identifiers from Reactome diagrams
get_rct_refs <- function() {
message("Retrieving Reactome collection")
rct_cov_hgncs <- sapply(c("R-HSA-9678110", "R-HSA-9679504", "R-HSA-9679514", "R-HSA-9683701", "R-HSA-9679509"),
get_reactome_pathway_contents)
rct_cov2_hgncs <- sapply(c("R-HSA-9694614", "R-HSA-9694676", "R-HSA-9694682", "R-HSA-9694635", "R-HSA-9694322"),
get_reactome_pathway_contents)
return(c(rct_cov_hgncs, rct_cov2_hgncs))
}
### Beautification of the graph output: WikiPathway names
name_wp_nicely <- function(wp_ids) {
### For each WP id, grab the "name" parameter from the output of the getPathwayInfo()
sapply(wp_ids, function(x)
paste0(x, ": ", rWikiPathways::getPathwayInfo(x)$name
))
}
### Beautification of the graph output: Reactome names
name_rct_nicely <- function(rct_ids) {
### For each Reactome id, grab the "displayName" parameter from the JSON describing the pathway
sapply(rct_ids, function(x)
paste0(x, ": ",
fromJSON(ask_GET(paste0("https://reactome.org/ContentService/data/query/enhanced/",x)))$displayName))
}
### A function grouping the references according to a given structure
group_refs <- function(refs, groups) {
### Using by-name groupings provided, group unique HGNCs and return
grefs <- sapply(groups, function(x) setdiff(unique(unlist(refs[x])), c("", NA)))
names(grefs) <- names(groups)
return(grefs)
}
......@@ -92,15 +149,20 @@ get_external_crosstalks <- function(frefs, external_interactions) {
### A function operating on the external cross-talks table (see above)
### following the format: from, to, from pathway, from pathway n, to pathway, to pathway n
### list external edges to and from pathways
get_external_edges <- function(fcrosstalk) {
get_external_edges <- function(fcrosstalk, simplify = T) {
ext_edgelist <- c()
for(i in 1:nrow(fcrosstalk)) {
from <- unlist(strsplit(fcrosstalk[i,3],";"))
for(j in from) { ext_edgelist <- rbind(ext_edgelist, c(j, fcrosstalk[i,1])) }
ext_edgelist <- rbind(ext_edgelist, c(fcrosstalk[i,1], fcrosstalk[i,2]))
to <- unlist(strsplit(fcrosstalk[i,5],";"))
if(simplify) { ### Same direction: pathway to element
for(j in from) { ext_edgelist <- rbind(ext_edgelist, c(j, fcrosstalk[i,1])) }
for(j in to) { ext_edgelist <- rbind(ext_edgelist, c(j, fcrosstalk[i,2])) }
} else {
for(j in from) { ext_edgelist <- rbind(ext_edgelist, c(j, fcrosstalk[i,1])) }
for(j in to) { ext_edgelist <- rbind(ext_edgelist, c(fcrosstalk[i,2], j)) }
}
}
return(unique(ext_edgelist))
}
......@@ -109,47 +171,21 @@ map <- "https://covid19map.elixir-luxembourg.org/minerva/api/"
### Get the components of MINERVA and WikiPathways (see minerva_access.R)
map_components <- get_map_components(map)
### Introduce pathway groups (following 'https://covid.pages.uni.lu/map_contents')
map_groups <- list(
`Virus replication cycle` = c(
"WP4846", "WP4799",
"Virus replication cycle", "Nsp9 protein interactions", "SARS-CoV-2 RTC and transcription",
"Nsp4 and Nsp6 protein interactions","E protein interactions", "Orf3a protein interactions"),
`ER stress and unfolded protein response` = c(
"WP4861",
"Endoplasmatic Reticulum stress", "Orf10 Cul2 pathway"),
`Autophagy and protein degradation` = c(
"WP4860", "WP4936", "WP4863",
"Endoplasmatic Reticulum stress"),
`Apoptosis pathway` = c(
"WP4864", "WP4877",
"Apoptosis pathway", "JNK pathway", "Electron Transport Chain disruption"),
`Renin-angiotensin system` = c(
"WP4883", "WP4965", "WP4969",
"Renin-angiotensin pathway"),
`Coagulopathy pathway` = c(
"WP4927",
"Coagulation pathway"),
`PAMP signalling` = c(
"WP4912",
"PAMP signalling"),
`Induction of interferons and the cytokine storm` = c(
"WP4868", "WP4880", "WP4876", "WP4884", "WP4961",
"Interferon 1 pathway", "Interferon lambda pathway"),
`Altered host metabolism` = c(
"WP4853", "WP4904",
"Kynurenine synthesis pathway", "HMOX1 pathway", "Nsp14 and metabolism", "Pyrimidine deprivation"),
`Other diagrams` = c(
"WP4891", "WP5017", "TGFbeta signalling", "overview"
)
)
map_groups <- fromJSON("map_groups.json")
### Detailed lists
mnv_refs <- get_mnv_refs(map_components)
wp_refs <- get_wp_refs()
rct_refs <- get_rct_refs()
### For a non-grouped version, re-run without 'map_groups'
map_grefs <- get_grouped_refs(map_components, map_groups)
### Get existing between-pathway crosstalks
ix_edgelist <- get_crosstalk_edges(map_grefs, compress = F)
### Global list of all identifiers
all_hgncs <- unique(unlist(map_grefs))
### Grouped list
map_grefs <- group_refs(c(mnv_refs, wp_refs, rct_refs), map_groups)
### For downstream display of graphs
names(mnv_refs) <- paste0("MINERVA: ", names(mnv_refs))
names(wp_refs) <- name_wp_nicely(names(wp_refs))
names(rct_refs) <- name_rct_nicely(names(rct_refs))
map_refs <- c(mnv_refs, wp_refs, rct_refs)
###
### For new interactions, we will use OmniPathDB (https://omnipathdb.org) and
......@@ -206,26 +242,39 @@ emmaa_interactors <- t(unname(sapply(emmaa_hgnc, function(x) c(x$subj$name, x$ob
### Add filtering by the OmniPathDB contents
emmaa_op <- paste0(emmaa_interactors[,1],"-",emmaa_interactors[,2]) %in%
paste0(op_narrow$source_genesymbol,"-", op_narrow$target_genesymbol)
emmaa_interactors_op <- emmaa_interactors[emmaa_op,]
### New crosstalks based on EMMAA TM results, raw and filtered by OmniPathDB
ix_emmaa <- get_external_crosstalks(map_grefs, emmaa_interactors)
ix_emmaa_op <- get_external_crosstalks(map_grefs, emmaa_interactors[emmaa_op,])
### Create a network of new inter-pathway crosstalks (with more than one 'from_pathway_n' and 'to_pathway_n')
### (raw and OmniPathDB-filtered)
ix_emmaa_edges <- get_external_edges(ix_emmaa[ix_emmaa[,'from_pathway_n'] > 1 &
ix_emmaa[,'to_pathway_n'] > 1,])
ix_emmaa_edges_op <- get_external_edges(ix_emmaa_op[ix_emmaa_op[,'from_pathway_n'] > 1 &
ix_emmaa_op[,'to_pathway_n'] > 1,])
### Generate results for different graph combinations
### selected_refs are either pathways or groups
### selected_emmaa_interactors can be NULL (for existing crosstalks), raw (without op) or filtered (with op)
### new_elements T for new regulators or F for new crosstalks
generate_combination <- function(selected_refs, selected_emmaa_interactors = NULL, new_elements = F) {
if(is.null(selected_emmaa_interactors)) {
### Get existing crosstalks between selected references (pathways/groups)
return(get_crosstalk_edges(selected_refs, compress = F))
} else {
### Combine pathway/group interactions with selected EMMAA interactions
ix_refs_emmaa <- get_external_crosstalks(selected_refs, selected_emmaa_interactors)
### Create a network of new (HGNC not among the map contents) upstream regulators (with 'from_pathway_n' == 0, and 'to_pathway_n' > 0)
### (raw and OmniPathDB-filtered)
ix_reg_emmaa_edges <- get_external_edges(ix_emmaa[ix_emmaa[,'from_pathway_n'] == 0 &
ix_emmaa[,'to_pathway_n'] > 0 &
!(ix_emmaa[,'from'] %in% all_hgncs),])
ix_reg_emmaa_edges_op <- get_external_edges(ix_emmaa_op[ix_emmaa_op[,'from_pathway_n'] == 0 &
ix_emmaa_op[,'to_pathway_n'] > 0 &
!(ix_emmaa_op[,'from'] %in% all_hgncs),])
if(new_elements) {
### New upstream regulators: return new interactions that have
### 1. no incoming interactions from existing elements
### 2. at least one outgoing interaction to an existing element
### 3. are not among existing elements
return(get_external_edges(ix_refs_emmaa[ix_refs_emmaa[,'from_pathway_n'] == 0 &
ix_refs_emmaa[,'to_pathway_n'] > 0 &
!(ix_refs_emmaa[,'from'] %in% unique(unlist(selected_refs))),]))
} else {
### Existing crosstalks: return new interactions that have
### 1. more than one incoming interaction from an existing element
### 2. more than one outgoing interaction to an existing element
return(get_external_edges(ix_refs_emmaa[ix_refs_emmaa[,'from_pathway_n'] > 1 &
ix_refs_emmaa[,'to_pathway_n'] > 1,]))
}
}
return(NULL)
}
### Use igraph to create xgmml files (for CytoScape) and pdf files
library(igraph)
......@@ -235,23 +284,16 @@ library(igraph)
produce_graph <- function(fedgelist, fxgmml = NULL, fpdf = NULL) {
### Create a graph from a non-empty names
f_g <- graph.edgelist(fedgelist[fedgelist[,1] != "",])
### Graphical properties
### Pathway nodes contain spaces in their names, we need them for a different visualisation
pathway_indices <- grep(" ", V(f_g)$name)
### Basic HGNC node colour
f_color <- rep("lightblue", gorder(f_g))
### Three-neighbour colour
if(quantile(degree(f_g))[2] > 2) { f_color[degree(f_g) >= quantile(degree(f_g))[2]] <- "yellow" }
### Four-or-more-neighbour colour
if(quantile(degree(f_g))[3] > 3) { f_color[degree(f_g) >= quantile(degree(f_g))[3]] <- "red" }
### Pathway colour
f_color[pathway_indices] <- "lightgreen"
### Height (for the rectangle shape)
f_height <- rep(40, gorder(f_g))
f_color <- rep("lightblue", gorder(f_g)) ### Basic HGNC node colour
f_color[degree(f_g) >= 3] <- "yellow" ### Three-neighbour colour
f_color[degree(f_g) >= 5] <- "red" ### Five-or-more-neighbour colour
f_color[grep(" ", V(f_g)$name)] <- "lightgreen" ### Pathway colour
f_height <- rep(35, gorder(f_g)) ### Height (for the rectangle shape)
f_height[grep(" ", V(f_g)$name)] <- 45 ### Pathway colour
f_g <- set.vertex.attribute(f_g, "color", value = f_color) ### Set the colours
### Set the colours
f_g <- set.vertex.attribute(f_g, "color", value = f_color)
### Create layout, stretch a bit
l = layout_with_fr(f_g)
l[,1] <- l[,1]*2000
......@@ -268,15 +310,26 @@ produce_graph <- function(fedgelist, fxgmml = NULL, fpdf = NULL) {
return(f_g)
}
### Create a graph of direct crosstalks
ix_g <- produce_graph(ix_edgelist, fxgmml = "ix_out.graphml", fpdf = "ix_out.pdf")
### Create a graph of new EMMAA crosstalks (raw)
ix_emmaa_g <- produce_graph(ix_emmaa_edges, fxgmml = "ix_emmaa_out.graphml", fpdf = "ix_emmaa_out.pdf")
### Create a graph of new EMMAA crosstalks (OmniPathDB-filtered)
ix_emmaa_op_g <- produce_graph(ix_emmaa_edges_op, fxgmml = "ix_emmaa_op_out.graphml", fpdf = "ix_emmaa_op_out.pdf")
### Create a graph of new EMMAA upstream regulators (raw)
ix_reg_emmaa_g <- produce_graph(ix_reg_emmaa_edges, fxgmml = "ix_reg_emmaa_out.graphml", fpdf = "ix_reg_emmaa_out.pdf")
### Create a graph of new EMMAA upstream regulators (OmniPathDB-filtered)
ix_reg_emmaa_op_g <- produce_graph(ix_reg_emmaa_edges_op, fxgmml = "ix_reg_emmaa_op_out.graphml", fpdf = "ix_reg_emmaa_op_out.pdf")
### Here, only a selection of graphs is built
### (other combinations can be created by using "generate_combination()" with other parameters)
### 1. Pathway existing crosstalks
ix_g <- produce_graph(generate_combination(selected_refs = map_refs),
fxgmml = "ix.graphml", fpdf = "ix.pdf")
### 2. Group existing crosstalks
grix_g <- produce_graph(generate_combination(selected_refs = map_grefs),
fxgmml = "grix.graphml", fpdf = "grix.pdf")
### 3. Pathway new crosstalks EMMAA + OmniGraffle
ix_emmaa_op_g <- produce_graph(generate_combination(selected_refs = map_refs,
selected_emmaa_interactors = emmaa_interactors_op),
fxgmml = "ix_emmaa_op.graphml", fpdf = "ix_emmaa_op.pdf")
### 4. Group new crosstalks EMMAA + OmniGraffle
grix_emmaa_op_g <- produce_graph(generate_combination(selected_refs = map_grefs,
selected_emmaa_interactors = emmaa_interactors_op),
fxgmml = "grix_emmaa_op.graphml", fpdf = "grix_emmaa_op.pdf")
### 5. Group new upstream regulators EMMAA + OmniGraffle
greg_emmaa_op_g <- produce_graph(generate_combination(selected_refs = map_grefs,
selected_emmaa_interactors = emmaa_interactors_op,
new_elements = T),
fxgmml = "greg_emmaa_op.graphml", fpdf = "greg_emmaa_op.pdf")
message("Done.")
\ No newline at end of file
{
"Virus replication cycle": [
"WP4846",
"WP4799",
"Virus replication cycle",
"Nsp9 protein interactions",
"SARS-CoV-2 RTC and transcription",
"Nsp4 and Nsp6 protein interactions",
"E protein interactions",
"Orf3a protein interactions",
"R-HSA-9678110",
"R-HSA-9679504",
"R-HSA-9694514",
"R-HSA-9683701",
"R-HSA-9679509",
"R-HSA-9694614",
"R-HSA-9694676",
"R-HSA-9694682",
"R-HSA-9694635",
"R-HSA-9694322"
],
"ER stress and unfolded protein response": [
"WP4861",
"WP5027",
"Endoplasmatic Reticulum stress",
"Orf10 Cul2 pathway"
],
"Autophagy and protein degradation": [
"WP4860",
"WP4936",
"WP4863",
"Endoplasmatic Reticulum stress"
],
"Apoptosis pathway": [
"WP4864",
"WP4877",
"WP5038",
"Apoptosis pathway",
"JNK pathway",
"Electron Transport Chain disruption"
],
"Renin-angiotensin system": [
"WP4883",
"WP4965",
"WP4969",
"Renin-angiotensin pathway"
],
"Coagulopathy pathway": [
"WP4927",
"Coagulation pathway"
],
"PAMP signalling": [
"WP4912",
"PAMP signalling"
],
"Induction of interferons and the cytokine storm": [
"WP4868",
"WP4880",
"WP4876",
"WP4884",
"WP4961",
"WP5039",
"Interferon 1 pathway",
"Interferon lambda pathway",
"TGFbeta signalling",
"NLRP3 inflammasome activation"
],
"Altered host metabolism": [
"WP4853",
"WP4904",
"Kynurenine synthesis pathway",
"HMOX1 pathway",
"Nsp14 and metabolism",
"Pyrimidine deprivation"
],
"Other diagrams": [
"WP4891",
"WP5017",
"overview"
]
}
##################################################
## Project: COVID-19 Disease Map
## Script purpose: Convenience functions for accessing the MINERVA Platform
## Date: 24.12.2020
## Author: Marek Ostaszewski
##################################################
library(httr)
library(jsonlite)
### A convenience function to handle API queries
ask_GET <- function(fask_url, verbose = F) {
if(verbose) {
message(URLencode(fask_url))
}
resp <- httr::GET(url = URLencode(fask_url),
httr::add_headers('Content-Type' = "application/x-www-form-urlencoded"))
if(httr::status_code(resp) == 200) {
return(httr::content(resp, as = "text"))
}
return(NULL)
}
### Get the components of a given map/project on the MINERVA Platform
get_map_components <- function(map_api, project_id = NULL) {
if(is.null(project_id)) {
### If project id not given, get configuration of the map, to obtain the latest (default) version
cfg <- fromJSON(ask_GET(paste0(map_api, "configuration/")))
project_id <- cfg$options[cfg$options$type == "DEFAULT_MAP","value"]
}
### The address of the latest (default) build
mnv_base <- paste0(map_api, "projects/",project_id,"/")
message(paste0("Asking for diagrams in: ", mnv_base, "models/"))
### Get diagrams
models <- fromJSON(ask_GET(paste0(mnv_base, "models/")))
### Get elements of the chosen diagram
model_elements <- lapply(models$idObject,
function(x)
fromJSON(ask_GET(paste0(mnv_base,"models/",x,"/",
"bioEntities/elements/?columns=id,name,type,references,elementId,complexId")),
flatten = F))
### Request for reactions that have at least one top 10 element as participant
model_reactions <- lapply(models$idObject,
function(x)
fromJSON(ask_GET(paste0(mnv_base,"models/",x,"/",
"bioEntities/reactions/?columns=modifiers,products,reactants")),
flatten = F))
### Pack all into a list and return
return(list(models = models, map_elements = model_elements, map_reactions = model_reactions))
}
### Get annotation of a given type, from element/reaction references
get_annotation <- function(freferences, ftype) {
sapply(freferences,
function(x) {
ifelse(any(x$type == ftype), x$resource[x$type == ftype], NA)
})
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment