Commit 84d4d627 authored by Marek Ostaszewski's avatar Marek Ostaszewski
Browse files

crosstalks results and code

parent d175914c
Pipeline #36531 passed with stages
in 57 seconds
##################################################
## Project: COVID-19 Disease Map
## Script purpose: Discover crosstalks between COVID-19 Disease Map diagrams
## Date: 27.11.2020
## Author: Marek Ostaszewski
##################################################
### Helper functions to access and retrieve content from MINERVA-hosted diagrams
source("minerva_access.R")
### For handling WikiPathway queries
library(rWikiPathways)
### A function retrieving element identifiers from MINERVA and WikiPathway diagrams
get_grouped_refs <- function(fcomponents, fgroups = NULL) {
### Get MINERVA HGNCs as a named list
message("Retrieving MINERVA HGNCs collection")
refs <- sapply(fcomponents$map_elements, function(x) get_annotation(x$references, "HGNC_SYMBOL"))
names(refs) <- fcomponents$models$name
### Get WikiPathways HGNCs as a named list
message("Retrieving WP collection")
### This file contains all the pathways in the WP COVID-19 repo
wpids <- readLines("https://raw.githubusercontent.com/wikipathways/SARS-CoV-2-WikiPathways/master/pathways.txt")
require(rWikiPathways)
message("Retrieving WP HGNCs")
wp_hgncs <- sapply(wpids, getXrefList, "H")
names(wp_hgncs) <- wpids
refs <- c(refs, wp_hgncs)
### If no grouping needed, return the lists as they are
if(is.null(fgroups)) { return(refs) }
### If by-name groupings provided, group unique HGNCs and return
grefs <- sapply(fgroups, function(x) setdiff(unique(unlist(refs[x])), c("", NA)))
names(grefs) <- names(fgroups)
return(grefs)
}
### A function to calculate common elements across pathways
get_crosstalk_edges <- function(frefs, compress = T) {
### Frequencies table, needed for compression
freq_refs <- table(unlist(frefs))
### Global table of crosslinks; two-column (HGNC, Pathway name)
ix_edgelist <- c()
### Pairwise checks for shared identifiers
for(i in 1:(length(frefs)-1)) {
for(j in (i+1):length(frefs)) {
ix <- intersect(frefs[[i]], frefs[[j]])
ix <- ix[!is.na(ix)]
if(length(ix) == 0) next
### This compresses the network by gathering all HGNCs in
### unique crosstalks (between a pair of pathways) and gathering them
### into a single entry, semicolon separated
if(compress) {
compress_idx <- which(ix %in% names(freq_refs == 2))
ix <- c(ix, paste(ix[compress_idx], collapse = ";"))
ix <- ix[-compress_idx]
}
### Add the crosstalk links to the global table
for(ti in ix) {
ix_edgelist <- rbind(ix_edgelist, c(ti, names(frefs)[i]))
ix_edgelist <- rbind(ix_edgelist, c(ti, names(frefs)[j]))
ix_edgelist <- unique(ix_edgelist)
}
}
}
colnames(ix_edgelist) <- c("Participant", "Pathway")
return(ix_edgelist)
}
### A function listing interactions between pathways from an external list
### interactions between HGNCs within a pathway are excluded
get_external_crosstalks <- function(frefs, external_interactions) {
### Global return table
ext_ix_edgelist <- data.frame(from = external_interactions[,1], to = external_interactions[,2],
from_pathway = "", from_pathway_n = 0,
to_pathway = "", to_pathway_n = 0)
for(i in 1:(length(frefs))) {
### For each pathway
### Calculate "internal" pathway crosstalks for the provided interaction list
in_hits <- (external_interactions[,1] %in% frefs[[i]] & external_interactions[,2] %in% frefs[[i]])
### List external interactions upstream pathway elements
from_hits <- external_interactions[,1] %in% frefs[[i]] & !in_hits
### List external interactions downstream pathway elements
to_hits <- external_interactions[,2] %in% frefs[[i]] & !in_hits
### Add entries to the global table
ext_ix_edgelist[from_hits,"from_pathway"] <- paste0(ext_ix_edgelist[from_hits,"from_pathway"], names(frefs)[i], ";")
ext_ix_edgelist[from_hits,"from_pathway_n"] <- ext_ix_edgelist[from_hits,"from_pathway_n"] + 1
ext_ix_edgelist[to_hits,"to_pathway"] <- paste0(ext_ix_edgelist[to_hits,"to_pathway"], names(frefs)[i], ";")
ext_ix_edgelist[to_hits,"to_pathway_n"] <- ext_ix_edgelist[to_hits,"to_pathway_n"] + 1
}
return(ext_ix_edgelist)
}
### A function operating on the external cross-talks table (see above)
### following the format: from, to, from pathway, from pathway n, to pathway, to pathway n
### list external edges to and from pathways
get_external_edges <- function(fcrosstalk) {
ext_edgelist <- c()
for(i in 1:nrow(fcrosstalk)) {
from <- unlist(strsplit(fcrosstalk[i,3],";"))
for(j in from) { ext_edgelist <- rbind(ext_edgelist, c(j, fcrosstalk[i,1])) }
ext_edgelist <- rbind(ext_edgelist, c(fcrosstalk[i,1], fcrosstalk[i,2]))
to <- unlist(strsplit(fcrosstalk[i,5],";"))
for(j in to) { ext_edgelist <- rbind(ext_edgelist, c(fcrosstalk[i,2], j)) }
}
return(unique(ext_edgelist))
}
### The address of the COVID-19 Disease Map in MINERVA
map <- "https://covid19map.elixir-luxembourg.org/minerva/api/"
### Get the components of MINERVA and WikiPathways (see minerva_access.R)
map_components <- get_map_components(map)
### Introduce pathway groups (following 'https://covid.pages.uni.lu/map_contents')
map_groups <- list(
`Virus replication cycle` = c(
"WP4846", "WP4799",
"Virus replication cycle", "Nsp9 protein interactions", "SARS-CoV-2 RTC and transcription",
"Nsp4 and Nsp6 protein interactions","E protein interactions", "Orf3a protein interactions"),
`ER stress and unfolded protein response` = c(
"WP4861",
"Endoplasmatic Reticulum stress", "Orf10 Cul2 pathway"),
`Autophagy and protein degradation` = c(
"WP4860", "WP4936", "WP4863",
"Endoplasmatic Reticulum stress"),
`Apoptosis pathway` = c(
"WP4864", "WP4877",
"Apoptosis pathway", "JNK pathway", "Electron Transport Chain disruption"),
`Renin-angiotensin system` = c(
"WP4883", "WP4965", "WP4969",
"Renin-angiotensin pathway"),
`Coagulopathy pathway` = c(
"WP4927",
"Coagulation pathway"),
`PAMP signalling` = c(
"WP4912",
"PAMP signalling"),
`Induction of interferons and the cytokine storm` = c(
"WP4868", "WP4880", "WP4876", "WP4884", "WP4961",
"Interferon 1 pathway", "Interferon lambda pathway"),
`Altered host metabolism` = c(
"WP4853", "WP4904",
"Kynurenine synthesis pathway", "HMOX1 pathway", "Nsp14 and metabolism", "Pyrimidine deprivation"),
`Other diagrams` = c(
"WP4891", "WP5017", "TGFbeta signalling", "overview"
)
)
### For a non-grouped version, re-run without 'map_groups'
map_grefs <- get_grouped_refs(map_components, map_groups)
### Get existing between-pathway crosstalks
ix_edgelist <- get_crosstalk_edges(map_grefs, compress = F)
### Global list of all identifiers
all_hgncs <- unique(unlist(map_grefs))
###
### For new interactions, we will use OmniPathDB (https://omnipathdb.org) and
### EMMAA COVID-19 dataset - text mining results based on INDRA (emmaa.indra.bio)
### We will use EMMAA interactions (for COVID-19 specificity) that have a corresponding
### OmniPath edge (for biological relevance, to filter potential text mining quirks)
###
library(OmnipathR)
library(dplyr)
message(paste0("Asking for OmniPath interactions"))
### Get OmniPath interactions
ia_omnipath <- import_omnipath_interactions() %>% as_tibble()
### Narrow down the interactions (consensus, support, remove columns)
ia_omnipath %>% filter(consensus_direction == 1 & n_references > 1 & (consensus_inhibition + consensus_stimulation) > 0) %>%
select(-dip_url, -starts_with("consensus_"), -starts_with("is_")) -> op_narrow
### Get EMMAA results
### Warning! EMMAA file is heavy (~800MB currently); suggessted use
### Download the file and create "emmaa_covid19_jsons_all.Rd" object,
### use the object file with downstream calculations
if(!file.exists("emmaa_covid19_jsons_all.Rd")) {
### The file may be outdated, check https://emmaa.indra.bio/dashboard/covid19?tab=model for the latest version
download.file("https://emmaa.s3.amazonaws.com/assembled/covid19/statements_2020-12-01-21-05-54.jsonl",
destfile = "statements_2020-12-01-21-05-54.jsonl")
emmaa_covid19_lines <- readLines("statements_2020-12-01-21-05-54.jsonl")
## The line below takes time to execute, advised grep to filter down by e.g. belief: grep 'belief": 0.[7-9]'
emmaa_covid19_jsons <- sapply(emmaa_covid19_lines, fromJSON)
save(emmaa_covid19_jsons, file = "emmaa_covid19_jsons_all.Rd")
}
### One more optimisation step, load "emmaa_covid19_jsons_all.Rd" file only if
### the 'emmaa_covid19_jsons' is not found in the current environment
if(!("emmaa_covid19_jsons" %in% ls())) {
load(file = "emmaa_covid19_jsons_all.Rd")
}
### EMMAA contains belief values, allowing to filter associations based on significance
emmaa_bfs <- sapply(emmaa_covid19_jsons, function(x) x$belief)
### Here, the dataset is reduced to interactions with belief > 0.8
### This is a reasonable tradeoff, based on the distribution of the results
### see: hist(emmaa_bfs)
emmaa_small <- emmaa_covid19_jsons[emmaa_bfs > 0.8]
## Reduce EMMAA dataset only to HGNC - HGNC interactions
emmaa_obj_dbs <- unname(sapply(emmaa_small, function(x) names(x$obj$db_refs)))
emmaa_subj_dbs <- unname(sapply(emmaa_small, function(x) names(x$subj$db_refs)))
emmaa_hgnc <- emmaa_small[sapply(emmaa_obj_dbs, function(x) "HGNC" %in% x) & sapply(emmaa_subj_dbs, function(x) "HGNC" %in% x)]
### Cleanup the environment, especially 'emmaa_covid19_jsons' object is large
rm(emmaa_covid19_jsons, emmaa_bfs, emmaa_small)
### Create a simplified table with the interactors
emmaa_interactors <- t(unname(sapply(emmaa_hgnc, function(x) c(x$subj$name, x$obj$name))))
### Add filtering by the OmniPathDB contents
emmaa_op <- paste0(emmaa_interactors[,1],"-",emmaa_interactors[,2]) %in%
paste0(op_narrow$source_genesymbol,"-", op_narrow$target_genesymbol)
### New crosstalks based on EMMAA TM results, raw and filtered by OmniPathDB
ix_emmaa <- get_external_crosstalks(map_grefs, emmaa_interactors)
ix_emmaa_op <- get_external_crosstalks(map_grefs, emmaa_interactors[emmaa_op,])
### Create a network of new inter-pathway crosstalks (with more than one 'from_pathway_n' and 'to_pathway_n')
### (raw and OmniPathDB-filtered)
ix_emmaa_edges <- get_external_edges(ix_emmaa[ix_emmaa[,'from_pathway_n'] > 1 &
ix_emmaa[,'to_pathway_n'] > 1,])
ix_emmaa_edges_op <- get_external_edges(ix_emmaa_op[ix_emmaa_op[,'from_pathway_n'] > 1 &
ix_emmaa_op[,'to_pathway_n'] > 1,])
### Create a network of new (HGNC not among the map contents) upstream regulators (with 'from_pathway_n' == 0, and 'to_pathway_n' > 0)
### (raw and OmniPathDB-filtered)
ix_reg_emmaa_edges <- get_external_edges(ix_emmaa[ix_emmaa[,'from_pathway_n'] == 0 &
ix_emmaa[,'to_pathway_n'] > 0 &
!(ix_emmaa[,'from'] %in% all_hgncs),])
ix_reg_emmaa_edges_op <- get_external_edges(ix_emmaa_op[ix_emmaa_op[,'from_pathway_n'] == 0 &
ix_emmaa_op[,'to_pathway_n'] > 0 &
!(ix_emmaa_op[,'from'] %in% all_hgncs),])
### Use igraph to create xgmml files (for CytoScape) and pdf files
library(igraph)
### A helper function to generate a graph from an edgelist, including some
### useful graphical properties, and then save it in different formats
produce_graph <- function(fedgelist, fxgmml = NULL, fpdf = NULL) {
### Create a graph from a non-empty names
f_g <- graph.edgelist(fedgelist[fedgelist[,1] != "",])
### Graphical properties
### Pathway nodes contain spaces in their names, we need them for a different visualisation
pathway_indices <- grep(" ", V(f_g)$name)
### Basic HGNC node colour
f_color <- rep("lightblue", gorder(f_g))
### Three-neighbour colour
if(quantile(degree(f_g))[2] > 2) { f_color[degree(f_g) >= quantile(degree(f_g))[2]] <- "yellow" }
### Four-or-more-neighbour colour
if(quantile(degree(f_g))[3] > 3) { f_color[degree(f_g) >= quantile(degree(f_g))[3]] <- "red" }
### Pathway colour
f_color[pathway_indices] <- "lightgreen"
### Height (for the rectangle shape)
f_height <- rep(40, gorder(f_g))
### Set the colours
f_g <- set.vertex.attribute(f_g, "color", value = f_color)
### Create layout, stretch a bit
l = layout_with_fr(f_g)
l[,1] <- l[,1]*2000
### Plot to pdf
if(!is.null(fpdf)) {
pdf(file = fpdf, width = 20, height = 15)
plot(f_g, layout = l, vertex.size = 5)
dev.off()
}
### Plot to xgmml, node sizes provided for the CytoScape style mapping
f_g <- set.vertex.attribute(f_g, "size", value = f_height)
f_g <- set.vertex.attribute(f_g, "size2", value = 40+4*nchar(V(f_g)$name))
if(!is.null(fxgmml)) { write.graph(f_g, format = "graphml", file = fxgmml) }
return(f_g)
}
### Create a graph of direct crosstalks
ix_g <- produce_graph(ix_edgelist, fxgmml = "ix_out.graphml", fpdf = "ix_out.pdf")
### Create a graph of new EMMAA crosstalks (raw)
ix_emmaa_g <- produce_graph(ix_emmaa_edges, fxgmml = "ix_emmaa_out.graphml", fpdf = "ix_emmaa_out.pdf")
### Create a graph of new EMMAA crosstalks (OmniPathDB-filtered)
ix_emmaa_op_g <- produce_graph(ix_emmaa_edges_op, fxgmml = "ix_emmaa_op_out.graphml", fpdf = "ix_emmaa_op_out.pdf")
### Create a graph of new EMMAA upstream regulators (raw)
ix_reg_emmaa_g <- produce_graph(ix_reg_emmaa_edges, fxgmml = "ix_reg_emmaa_out.graphml", fpdf = "ix_reg_emmaa_out.pdf")
### Create a graph of new EMMAA upstream regulators (OmniPathDB-filtered)
ix_reg_emmaa_op_g <- produce_graph(ix_reg_emmaa_edges_op, fxgmml = "ix_reg_emmaa_op_out.graphml", fpdf = "ix_reg_emmaa_op_out.pdf")
message("Done.")
\ No newline at end of file
##################################################
## Project: COVID-19 Disease Map
## Script purpose: Convenience functions for accessing the MINERVA Platform
## Date: 24.12.2020
## Author: Marek Ostaszewski
##################################################
library(httr)
library(jsonlite)
### A convenience function to handle API queries
ask_GET <- function(fask_url, verbose = F) {
if(verbose) {
message(URLencode(fask_url))
}
resp <- httr::GET(url = URLencode(fask_url),
httr::add_headers('Content-Type' = "application/x-www-form-urlencoded"))
if(httr::status_code(resp) == 200) {
return(httr::content(resp, as = "text"))
}
return(NULL)
}
### Get the components of a given map/project on the MINERVA Platform
get_map_components <- function(map_api, project_id = NULL) {
if(is.null(project_id)) {
### If project id not given, get configuration of the map, to obtain the latest (default) version
cfg <- fromJSON(ask_GET(paste0(map_api, "configuration/")))
project_id <- cfg$options[cfg$options$type == "DEFAULT_MAP","value"]
}
### The address of the latest (default) build
mnv_base <- paste0(map_api, "projects/",project_id,"/")
message(paste0("Asking for diagrams in: ", mnv_base, "models/"))
### Get diagrams
models <- fromJSON(ask_GET(paste0(mnv_base, "models/")))
### Get elements of the chosen diagram
model_elements <- lapply(models$idObject,
function(x)
fromJSON(ask_GET(paste0(mnv_base,"models/",x,"/",
"bioEntities/elements/?columns=id,name,type,references,elementId,complexId")),
flatten = F))
### Request for reactions that have at least one top 10 element as participant
model_reactions <- lapply(models$idObject,
function(x)
fromJSON(ask_GET(paste0(mnv_base,"models/",x,"/",
"bioEntities/reactions/?columns=modifiers,products,reactants")),
flatten = F))
### Pack all into a list and return
return(list(models = models, map_elements = model_elements, map_reactions = model_reactions))
}
### Get annotation of a given type, from element/reaction references
get_annotation <- function(freferences, ftype) {
sapply(freferences,
function(x) {
ifelse(any(x$type == ftype), x$resource[x$type == ftype], NA)
})
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment