find_crosstalks.R 13.6 KB
Newer Older
1
2
3
4
5
6
7
8
##################################################
## Project: COVID-19 Disease Map
## Script purpose: Discover crosstalks between COVID-19 Disease Map diagrams
## Date: 27.11.2020
## Author: Marek Ostaszewski
##################################################

### Helper functions to access and retrieve content from MINERVA-hosted diagrams
9
source("https://git-r3lab.uni.lu/covid/models/-/raw/master/Integration/MINERVA_access/minerva_access_functions.R")
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
### For handling WikiPathway queries
library(rWikiPathways)

### A function retrieving element identifiers from MINERVA and WikiPathway diagrams 
get_grouped_refs <- function(fcomponents, fgroups = NULL) {
  ### Get MINERVA HGNCs as a named list
  message("Retrieving MINERVA HGNCs collection")
  refs <- sapply(fcomponents$map_elements, function(x) get_annotation(x$references, "HGNC_SYMBOL"))
  names(refs) <- fcomponents$models$name
  ### Get WikiPathways HGNCs as a named list
  message("Retrieving WP collection")
  ### This file contains all the pathways in the WP COVID-19 repo
  wpids <- readLines("https://raw.githubusercontent.com/wikipathways/SARS-CoV-2-WikiPathways/master/pathways.txt")
  require(rWikiPathways)
  message("Retrieving WP HGNCs")
  wp_hgncs <- sapply(wpids, getXrefList, "H")
  names(wp_hgncs) <- wpids
  refs <- c(refs, wp_hgncs)
  ### If no grouping needed, return the lists as they are
  if(is.null(fgroups)) { return(refs) }
  ### If by-name groupings provided, group unique HGNCs and return
  grefs <- sapply(fgroups, function(x) setdiff(unique(unlist(refs[x])), c("", NA)))
  names(grefs) <- names(fgroups)
  return(grefs)
}

### A function to calculate common elements across pathways
get_crosstalk_edges <- function(frefs, compress = T) {
  ### Frequencies table, needed for compression
  freq_refs <- table(unlist(frefs))
  ### Global table of crosslinks; two-column (HGNC, Pathway name)
  ix_edgelist <- c()
  ### Pairwise checks for shared identifiers
  for(i in 1:(length(frefs)-1)) {
    for(j in (i+1):length(frefs)) {
      ix <- intersect(frefs[[i]], frefs[[j]])
      ix <- ix[!is.na(ix)]
      if(length(ix) == 0) next
      ### This compresses the network by gathering all HGNCs in 
      ### unique crosstalks (between a pair of pathways) and gathering them 
      ### into a single entry, semicolon separated
      if(compress) {
        compress_idx <- which(ix %in% names(freq_refs == 2))
        ix <- c(ix, paste(ix[compress_idx], collapse = ";"))
        ix <- ix[-compress_idx]
      }
      ### Add the crosstalk links to the global table
      for(ti in ix) {
        ix_edgelist <- rbind(ix_edgelist, c(ti, names(frefs)[i]))
        ix_edgelist <- rbind(ix_edgelist, c(ti, names(frefs)[j]))
        ix_edgelist <- unique(ix_edgelist)
      }
    }
  }
  colnames(ix_edgelist) <- c("Participant", "Pathway")
  return(ix_edgelist)
}

### A function listing interactions between pathways from an external list
### interactions between HGNCs within a pathway are excluded
get_external_crosstalks <- function(frefs, external_interactions) {
  ### Global return table
  ext_ix_edgelist <- data.frame(from = external_interactions[,1], to = external_interactions[,2],
                                from_pathway = "", from_pathway_n = 0,
                                to_pathway = "", to_pathway_n = 0)
  for(i in 1:(length(frefs))) {
    ### For each pathway
    ### Calculate "internal" pathway crosstalks for the provided interaction list
    in_hits <- (external_interactions[,1] %in% frefs[[i]] & external_interactions[,2] %in% frefs[[i]])
    ### List external interactions upstream pathway elements
    from_hits <- external_interactions[,1] %in% frefs[[i]] & !in_hits
    ### List external interactions downstream pathway elements
    to_hits <- external_interactions[,2] %in% frefs[[i]] & !in_hits
    ### Add entries to the global table
    ext_ix_edgelist[from_hits,"from_pathway"] <- paste0(ext_ix_edgelist[from_hits,"from_pathway"], names(frefs)[i], ";")
    ext_ix_edgelist[from_hits,"from_pathway_n"] <- ext_ix_edgelist[from_hits,"from_pathway_n"] + 1
    ext_ix_edgelist[to_hits,"to_pathway"] <- paste0(ext_ix_edgelist[to_hits,"to_pathway"], names(frefs)[i], ";")
    ext_ix_edgelist[to_hits,"to_pathway_n"] <- ext_ix_edgelist[to_hits,"to_pathway_n"] + 1
  }
  return(ext_ix_edgelist)
}

### A function operating on the external cross-talks table (see above) 
### following the format: from, to, from pathway, from pathway n, to pathway, to pathway n 
### list external edges to and from pathways
get_external_edges <- function(fcrosstalk) {
  ext_edgelist <- c()
  for(i in 1:nrow(fcrosstalk)) {
    from <- unlist(strsplit(fcrosstalk[i,3],";"))
    for(j in from) { ext_edgelist <- rbind(ext_edgelist, c(j, fcrosstalk[i,1])) }
    ext_edgelist <- rbind(ext_edgelist, c(fcrosstalk[i,1], fcrosstalk[i,2]))
    to <- unlist(strsplit(fcrosstalk[i,5],";"))
    for(j in to) { ext_edgelist <- rbind(ext_edgelist, c(fcrosstalk[i,2], j)) }
  }
  return(unique(ext_edgelist))
}

### The address of the COVID-19 Disease Map in MINERVA
map <- "https://covid19map.elixir-luxembourg.org/minerva/api/"
### Get the components of MINERVA and WikiPathways (see minerva_access.R)
map_components <- get_map_components(map)
### Introduce pathway groups (following 'https://covid.pages.uni.lu/map_contents')
map_groups <- list(
  `Virus replication cycle` = c(
    "WP4846", "WP4799", 
    "Virus replication cycle", "Nsp9 protein interactions", "SARS-CoV-2 RTC and transcription",
    "Nsp4 and Nsp6 protein interactions","E protein interactions", "Orf3a protein interactions"),                            
  `ER stress and unfolded protein response` = c(
    "WP4861", 
    "Endoplasmatic Reticulum stress", "Orf10 Cul2 pathway"),
  `Autophagy and protein degradation` = c(
    "WP4860", "WP4936", "WP4863", 
    "Endoplasmatic Reticulum stress"),
  `Apoptosis pathway` = c(
    "WP4864", "WP4877",
    "Apoptosis pathway", "JNK pathway", "Electron Transport Chain disruption"),
  `Renin-angiotensin system` = c(
    "WP4883", "WP4965", "WP4969",
    "Renin-angiotensin pathway"),
  `Coagulopathy pathway` = c(
    "WP4927", 
    "Coagulation pathway"),
  `PAMP signalling` = c(
    "WP4912", 
    "PAMP signalling"),
  `Induction of interferons and the cytokine storm` = c(
    "WP4868", "WP4880", "WP4876", "WP4884", "WP4961",
    "Interferon 1 pathway", "Interferon lambda pathway"),
  `Altered host metabolism` = c(
    "WP4853", "WP4904", 
    "Kynurenine synthesis pathway", "HMOX1 pathway", "Nsp14 and metabolism", "Pyrimidine deprivation"),
  `Other diagrams` = c(
    "WP4891", "WP5017", "TGFbeta signalling", "overview"
  )
)

### For a non-grouped version, re-run without 'map_groups'
map_grefs <- get_grouped_refs(map_components, map_groups)
### Get existing between-pathway crosstalks
ix_edgelist <- get_crosstalk_edges(map_grefs, compress = F)
### Global list of all identifiers
all_hgncs <- unique(unlist(map_grefs))


###
### For new interactions, we will use OmniPathDB (https://omnipathdb.org) and
### EMMAA COVID-19 dataset - text mining results based on INDRA (emmaa.indra.bio)
### We will use EMMAA interactions (for COVID-19 specificity) that have a corresponding 
### OmniPath edge (for biological relevance, to filter potential text mining quirks)
###

library(OmnipathR)
library(dplyr)

message(paste0("Asking for OmniPath interactions"))
### Get OmniPath interactions
ia_omnipath <- import_omnipath_interactions() %>% as_tibble()
### Narrow down the interactions (consensus, support, remove columns)
ia_omnipath %>% filter(consensus_direction == 1 & n_references > 1 & (consensus_inhibition + consensus_stimulation) > 0) %>% 
  select(-dip_url, -starts_with("consensus_"), -starts_with("is_")) -> op_narrow

### Get EMMAA results 
### Warning! EMMAA file is heavy (~800MB currently); suggessted use
### Download the file and create "emmaa_covid19_jsons_all.Rd" object, 
### use the object file with downstream calculations
if(!file.exists("emmaa_covid19_jsons_all.Rd")) {
  ### The file may be outdated, check https://emmaa.indra.bio/dashboard/covid19?tab=model for the latest version
  download.file("https://emmaa.s3.amazonaws.com/assembled/covid19/statements_2020-12-01-21-05-54.jsonl",
                destfile = "statements_2020-12-01-21-05-54.jsonl")
  emmaa_covid19_lines <- readLines("statements_2020-12-01-21-05-54.jsonl")
  ## The line below takes time to execute, advised grep to filter down by e.g. belief: grep 'belief": 0.[7-9]'
  emmaa_covid19_jsons <- sapply(emmaa_covid19_lines, fromJSON)
  save(emmaa_covid19_jsons, file = "emmaa_covid19_jsons_all.Rd")
}
### One more optimisation step, load "emmaa_covid19_jsons_all.Rd" file only if 
### the 'emmaa_covid19_jsons' is not found in the current environment
if(!("emmaa_covid19_jsons" %in% ls())) {
  load(file = "emmaa_covid19_jsons_all.Rd")
}
### EMMAA contains belief values, allowing to filter associations based on significance  
emmaa_bfs <- sapply(emmaa_covid19_jsons, function(x) x$belief)
### Here, the dataset is reduced to interactions with belief > 0.8
### This is a reasonable tradeoff, based on the distribution of the results
### see: hist(emmaa_bfs)
emmaa_small <- emmaa_covid19_jsons[emmaa_bfs > 0.8]

## Reduce EMMAA dataset only to HGNC - HGNC interactions
emmaa_obj_dbs <- unname(sapply(emmaa_small, function(x) names(x$obj$db_refs)))
emmaa_subj_dbs <- unname(sapply(emmaa_small, function(x) names(x$subj$db_refs)))
emmaa_hgnc <- emmaa_small[sapply(emmaa_obj_dbs, function(x) "HGNC" %in% x) & sapply(emmaa_subj_dbs, function(x) "HGNC" %in% x)]

### Cleanup the environment, especially 'emmaa_covid19_jsons' object is large
rm(emmaa_covid19_jsons, emmaa_bfs, emmaa_small)

### Create a simplified table  with the interactors
emmaa_interactors <- t(unname(sapply(emmaa_hgnc, function(x) c(x$subj$name, x$obj$name))))
### Add filtering by the OmniPathDB contents
emmaa_op <- paste0(emmaa_interactors[,1],"-",emmaa_interactors[,2]) %in% 
  paste0(op_narrow$source_genesymbol,"-", op_narrow$target_genesymbol)

### New crosstalks based on EMMAA TM results, raw and filtered by OmniPathDB
ix_emmaa <- get_external_crosstalks(map_grefs, emmaa_interactors)
ix_emmaa_op <- get_external_crosstalks(map_grefs, emmaa_interactors[emmaa_op,])

### Create a network of new inter-pathway crosstalks (with more than one 'from_pathway_n' and 'to_pathway_n')
### (raw and OmniPathDB-filtered)
ix_emmaa_edges <- get_external_edges(ix_emmaa[ix_emmaa[,'from_pathway_n'] > 1 & 
                                                ix_emmaa[,'to_pathway_n'] > 1,])
ix_emmaa_edges_op <- get_external_edges(ix_emmaa_op[ix_emmaa_op[,'from_pathway_n'] > 1 & 
                                                      ix_emmaa_op[,'to_pathway_n'] > 1,])

### Create a network of new (HGNC not among the map contents) upstream regulators (with 'from_pathway_n' == 0, and 'to_pathway_n' > 0)
### (raw and OmniPathDB-filtered)
ix_reg_emmaa_edges <- get_external_edges(ix_emmaa[ix_emmaa[,'from_pathway_n'] == 0 & 
                                                  ix_emmaa[,'to_pathway_n'] > 0 & 
                                                  !(ix_emmaa[,'from'] %in% all_hgncs),])
ix_reg_emmaa_edges_op <- get_external_edges(ix_emmaa_op[ix_emmaa_op[,'from_pathway_n'] == 0 & 
                                                        ix_emmaa_op[,'to_pathway_n'] > 0 & 
                                                        !(ix_emmaa_op[,'from'] %in% all_hgncs),])

### Use igraph to create xgmml files (for CytoScape) and pdf files
library(igraph)

### A helper function to generate a graph from an edgelist, including some
### useful graphical properties, and then save it in different formats
produce_graph <- function(fedgelist, fxgmml = NULL, fpdf = NULL) {
  ### Create a graph from a non-empty names
  f_g <- graph.edgelist(fedgelist[fedgelist[,1] != "",])
  
  ### Graphical properties
  ### Pathway nodes contain spaces in their names, we need them for a different visualisation
  pathway_indices <- grep(" ", V(f_g)$name)
  ### Basic HGNC node colour
  f_color <- rep("lightblue", gorder(f_g))
  ### Three-neighbour colour
  if(quantile(degree(f_g))[2] > 2) { f_color[degree(f_g) >= quantile(degree(f_g))[2]] <- "yellow" }
  ### Four-or-more-neighbour colour
  if(quantile(degree(f_g))[3] > 3) { f_color[degree(f_g) >= quantile(degree(f_g))[3]] <- "red" }
  ### Pathway colour
  f_color[pathway_indices] <- "lightgreen"
  ### Height (for the rectangle shape)
  f_height <- rep(40, gorder(f_g))
  
  ### Set the colours
  f_g <- set.vertex.attribute(f_g, "color", value = f_color)
  ### Create layout, stretch a bit
  l = layout_with_fr(f_g)
  l[,1] <- l[,1]*2000
  ### Plot to pdf
  if(!is.null(fpdf)) {
    pdf(file = fpdf, width = 20, height = 15)
    plot(f_g, layout = l, vertex.size = 5)
    dev.off() 
  }
  ### Plot to xgmml, node sizes provided for the CytoScape style mapping
  f_g <- set.vertex.attribute(f_g, "size", value = f_height)
  f_g <- set.vertex.attribute(f_g, "size2", value = 40+4*nchar(V(f_g)$name))
  if(!is.null(fxgmml)) { write.graph(f_g, format = "graphml", file = fxgmml) }
  return(f_g)
}

### Create a graph of direct crosstalks
ix_g <- produce_graph(ix_edgelist, fxgmml = "ix_out.graphml", fpdf = "ix_out.pdf")
### Create a graph of new EMMAA crosstalks (raw)
ix_emmaa_g <- produce_graph(ix_emmaa_edges, fxgmml = "ix_emmaa_out.graphml", fpdf = "ix_emmaa_out.pdf")
### Create a graph of new EMMAA crosstalks (OmniPathDB-filtered)
ix_emmaa_op_g <- produce_graph(ix_emmaa_edges_op, fxgmml = "ix_emmaa_op_out.graphml", fpdf = "ix_emmaa_op_out.pdf")
### Create a graph of new EMMAA upstream regulators (raw)
ix_reg_emmaa_g <- produce_graph(ix_reg_emmaa_edges, fxgmml = "ix_reg_emmaa_out.graphml", fpdf = "ix_reg_emmaa_out.pdf")
### Create a graph of new EMMAA upstream regulators (OmniPathDB-filtered)
ix_reg_emmaa_op_g <- produce_graph(ix_reg_emmaa_edges_op, fxgmml = "ix_reg_emmaa_op_out.graphml", fpdf = "ix_reg_emmaa_op_out.pdf")

message("Done.")