resolve_aliases.R 6.52 KB
Newer Older
Marek Ostaszewski's avatar
Marek Ostaszewski committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
##################################################
## Project: COVID-19 Disease Map
## Script purpose: Translate raw CellDesigner SIF to Entrez identifiers using MINERVA 
## Date: 05.06.2020
## Author: Marek Ostaszewski
##################################################

library(httr)
library(jsonlite)

### A convenience function to handle API queries
ask_GET <- function(furl, fask) {
  resp <- httr::GET(url = paste0(furl, fask),
                    httr::add_headers('Content-Type' = "application/x-www-form-urlencoded"),
                    ### Currently ignoring SSL!
                    httr::set_config(config(ssl_verifypeer = 0L)))
  if(httr::status_code(resp) == 200) {
    return(httr::content(resp, as = "text"))
  }
  return(NULL)
}

### Define the source file (GitLab, raw link)
diagram <- "https://git-r3lab.uni.lu/covid/models/-/raw/master/Curation/Apoptosis/Apoptosis_03.06.2020.xml"

### Read in the raw SIF version (here straight from the github of Aurelien)
raw_sif <- read.table(url("https://raw.githubusercontent.com/aurelien-naldi/preliminary-covid-modeling/master/covid-models/Apoptosis_03.06.2020_raw.sif"),
                      sep = " ", header = F, stringsAsFactors = F)

### Read the list of resources to be integrated, from the MINERVA build scripts
res <- read.csv(url("https://git-r3lab.uni.lu/covid/models/raw/master/Integration/MINERVA_build/resources.csv"),
                header = T, stringsAsFactors = F)

diag_name <- res[res$Resource == diagram, "Name"]

### Get MINERVA elements
### The address of the COVID-19 Disease Map in MINERVA
map <- "https://covid19map.elixir-luxembourg.org/minerva/api/"
### Get configuration of the COVID-19 Disease Map, to obtain the latest (default) version
cfg <- fromJSON(ask_GET(map, "configuration/"))
project_id <- cfg$options[cfg$options$type == "DEFAULT_MAP","value"]
### The address of the latest (default) build 
mnv_base <- paste0(map,"projects/",project_id,"/")

message(paste0("Asking for diagrams in: ", mnv_base, "models/"))

### Get diagrams
models <- ask_GET(mnv_base, "models/")
models <- fromJSON(models, flatten = F)

this_refs <- models[models$name == diag_name]

### Get elements of the chosen diagram
model_elements <- fromJSON(ask_GET(paste0(mnv_base,"models/",models$idObject[models$name == diag_name],"/"), 
Marek Ostaszewski's avatar
Marek Ostaszewski committed
55
                                   "bioEntities/elements/?columns=id,name,type,references,elementId,complexId,bounds"),
Marek Ostaszewski's avatar
Marek Ostaszewski committed
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
                           flatten = F)

message("Fetching entrez ids...")
### Get information about Entrez identifiers from MINERVA elements
entrez <- sapply(model_elements$references, function(x) ifelse(length(x) == 0, NA, x[x$type == "ENTREZ", "resource"]))
names(entrez) <- model_elements$elementId

### An utility function to retrieve Entrez based on the species id
### if the id is a complex, the function goes recursively and fetches the ids of elements in this complex
group_elements <- function(feid, felements, fentrez) {
  pos <- which(felements$elementId == feid)
  ### Any elements that may be nested in the 'feid' (CellDesigner alias)
  incs <- felements$elementId[felements$complexId %in% felements$id[pos]]
  if(length(incs) > 0) {
    ### If nested elements found, run the function recursively for the contained elements
    return(paste(unlist(sapply(incs, group_elements, felements, fentrez)), collapse = ";"))
  } else {
    ### If no nested elements, return Entrez
    rid <- fentrez[[feid]]
    if(is.na(rid)) {
      ### If Entrez not available, return name
      rid <- felements$name[pos]
    }
    return(rid)
  }
}

Marek Ostaszewski's avatar
Marek Ostaszewski committed
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
### A workaround function to get information about hypothetical complexes; 
### currently MINERVA API does not support this, we need to get the entire CD file and parse it
get_groups <- function(fname) {
  message(paste0("Getting groups for", fname, "..."))
  library(xml2)
  cd_map <- read_xml(ask_GET(mnv_base, 
                             paste0("models/",
                                    models$idObject[models$name == fname],
                                    ":downloadModel?handlerClass=lcsb.mapviewer.converter.model.celldesigner.CellDesignerXmlParser")))
  ### CellDesigner namespace
  ns_cd <- xml_ns_rename(xml_ns(read_xml("<root>
                                            <sbml xmlns = \"http://www.sbml.org/sbml/level2/version4\"/>
                                            <cd xmlns = \"http://www.sbml.org/2001/ns/celldesigner\"/>
                                          </root>")), 
                         d1 = "sbml", d2 = "cd")
  ### Get complex ids
  cids <- xml_attr(xml_find_all(cd_map, "//cd:complexSpeciesAlias", ns_cd), "species")
  ### For each check, which is hypothetical
  hypocs <- sapply(xml_attr(xml_find_all(cd_map, "//cd:complexSpeciesAlias", ns_cd), "species"), 
                   function(x) xml_text(xml_find_first(xml_find_first(cd_map, 
                                                                      paste0("//sbml:species[@id='", x, "']"), ns_cd),
                                                      ".//cd:hypothetical", ns_cd)))
  names(hypocs) <- gsub("s_id_", "",names(hypocs))
  return(hypocs)
}

cgroups <- get_groups(diag_name)

Marek Ostaszewski's avatar
Marek Ostaszewski committed
111
112
113
message("Translating...")
### Create a copy
translated_sif <- raw_sif
Marek Ostaszewski's avatar
Marek Ostaszewski committed
114
115
116
117
118
119
120
121
122
### Retrieve Entrez and type for the entire columns of sources and targets
s.entrez <- sapply(raw_sif[,1], group_elements, model_elements, entrez)
s.type <- sapply(raw_sif[,1], function(x) { ifelse(x %in% names(cgroups),
                                                   ifelse(is.na(cgroups[x]), "complex", "group"),
                                                   "node") })
t.entrez <- sapply(raw_sif[,3], group_elements, model_elements, entrez)
t.type <- sapply(raw_sif[,3], function(x) { ifelse(x %in% names(cgroups),
                                                   ifelse(is.na(cgroups[x]), "complex", "group"),
                                                   "node") })
Marek Ostaszewski's avatar
Marek Ostaszewski committed
123
124
125
126
127
### Collect x.y information
s.xy <- t(sapply(raw_sif[,1], function(x) unlist(model_elements$bounds[model_elements$elementId == x, c(3,4)])))
colnames(s.xy) <- c("source.x", "source.y")
t.xy <- t(sapply(raw_sif[,3], function(x) unlist(model_elements[model_elements$elementId == x,1][,c(3,4)])))
colnames(t.xy) <- c("targets.x", "targets.y")
Marek Ostaszewski's avatar
Marek Ostaszewski committed
128

Marek Ostaszewski's avatar
Marek Ostaszewski committed
129
### Combine into a single data frame
Marek Ostaszewski's avatar
Marek Ostaszewski committed
130
131
132
133
translated_sif <- data.frame(source = s.entrez, source.type = s.type,
                             sign = raw_sif[,2], 
                             target = t.entrez, target.type = t.type,
                             s.xy, t.xy)
Marek Ostaszewski's avatar
Marek Ostaszewski committed
134

Marek Ostaszewski's avatar
Marek Ostaszewski committed
135
write.table(translated_sif, file = "translated_sif.txt",
Marek Ostaszewski's avatar
Marek Ostaszewski committed
136
            sep = "\t", quote = F, col.names = T, row.names = F)
Marek Ostaszewski's avatar
Marek Ostaszewski committed
137
message("Done.")