resolve_aliases.R 6.97 KB
Newer Older
Marek Ostaszewski's avatar
Marek Ostaszewski committed
1
2
3
4
5
6
7
8
9
10
11
##################################################
## Project: COVID-19 Disease Map
## Script purpose: Translate raw CellDesigner SIF to Entrez identifiers using MINERVA 
## Date: 05.06.2020
## Author: Marek Ostaszewski
##################################################

library(httr)
library(jsonlite)

### A convenience function to handle API queries
12
ask_GET <- function(furl, fask, unpack = T) {
13
  print(URLencode(paste0(furl, fask)))
14
  resp <- httr::GET(url = URLencode(paste0(furl, fask)), write_memory(),
Marek Ostaszewski's avatar
Marek Ostaszewski committed
15
16
17
18
                    httr::add_headers('Content-Type' = "application/x-www-form-urlencoded"),
                    ### Currently ignoring SSL!
                    httr::set_config(config(ssl_verifypeer = 0L)))
  if(httr::status_code(resp) == 200) {
19
20
21
22
23
24
25
26
27
28
29
    ### when the content is sent as a zip file, it needs to be handled differently,
    ### i.e. saved to a tmp file and unzipped
    if(headers(resp)$`content-type` == "application/zip") {
      tmp <- tempfile()
      tmpc <- file(tmp, "wb")
      writeBin(httr::content(resp, as = "raw"), con = tmpc)
      close(tmpc)
      unzipped <- unzip(tmp)
      file.remove(tmp)
      return(unzipped)
    }
Marek Ostaszewski's avatar
Marek Ostaszewski committed
30
31
32
33
34
35
    return(httr::content(resp, as = "text"))
  }
  return(NULL)
}

### Define the source file (GitLab, raw link)
36
diagram <- "https://git-r3lab.uni.lu/covid/models/-/raw/master/Curation/ER%20Stress/ER_Stress_stable.xml"
Marek Ostaszewski's avatar
Marek Ostaszewski committed
37
38

### Read in the raw SIF version (here straight from the github of Aurelien)
39
raw_sif <- read.table(url("https://git-r3lab.uni.lu/covid/models/-/raw/master/Executable%20Modules/SBML_qual_build/sif/ER_Stress_stable_raw.sif"),
Marek Ostaszewski's avatar
Marek Ostaszewski committed
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
                      sep = " ", header = F, stringsAsFactors = F)

### Read the list of resources to be integrated, from the MINERVA build scripts
res <- read.csv(url("https://git-r3lab.uni.lu/covid/models/raw/master/Integration/MINERVA_build/resources.csv"),
                header = T, stringsAsFactors = F)

diag_name <- res[res$Resource == diagram, "Name"]

### Get MINERVA elements
### The address of the COVID-19 Disease Map in MINERVA
map <- "https://covid19map.elixir-luxembourg.org/minerva/api/"
### Get configuration of the COVID-19 Disease Map, to obtain the latest (default) version
cfg <- fromJSON(ask_GET(map, "configuration/"))
project_id <- cfg$options[cfg$options$type == "DEFAULT_MAP","value"]
### The address of the latest (default) build 
mnv_base <- paste0(map,"projects/",project_id,"/")

message(paste0("Asking for diagrams in: ", mnv_base, "models/"))

### Get diagrams
models <- ask_GET(mnv_base, "models/")
models <- fromJSON(models, flatten = F)

this_refs <- models[models$name == diag_name]

### Get elements of the chosen diagram
model_elements <- fromJSON(ask_GET(paste0(mnv_base,"models/",models$idObject[models$name == diag_name],"/"), 
Marek Ostaszewski's avatar
Marek Ostaszewski committed
67
                                   "bioEntities/elements/?columns=id,name,type,references,elementId,complexId,bounds"),
Marek Ostaszewski's avatar
Marek Ostaszewski committed
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
                           flatten = F)

message("Fetching entrez ids...")
### Get information about Entrez identifiers from MINERVA elements
entrez <- sapply(model_elements$references, function(x) ifelse(length(x) == 0, NA, x[x$type == "ENTREZ", "resource"]))
names(entrez) <- model_elements$elementId

### An utility function to retrieve Entrez based on the species id
### if the id is a complex, the function goes recursively and fetches the ids of elements in this complex
group_elements <- function(feid, felements, fentrez) {
  pos <- which(felements$elementId == feid)
  ### Any elements that may be nested in the 'feid' (CellDesigner alias)
  incs <- felements$elementId[felements$complexId %in% felements$id[pos]]
  if(length(incs) > 0) {
    ### If nested elements found, run the function recursively for the contained elements
    return(paste(unlist(sapply(incs, group_elements, felements, fentrez)), collapse = ";"))
  } else {
    ### If no nested elements, return Entrez
    rid <- fentrez[[feid]]
    if(is.na(rid)) {
      ### If Entrez not available, return name
      rid <- felements$name[pos]
    }
    return(rid)
  }
}

Marek Ostaszewski's avatar
Marek Ostaszewski committed
95
96
97
### A workaround function to get information about hypothetical complexes; 
### currently MINERVA API does not support this, we need to get the entire CD file and parse it
get_groups <- function(fname) {
98
  message(paste0("Getting groups for ", fname, "..."))
Marek Ostaszewski's avatar
Marek Ostaszewski committed
99
  library(xml2)
100
101
102
103
104
  ## Currently comment out the MINERVA download, some content dlded as binary!
  cd_map <- read_xml(ask_GET(mnv_base,
                             paste0("models/",
                                    models$idObject[models$name == fname],
                                    ":downloadModel?handlerClass=lcsb.mapviewer.converter.model.celldesigner.CellDesignerXmlParser")))
Marek Ostaszewski's avatar
Marek Ostaszewski committed
105
106
107
108
109
110
111
112
113
  ### CellDesigner namespace
  ns_cd <- xml_ns_rename(xml_ns(read_xml("<root>
                                            <sbml xmlns = \"http://www.sbml.org/sbml/level2/version4\"/>
                                            <cd xmlns = \"http://www.sbml.org/2001/ns/celldesigner\"/>
                                          </root>")), 
                         d1 = "sbml", d2 = "cd")
  ### Get complex ids
  cids <- xml_attr(xml_find_all(cd_map, "//cd:complexSpeciesAlias", ns_cd), "species")
  ### For each check, which is hypothetical
114
115
116
117
118
  hypocs <- sapply(cids, 
                   function(x) {
                     tcid <- xml_find_first(cd_map, paste0("//sbml:species[@id='", x, "']"), ns_cd)
                     ifelse(length(tcid) > 0, xml_text(xml_find_first(tcid, ".//cd:hypothetical", ns_cd)), NA)
                   })
Marek Ostaszewski's avatar
Marek Ostaszewski committed
119
120
121
122
123
124
  names(hypocs) <- gsub("s_id_", "",names(hypocs))
  return(hypocs)
}

cgroups <- get_groups(diag_name)

Marek Ostaszewski's avatar
Marek Ostaszewski committed
125
126
127
message("Translating...")
### Create a copy
translated_sif <- raw_sif
Marek Ostaszewski's avatar
Marek Ostaszewski committed
128
129
130
131
132
133
134
135
136
### Retrieve Entrez and type for the entire columns of sources and targets
s.entrez <- sapply(raw_sif[,1], group_elements, model_elements, entrez)
s.type <- sapply(raw_sif[,1], function(x) { ifelse(x %in% names(cgroups),
                                                   ifelse(is.na(cgroups[x]), "complex", "group"),
                                                   "node") })
t.entrez <- sapply(raw_sif[,3], group_elements, model_elements, entrez)
t.type <- sapply(raw_sif[,3], function(x) { ifelse(x %in% names(cgroups),
                                                   ifelse(is.na(cgroups[x]), "complex", "group"),
                                                   "node") })
Marek Ostaszewski's avatar
Marek Ostaszewski committed
137
138
139
140
141
### Collect x.y information
s.xy <- t(sapply(raw_sif[,1], function(x) unlist(model_elements$bounds[model_elements$elementId == x, c(3,4)])))
colnames(s.xy) <- c("source.x", "source.y")
t.xy <- t(sapply(raw_sif[,3], function(x) unlist(model_elements[model_elements$elementId == x,1][,c(3,4)])))
colnames(t.xy) <- c("targets.x", "targets.y")
Marek Ostaszewski's avatar
Marek Ostaszewski committed
142

Marek Ostaszewski's avatar
Marek Ostaszewski committed
143
### Combine into a single data frame
Marek Ostaszewski's avatar
Marek Ostaszewski committed
144
145
146
147
translated_sif <- data.frame(source = s.entrez, source.type = s.type,
                             sign = raw_sif[,2], 
                             target = t.entrez, target.type = t.type,
                             s.xy, t.xy)
Marek Ostaszewski's avatar
Marek Ostaszewski committed
148

Marek Ostaszewski's avatar
Marek Ostaszewski committed
149
write.table(translated_sif, file = "translated_sif.txt",
Marek Ostaszewski's avatar
Marek Ostaszewski committed
150
            sep = "\t", quote = F, col.names = T, row.names = F)
Marek Ostaszewski's avatar
Marek Ostaszewski committed
151
message("Done.")