combine_sources.R 8.58 KB
Newer Older
Marek Ostaszewski's avatar
Marek Ostaszewski committed
1
2
3
4
5
6
7
##################################################
## Project: COVID-19 Disease Map
## Script purpose: Combine maps from different sources 
## Date: 01.04.2020
## Author: Marek Ostaszewski
##################################################

8
9
options(stringsAsFactors = F)

Marek Ostaszewski's avatar
Marek Ostaszewski committed
10
library(httr)
11
library(xml2)
Marek Ostaszewski's avatar
Marek Ostaszewski committed
12

13
14
15
16
17
18
19
20
### An 'xml2' namespace structure for parsing CellDesigner xml 
ns_cd <- xml_ns_rename(xml_ns(read_xml("<root>
                                       <sbml xmlns = \"http://www.sbml.org/sbml/level2/version4\"/>
                                       <cd xmlns = \"http://www.sbml.org/2001/ns/celldesigner\"/>
                                       <html xmlns = \"http://www.w3.org/1999/xhtml\"/>
                                       <rdf xmlns = \"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"/>
                                       </root>")), 
                       d1 = "sbml", d2 = "cd", d3 = "html", d4 = "rdf")
21

22
23
24
25
ns_sbml <- xml_ns_rename(xml_ns(read_xml("<root>
                                          <sbml xmlns = 'http://www.sbml.org/sbml/level3/version2/core'/>
                                          </root>")), 
                          d1 = "sbml")
26

27
28
29
### Ironing out of issues in files converted from GPML to CellDesigner_SBML
process_gpml <- function(source) {
  message("API translation request")
30
31
  ### We use httr::POST to execute the API call, and then write down the response
  res <- httr::POST(url = "https://minerva-covid19-curation.lcsb.uni.lu/minerva/api/convert/GPML:CellDesigner_SBML",
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
                    body = source,
                    content_type("text/plain"))
  ### Get resulting XML content as text
  cont <- content(res, as = "text")
  message("Finalizing...")
  ### Apply final corrections using the dedicated 'finalize_gpml' (output: xml)
  gpml_xml <- read_xml(cont)
  notes <- xml_find_first(gpml_xml, "//sbml:model/sbml:notes/html:html/html:body", ns_cd)
  nlines <- unlist(strsplit(xml_text(notes), "\n"))
  nlines <- nlines[-grep("CellDesigner requires (inner|outer)Width|thickness", nlines)]
  xml_text(notes) <- paste(nlines, collapse = "\n")
  return(gpml_xml)
}

### Ironing out of issues in files converted from SBGN to CellDesigner_SBML
process_sbgn <- function(source) {
  message("API translation request")
  ### We use httr::POST to execute the API call, and then write down the response
  res <- httr::POST(url = "https://minerva-covid19-curation.lcsb.uni.lu/minerva/api/convert/SBGN-ML:CellDesigner_SBML",
                    body = source,
52
53
54
                    content_type("text/plain"))
  ### Get resulting XML content as text
  cont <- content(res, as = "text")
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
  return(read_xml(cont))
}

process_cdsbml <- function(source) {
  cdsbml_xml <- read_xml(source)
  ### Remove all kineticLaw tags, a temporary solution for a current bug in v15.beta.2
  ### This will change
  xml_remove(xml_find_all(cdsbml_xml, "//sbml:kineticLaw", ns_cd))
  return(cdsbml_xml)
}

construct_overview <- function(elements) {
  ### Read in an SBML template 
  root <- read_xml("<?xml version='1.0' encoding='UTF-8' standalone='no'?>
                    <sbml xmlns='http://www.sbml.org/sbml/level3/version2/core' level='3' version='2'>
                      <model id='ovw' name='overview'>
                        <listOfCompartments>
                          <compartment constant='false' id='default' size='1' spatialDimensions='3' />
                        </listOfCompartments>
                        <listOfSpecies>
                        </listOfSpecies>
                      </model>
                    </sbml>")
  los <- xml_find_first(root, "//sbml:listOfSpecies", ns_sbml)
  for(e in 1:length(elements)) {
    element <- paste0("<species boundaryCondition='false' initialAmount='0' constant='false' hasOnlySubstanceUnits='false' ",
                      "compartment='default' id='nel_", e, "' name='", elements[e], "' sboTerm='SBO:0000358' />")
    xml_add_child(los, read_xml(element))
  }
  res <- httr::POST(url = "https://minerva-covid19-curation.lcsb.uni.lu/minerva/api/convert/SBML:CellDesigner_SBML",
                    body = as.character(root),
                    content_type("text/plain"))
  return(content(res, as = "text"))
}

### Read the list of resources to be integrated
91
92
93
94
95
### The file has the following columns:
### Include: if the resource is to be added to this build 
### Resource: url to the xml content of the diagram
### Type: what kind of file do we integrate
### Name: under which name the diaram is to be shown in the build
Marek Ostaszewski's avatar
updates    
Marek Ostaszewski committed
96

Marek Ostaszewski's avatar
Marek Ostaszewski committed
97
98
99
100
101
102
103
104
105
106
107
108
109
110
regular_build = T

if(regular_build) {
  ### Regular build
  res <- read.csv(url("https://git-r3lab.uni.lu/covid/models/raw/master/Integration/MINERVA_build/resources.csv"),
                  header = T, stringsAsFactors = F)
} else {
  ### WikiPathways testbuild
  wps <- readLines("https://raw.githubusercontent.com/wikipathways/SARS-CoV-2-WikiPathways/master/pathways.txt")
  res <- data.frame(Include = "Yes", 
                    Resource = paste0("https://raw.githubusercontent.com/wikipathways/SARS-CoV-2-WikiPathways/master/gpml/", wps, ".gpml"),
                    Type = "GPML",
                    Name = wps)
}
Marek Ostaszewski's avatar
updates    
Marek Ostaszewski committed
111

112
### Filter only these to be included
113
114
res <- res[res$Include == "Yes",]

Marek Ostaszewski's avatar
updates    
Marek Ostaszewski committed
115
116
117
118
119
### Define the output dir
outdir <- "_notgit/output/"
### Just to simplify later writes
outdir_submaps <- paste0(outdir,"submaps/")

120
### Create output directory if not existing.
Marek Ostaszewski's avatar
updates    
Marek Ostaszewski committed
121
if(!dir.exists(outdir)) { dir.create(outdir) }
122
123

### Create submaps directory if not existing.
Marek Ostaszewski's avatar
updates    
Marek Ostaszewski committed
124
if(!dir.exists(outdir_submaps)) { dir.create(outdir_submaps) }
125

126
### For all resources
127
128
129
130
for(r in 1:nrow(res)) {
  ### Process the 'resources' table, all should be network-accessible (raw git)
  message(paste0("Processing: ", res[r,]$Resource))
  con <- url(res[r,]$Resource)
Marek Ostaszewski's avatar
updates    
Marek Ostaszewski committed
131
132
133
134
135
136
137
138
139
  conread <-try(readLines(con), silent = T)
  ### Try retrieving a resource, end gracefully
  if(class(conread) == "try-error") {
    message(paste0("Cannot read from ", res[r,]$Resource))
    close(con)
    message(conread)
    next
  }
  rls <- paste(conread, collapse = "\n")
140
141
142
  close(con)
  fin_cont <- NULL
  ### Depending on the type, process differently
143
  ### see wrapper functions for MINERVA conversion API above
144
145
146
147
148
149
150
151
152
153
  if(res[r,]$Type == "GPML") {
    fin_cont <- process_gpml(rls)
  } else if (res[r,]$Type == "SBGN") {
    fin_cont <- process_sbgn(rls)
  } else if (res[r,]$Type == "CellDesigner_SBML") {
    fin_cont <- process_cdsbml(rls)
  }else {
    warning(paste0("Resource type not handled: ", res[r,]$Type))
  }

154
  ### Write the result to a file
Marek Ostaszewski's avatar
updates    
Marek Ostaszewski committed
155
  write_xml(fin_cont, file = paste0(outdir_submaps,res[r,]$Name,".xml"))
156
  message("Done.\n\n")
157
158
}

159
### If the overview map and the mapping are to be constructed de novo
Marek Ostaszewski's avatar
Marek Ostaszewski committed
160
161
reconstruct_overview = T
reconstruct_mapping = T 
162

163
if(reconstruct_overview) {
164
  ### Create a sinple SBML file and convert it to CellDesigner, gives circular layout
165
  ovw <- construct_overview(res$Name)
Marek Ostaszewski's avatar
Marek Ostaszewski committed
166
167
168
169
170
171
  
  ovw <- gsub("w=\"90.0\" h=\"30.0\"", "w=\"190.0\" h=\"40.0\"", ovw)
  ovw <- gsub("width=\"90.0\" height=\"30.0\"", "width=\"190.0\" height=\"40.0\"", ovw)
  ovw <- gsub("width=\"90.0\" height=\"30.0\"", "width=\"190.0\" height=\"40.0\"", ovw)
  ovw <- gsub("color=\"FFCC99FF\"", "color=\"FFCCFFFF\"", ovw)
  
Marek Ostaszewski's avatar
updates    
Marek Ostaszewski committed
172
  cat(ovw, file = paste0(outdir,"overview.xml"))
173
174
175
}

if(reconstruct_mapping) {
176
177
178
  ### Use a mapping template; mapping file requires handling complexes, which are tricky
  ### It is easier to use a preconstructed template, change names of species inside
  ### and remove unnecessary reactions
179
  
180
  ### Load the template, readlines
Marek Ostaszewski's avatar
Marek Ostaszewski committed
181
  con <- url("https://git-r3lab.uni.lu/covid/models/-/raw/master/Integration/MINERVA_build/template_mapping.xml")
182
183
184
185
  mapping <- paste(readLines(con), collapse = "\n")
  close(con)
  
  ### Replace the names
186
187
188
189
190
191
192
  for(n in 1:nrow(res)) {
    mapping <- gsub(paste0(">placeholder", n,"<"), paste0(">nel_",n,"<"), mapping)
    mapping <- gsub(paste0("\"placeholder", n,"\""), paste0("\"nel_",n,"\""), mapping)
    mapping <- gsub(paste0(">target", n,"<"), paste0(">",res[n,]$Name,"<"), mapping)
    mapping <- gsub(paste0("\"target", n,"\""), paste0("\"",res[n,]$Name,"\""), mapping)
  }
  
193
  ### Write to file, read in as an xml structure
Marek Ostaszewski's avatar
updates    
Marek Ostaszewski committed
194
195
  cat(mapping, file = paste0(outdir_submaps, "mapping.xml"), sep = "\n")
  mapn <- read_xml(paste0(outdir_submaps, "mapping.xml"))
196
  
197
  ### Remove all reactions whose baseReactant species is a placeholder
198
199
200
201
202
203
204
  for(sp in xml_find_all(mapn, "//cd:species", ns_cd)) {
    spattrs <- xml_attrs(sp)
    if(startsWith(spattrs["name"], "placeholder")) {
      br <- xml_find_first(mapn, paste0("//cd:baseReactant[@species='", spattrs["id"], "']"), ns_cd)
      xml_remove(xml_parent(xml_parent(xml_parent(xml_parent(br)))))
    }
  }
205
206
  
  ### Write down the trimmed file
Marek Ostaszewski's avatar
updates    
Marek Ostaszewski committed
207
  write_xml(mapn, paste0(outdir_submaps, "mapping.xml"))
Marek Ostaszewski's avatar
renames    
Marek Ostaszewski committed
208
}