combine_sources.R 7.42 KB
Newer Older
Marek Ostaszewski's avatar
Marek Ostaszewski committed
1
2
3
4
5
6
7
##################################################
## Project: COVID-19 Disease Map
## Script purpose: Combine maps from different sources 
## Date: 01.04.2020
## Author: Marek Ostaszewski
##################################################

8
9
options(stringsAsFactors = F)

Marek Ostaszewski's avatar
Marek Ostaszewski committed
10
library(httr)
11
library(xml2)
Marek Ostaszewski's avatar
Marek Ostaszewski committed
12

13
14
15
16
17
18
19
20
### An 'xml2' namespace structure for parsing CellDesigner xml 
ns_cd <- xml_ns_rename(xml_ns(read_xml("<root>
                                       <sbml xmlns = \"http://www.sbml.org/sbml/level2/version4\"/>
                                       <cd xmlns = \"http://www.sbml.org/2001/ns/celldesigner\"/>
                                       <html xmlns = \"http://www.w3.org/1999/xhtml\"/>
                                       <rdf xmlns = \"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"/>
                                       </root>")), 
                       d1 = "sbml", d2 = "cd", d3 = "html", d4 = "rdf")
21

22
23
24
25
ns_sbml <- xml_ns_rename(xml_ns(read_xml("<root>
                                          <sbml xmlns = 'http://www.sbml.org/sbml/level3/version2/core'/>
                                          </root>")), 
                          d1 = "sbml")
26

27
28
29
### Ironing out of issues in files converted from GPML to CellDesigner_SBML
process_gpml <- function(source) {
  message("API translation request")
30
31
  ### We use httr::POST to execute the API call, and then write down the response
  res <- httr::POST(url = "https://minerva-covid19-curation.lcsb.uni.lu/minerva/api/convert/GPML:CellDesigner_SBML",
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
                    body = source,
                    content_type("text/plain"))
  ### Get resulting XML content as text
  cont <- content(res, as = "text")
  message("Finalizing...")
  ### Apply final corrections using the dedicated 'finalize_gpml' (output: xml)
  gpml_xml <- read_xml(cont)
  notes <- xml_find_first(gpml_xml, "//sbml:model/sbml:notes/html:html/html:body", ns_cd)
  nlines <- unlist(strsplit(xml_text(notes), "\n"))
  nlines <- nlines[-grep("CellDesigner requires (inner|outer)Width|thickness", nlines)]
  xml_text(notes) <- paste(nlines, collapse = "\n")
  return(gpml_xml)
}

### Ironing out of issues in files converted from SBGN to CellDesigner_SBML
process_sbgn <- function(source) {
  message("API translation request")
  ### We use httr::POST to execute the API call, and then write down the response
  res <- httr::POST(url = "https://minerva-covid19-curation.lcsb.uni.lu/minerva/api/convert/SBGN-ML:CellDesigner_SBML",
                    body = source,
52
53
54
                    content_type("text/plain"))
  ### Get resulting XML content as text
  cont <- content(res, as = "text")
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
  return(read_xml(cont))
}

process_cdsbml <- function(source) {
  cdsbml_xml <- read_xml(source)
  ### Remove all kineticLaw tags, a temporary solution for a current bug in v15.beta.2
  ### This will change
  xml_remove(xml_find_all(cdsbml_xml, "//sbml:kineticLaw", ns_cd))
  return(cdsbml_xml)
}

construct_overview <- function(elements) {
  ### Read in an SBML template 
  root <- read_xml("<?xml version='1.0' encoding='UTF-8' standalone='no'?>
                    <sbml xmlns='http://www.sbml.org/sbml/level3/version2/core' level='3' version='2'>
                      <model id='ovw' name='overview'>
                        <listOfCompartments>
                          <compartment constant='false' id='default' size='1' spatialDimensions='3' />
                        </listOfCompartments>
                        <listOfSpecies>
                        </listOfSpecies>
                      </model>
                    </sbml>")
  los <- xml_find_first(root, "//sbml:listOfSpecies", ns_sbml)
  for(e in 1:length(elements)) {
    element <- paste0("<species boundaryCondition='false' initialAmount='0' constant='false' hasOnlySubstanceUnits='false' ",
                      "compartment='default' id='nel_", e, "' name='", elements[e], "' sboTerm='SBO:0000358' />")
    xml_add_child(los, read_xml(element))
  }
  res <- httr::POST(url = "https://minerva-covid19-curation.lcsb.uni.lu/minerva/api/convert/SBML:CellDesigner_SBML",
                    body = as.character(root),
                    content_type("text/plain"))
  return(content(res, as = "text"))
}

### Read the list of resources to be integrated
91
92
93
94
95
### The file has the following columns:
### Include: if the resource is to be added to this build 
### Resource: url to the xml content of the diagram
### Type: what kind of file do we integrate
### Name: under which name the diaram is to be shown in the build
96
97
res <- read.csv(url("https://git-r3lab.uni.lu/covid/models/raw/master/Integration/resources.csv"),
                header = T, stringsAsFactors = F)
98
### Filter only these to be included
99
100
101
102
103
104
105
106
res <- res[res$Include == "Yes",]

### Create output directory if not existing.
if(!dir.exists("output/")) { dir.create("output/") }

### Create submaps directory if not existing.
if(!dir.exists("output/submaps/")) { dir.create("output/submaps/") }

107
### For all resources
108
109
110
111
112
113
114
115
for(r in 1:nrow(res)) {
  ### Process the 'resources' table, all should be network-accessible (raw git)
  message(paste0("Processing: ", res[r,]$Resource))
  con <- url(res[r,]$Resource)
  rls <- paste(readLines(con), collapse = "\n")
  close(con)
  fin_cont <- NULL
  ### Depending on the type, process differently
116
  ### see wrapper functions for MINERVA conversion API above
117
118
119
120
121
122
123
124
125
126
  if(res[r,]$Type == "GPML") {
    fin_cont <- process_gpml(rls)
  } else if (res[r,]$Type == "SBGN") {
    fin_cont <- process_sbgn(rls)
  } else if (res[r,]$Type == "CellDesigner_SBML") {
    fin_cont <- process_cdsbml(rls)
  }else {
    warning(paste0("Resource type not handled: ", res[r,]$Type))
  }

127
  ### Write the result to a file
128
129
  write_xml(fin_cont, file = paste0("output/submaps/",res[r,]$Name,".xml"))
  message("Done.\n\n")
130
131
}

132
### If the overview map and the mapping are to be constructed de novo
133
134
reconstruct_overview = F
reconstruct_mapping = F 
135

136
if(reconstruct_overview) {
137
  ### Create a sinple SBML file and convert it to CellDesigner, gives circular layout
138
139
140
141
142
  ovw <- construct_overview(res$Name)
  cat(ovw, file = paste0("output/overview.xml"))
}

if(reconstruct_mapping) {
143
144
145
  ### Use a mapping template; mapping file requires handling complexes, which are tricky
  ### It is easier to use a preconstructed template, change names of species inside
  ### and remove unnecessary reactions
146
  
147
148
149
150
151
152
  ### Load the template, readlines
  con <- url("https://git-r3lab.uni.lu/covid/models/-/raw/master/Integration/template_mapping.xml")
  mapping <- paste(readLines(con), collapse = "\n")
  close(con)
  
  ### Replace the names
153
154
155
156
157
158
159
  for(n in 1:nrow(res)) {
    mapping <- gsub(paste0(">placeholder", n,"<"), paste0(">nel_",n,"<"), mapping)
    mapping <- gsub(paste0("\"placeholder", n,"\""), paste0("\"nel_",n,"\""), mapping)
    mapping <- gsub(paste0(">target", n,"<"), paste0(">",res[n,]$Name,"<"), mapping)
    mapping <- gsub(paste0("\"target", n,"\""), paste0("\"",res[n,]$Name,"\""), mapping)
  }
  
160
  ### Write to file, read in as an xml structure
161
162
163
  cat(mapping, file = "output/submaps/mapping.xml", sep = "\n")
  mapn <- read_xml("output/submaps/mapping.xml")
  
164
  ### Remove all reactions whose baseReactant species is a placeholder
165
166
167
168
169
170
171
  for(sp in xml_find_all(mapn, "//cd:species", ns_cd)) {
    spattrs <- xml_attrs(sp)
    if(startsWith(spattrs["name"], "placeholder")) {
      br <- xml_find_first(mapn, paste0("//cd:baseReactant[@species='", spattrs["id"], "']"), ns_cd)
      xml_remove(xml_parent(xml_parent(xml_parent(xml_parent(br)))))
    }
  }
172
173
  
  ### Write down the trimmed file
174
175
  write_xml(mapn, "output/submaps/mapping.xml")
}