Commit e5674597 authored by St. Elmo's avatar St. Elmo
Browse files

finished tests for basic analysis

parent 7299f0a0
......@@ -9,7 +9,6 @@ Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
GLPK = "60bf3e95-4087-53dc-ae20-288a0d20c6a6"
Gadfly = "c91e804a-d5a3-530f-b6f0-dfbca275c004"
GraphPlot = "a2cc645c-3eea-5389-862e-a155d0052231"
Gurobi = "2e9cd046-0924-5485-92f1-d5272153d98b"
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
......@@ -25,6 +24,7 @@ Requires = "ae029012-a4dd-5104-9daa-d747884805df"
Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
SBML = "e5567a89-2604-4b09-9718-f5f78e97c3bb"
SBML_jll = "bb12108a-f4ef-5f88-8ef3-0b33ff7017f1"
SimpleWeightedGraphs = "47aef6b3-ad0c-573a-a1e2-d07658019622"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StringDistances = "88034a9c-02f8-509d-84a9-84ec65e18404"
......
# CobraTools.jl
*CobraTools is a Julia package for constraint based reconstruction and analysis of metabolic models.*
[docs-img]:https://img.shields.io/badge/docs-dev-blue.svg
[docs-img]:https://img.shields.io/badge/docs-latest-blue.svg
[docs-url]: https://stelmo.github.io/CobraTools.jl/dev
[ci-img]: https://github.com/stelmo/CobraTools.jl/actions/workflows/ci.yml/badge.svg?branch=master&event=push
......
......@@ -19,7 +19,7 @@
To install this package: `] add CobraTools`.
Some of the features used in this package require external programs to be installed. These are described below:
Some of the optional features used in this package require external programs and/or data to be available. These are described below:
* The Equilibrator interface requires that the Equilibrator-API has been installed and can be accessed through Julia's PyCall package. Refer to the [Equilibrator-API website](https://gitlab.com/equilibrator/equilibrator-api) for installation instructions. Within Julia, if you can call `pyimport("equilibrator_api")` successfully, then you will be able to use the functions exposed here. To actually use the functions insert `using PyCall` in your main level script (before or after `using CobraTools`).
* To extract turnover numbers, Km, Kcat/Km and Ki from the Brenda database, you will need to download the database as a txt file [available here](https://www.brenda-enzymes.org/download_brenda_without_registration.php) (~250 MB).
......
# Model Structure
Before reading, writing models or building models, it is important to understand how they are represented internally in `CobraTools`.
Before reading, writing, or building models, it is important to understand how they are represented internally in `CobraTools`.
Each model is a struct of the type `CobraTools.Model`, which is composed of a model `id`, and arrays of `Reaction`s, `Metabolite`s and `Gene`s.
```@docs
Model
......@@ -9,11 +9,9 @@ When reading, writing, building or analysing models, these fields are what is us
```@docs
Reaction
```
Note, the format of `grr` in `Reaction` should be a nested array, like [[g1, g2], [g3, g4], ...].
Each sub-array, e.g. [g1, g2], is composed of essential genes (`g1::CobraTools.Gene`, etc.) for the reaction to function.
Thus, if the reaction requires (g1 and g2) or (g3 and g4) to function, then this would be represented by [[g1, g2], [g3, g4]] in `grr`.
Also note, the metabolites dictionary field of `Reaction` maps a `Metabolite` to its stoichiometrix coefficient.
Note, the format of `grr` in `Reaction` should be a nested array, like `[[g1, g2], [g3, g4], ...]`.
Each sub-array, e.g. `[g1, g2]`, is composed of essential genes (`g1::CobraTools.Gene`, etc.) for the reaction to function.
Thus, if the reaction requires (`g1` and `g2`) or (`g3` and `g4`) to function, then this would be represented by `[[g1, g2], [g3, g4]]` in `grr`. Also note, the metabolites dictionary field of `Reaction` maps a `Metabolite` to its stoichiometrix coefficient.
```@docs
Metabolite
```
......
module CobraTools
using Requires
using MAT
using JSON
using SBML
using SparseArrays
using JuMP
using LinearAlgebra
using MAT, JSON, SBML
using LinearAlgebra, SparseArrays
using Measurements
using Statistics
using Random
using Statistics, Random
using PyCall
using Tulip # for LPs
using OSQP # for QPs - only pFBA, not a very good LP solver
using JuMP, Tulip, OSQP # OSQP sucks for LPs
using LightGraphs, SimpleWeightedGraphs, GraphPlot, Compose
import Base: findfirst, getindex, show
......@@ -49,7 +44,7 @@ export
read_model, save_model,
# optimization_analysis
get_core_model, build_cbm, fba, map_fluxes, set_bound, pfba#, exchange_reactions, metabolite_fluxes
get_core_model, build_cbm, fba, map_fluxes, set_bound, pfba, atom_exchange, exchange_reactions, metabolite_fluxes
# Initialization functions
include("init_functions.jl")
......
......@@ -259,41 +259,104 @@ function set_bound(vind, ubs, lbs; ub=1000, lb=-1000)
set_normalized_rhs(ubs[vind], ub)
end
# """
# get_exchanges(rxndict::Dict{String, Float64}; topN=8, ignorebound=1000)
# Display the topN producing and consuming exchange fluxes. Ignores infinite (problem upper/lower bound) fluxes (set with ignorebound).
# """
# function exchange_reactions(rxndict::Dict{String, Float64}; topN=8, ignorebound=1000)
# fluxes = Float64[]
# rxns = String[]
# for (k, v) in rxndict
# if startswith(k, "EX_") && abs(v) < ignorebound
# push!(rxns, k)
# push!(fluxes, v)
# end
# end
# inds_prod = sortperm(fluxes, rev=true)
# inds_cons = sortperm(fluxes)
# println("Consuming fluxes:")
# for i in 1:topN
# if rxndict[rxns[inds_cons[i]]] > 0
# continue
# end
# println(rxns[inds_cons[i]], " = ", round(rxndict[rxns[inds_cons[i]]], digits=4))
# end
# println("Producing fluxes:")
# for i in 1:topN
# if rxndict[rxns[inds_cons[i]]] < 0
# continue
# end
# println(rxns[inds_prod[i]], " = ", round(rxndict[rxns[inds_prod[i]]], digits=4))
# end
# end
# """
# """
# function metabolite_fluxes(fluxdict::Dict{String, Float64}, model::CobraTools.Model)
# end
"""
atom_exchange(flux_dict::Dict{String, Float64}, model::CobraTools.Model)
Return a dictionary mapping the flux of atoms across the boundary of the model given `flux_dict` of reactions in `model`.
Here `flux_dict` is a mapping of reaction `id`s to fluxes, e.g. from FBA.
"""
function atom_exchange(flux_dict::Dict{String, Float64}, model::CobraTools.Model)
atom_flux = Dict{String, Float64}()
for (rxnid, flux) in flux_dict
if startswith(rxnid, "EX_") || startswith(rxnid, "DM_") # exchange, demand reaction
for (met, stoich) in findfirst(model.reactions, rxnid).metabolites
adict = get_atoms(met)
for (atom, stoich) in adict
atom_flux[atom] = get(atom_flux, atom, 0.0) + flux*stoich
end
end
end
end
return atom_flux
end
"""
get_exchanges(rxndict::Dict{String, Float64}; topN=8, ignorebound=1000.0, verbose=true)
Display the topN producing and consuming exchange fluxes.
Set topN to a large number to get all the consuming/producing fluxes.
Ignores infinite (problem upper/lower bound) fluxes (set with ignorebound).
When `verbose` is false, the output is not printed out.
Return these reactions in two dictionaries: `consuming`, `producing`
"""
function exchange_reactions(rxndict::Dict{String, Float64}; topN=8, ignorebound=1000.0, verbose=true)
fluxes = Float64[]
rxns = String[]
for (k, v) in rxndict
if startswith(k, "EX_") && abs(v) < ignorebound
push!(rxns, k)
push!(fluxes, v)
end
end
inds_prod = sortperm(fluxes, rev=true)
inds_cons = sortperm(fluxes)
consuming = Dict{String, Float64}()
producing = Dict{String, Float64}()
verbose && println("Consuming fluxes:")
for i in 1:min(topN, length(rxndict))
if rxndict[rxns[inds_cons[i]]] < -eps()
verbose && println(rxns[inds_cons[i]], " = ", round(rxndict[rxns[inds_cons[i]]], digits=4))
consuming[rxns[inds_cons[i]]] = rxndict[rxns[inds_cons[i]]]
else
continue
end
end
verbose && println("Producing fluxes:")
for i in 1:min(topN, length(rxndict))
if rxndict[rxns[inds_prod[i]]] > eps()
verbose && println(rxns[inds_prod[i]], " = ", round(rxndict[rxns[inds_prod[i]]], digits=4))
producing[rxns[inds_prod[i]]] = rxndict[rxns[inds_prod[i]]]
else
continue
end
end
return consuming, producing
end
"""
metabolite_fluxes(fluxdict::Dict{String, Float64}, model::CobraTools.Model)
Return two dictionaries of metabolite `id`s mapped to reactions that consume or produce them.
"""
function metabolite_fluxes(fluxdict::Dict{String, Float64}, model::CobraTools.Model)
S, _, _, _ = get_core_model(model)
S = Array(S) # full
met_flux = Dict{String, Float64}()
rxnids = [rxn.id for rxn in model.reactions]
metids= [met.id for met in model.metabolites]
producing = Dict{String, Dict{String, Float64}}()
consuming = Dict{String, Dict{String, Float64}}()
for (row, metid) in enumerate(metids)
for (col, rxnid) in enumerate(rxnids)
mf = fluxdict[rxnid]*S[row, col]
# ignore zero flux
if mf < -eps() # consuming rxn
if haskey(consuming, metid)
consuming[metid][rxnid] = mf
else
consuming[metid] = Dict(rxnid => mf)
end
elseif mf > eps()
if haskey(producing, metid)
producing[metid][rxnid] = mf
else
producing[metid] = Dict(rxnid => mf)
end
end
end
end
return consuming, producing
end
......@@ -168,70 +168,70 @@ function test_samples(samples::Array{Float64, 2}, model::CobraTools.Model, ubcon
return violations
end
"""
achr(N::Int64, wpoints::Array{Float64, 2}, ubcons, lbcons; keepevery=100, samplesize=1000)
Needs work, for long iterations it becomes unstable (violates bounds).
"""
function achr(N::Int64, wpoints::Array{Float64, 2}, ubcons, lbcons; keepevery=100, samplesize=1000)
ubs, lbs = get_bound_vectors(ubcons, lbcons) # get bounds from model
nwpts = size(wpoints, 2) # number of warmup points generated
samples = zeros(size(wpoints, 1), samplesize) # sample storage
current_point = zeros(size(wpoints, 1))
current_point .= wpoints[:, rand(1:nwpts)]
shat = mean(wpoints, dims=2)[:]
δdirtol = 1e-6 # too small directions get ignored ≈ 0 (solver precision issue)
sample_num = 0
samplelength = 0
updatesamplesizelength = true
for n=1:N
if updatesamplesizelength # switch to samples
direction_point = (@view wpoints[:, rand(1:nwpts)]) - (@view current_point[:]) # use warmup points to find direction in warmup phase
else
direction_point = (@view shat[:]) - (@view current_point[:]) # after warmup phase, only find directions in sampled space
end
λmax = 1e10
λmin = -1e10
for i in eachindex(lbs)
δlower = lbs[i] - current_point[i]
δupper = ubs[i] - current_point[i]
if direction_point[i] < -δdirtol
lower = δupper/direction_point[i]
upper = δlower/direction_point[i]
elseif direction_point[i] > δdirtol
lower = δlower/direction_point[i]
upper = δupper/direction_point[i]
else
lower = -1e10
upper = 1e10
end
lower > λmin && (λmin = lower) # max min step size that satisfies all bounds
upper < λmax && (λmax = upper) # min max step size that satisfies all bounds
end
if λmax <= λmin || λmin == -1e10 || λmax == 1e10 # this sometimes can happen
@warn "Infeasible direction at iteration $(n)..."
continue
end
# """
# achr(N::Int64, wpoints::Array{Float64, 2}, ubcons, lbcons; keepevery=100, samplesize=1000)
# Needs work, for long iterations it becomes unstable (violates bounds).
# """
# function achr(N::Int64, wpoints::Array{Float64, 2}, ubcons, lbcons; keepevery=100, samplesize=1000)
# ubs, lbs = get_bound_vectors(ubcons, lbcons) # get bounds from model
# nwpts = size(wpoints, 2) # number of warmup points generated
# samples = zeros(size(wpoints, 1), samplesize) # sample storage
# current_point = zeros(size(wpoints, 1))
# current_point .= wpoints[:, rand(1:nwpts)]
# shat = mean(wpoints, dims=2)[:]
# δdirtol = 1e-6 # too small directions get ignored ≈ 0 (solver precision issue)
# sample_num = 0
# samplelength = 0
# updatesamplesizelength = true
# for n=1:N
# if updatesamplesizelength # switch to samples
# direction_point = (@view wpoints[:, rand(1:nwpts)]) - (@view current_point[:]) # use warmup points to find direction in warmup phase
# else
# direction_point = (@view shat[:]) - (@view current_point[:]) # after warmup phase, only find directions in sampled space
# end
# λmax = 1e10
# λmin = -1e10
# for i in eachindex(lbs)
# δlower = lbs[i] - current_point[i]
# δupper = ubs[i] - current_point[i]
# if direction_point[i] < -δdirtol
# lower = δupper/direction_point[i]
# upper = δlower/direction_point[i]
# elseif direction_point[i] > δdirtol
# lower = δlower/direction_point[i]
# upper = δupper/direction_point[i]
# else
# lower = -1e10
# upper = 1e10
# end
# lower > λmin && (λmin = lower) # max min step size that satisfies all bounds
# upper < λmax && (λmax = upper) # min max step size that satisfies all bounds
# end
# if λmax <= λmin || λmin == -1e10 || λmax == 1e10 # this sometimes can happen
# @warn "Infeasible direction at iteration $(n)..."
# continue
# end
λ = rand()*(λmax - λmin) + λmin # random step size
current_point .= current_point .+ λ .* direction_point # will be feasible
shat .= (n.*shat .+ current_point)./(n+1)
if n % keepevery == 0
sample_num += 1
samples[:, sample_num] .= current_point
if sample_num >= samplesize
updatesamplesizelength = false
sample_num = 0 # reset, start replacing the older samples
end
updatesamplesizelength && (samplelength += 1) # lags sample_num because the latter is a flag as well
end
# λ = rand()*(λmax - λmin) + λmin # random step size
# current_point .= current_point .+ λ .* direction_point # will be feasible
# shat .= (n.*shat .+ current_point)./(n+1)
# if n % keepevery == 0
# sample_num += 1
# samples[:, sample_num] .= current_point
# if sample_num >= samplesize
# updatesamplesizelength = false
# sample_num = 0 # reset, start replacing the older samples
# end
# updatesamplesizelength && (samplelength += 1) # lags sample_num because the latter is a flag as well
# end
end
# end
return samples
end
\ No newline at end of file
# return samples
# end
\ No newline at end of file
......@@ -2,11 +2,36 @@
model = CobraTools.read_model(joinpath("data", "e_coli_core.json"))
@test length(model.reactions) == 95 # read in correctly
# FBA
biomass = findfirst(model.reactions, "BIOMASS_Ecoli_core_w_GAM")
optimizer = Tulip.Optimizer # quiet by default
sol = fba(model, biomass, optimizer)
@test isapprox(sol["BIOMASS_Ecoli_core_w_GAM"], 0.8739215022678488, atol=1e-6)
# exchange trackers
atom_fluxes = atom_exchange(sol, model)
@test isapprox(atom_fluxes["C"], -37.1902, atol=1e-3)
consuming, producing = exchange_reactions(sol; verbose=false)
@test isapprox(consuming["EX_nh4_e"], -4.76532, atol=1e-3)
consuming, producing = metabolite_fluxes(sol, model)
@test isapprox(consuming["atp_c"]["PFK"], -7.47738, atol=1e-3)
@test isapprox(producing["atp_c"]["PYK"], 1.75818, atol=1e-3)
# set bounds
cbmodel, v, mb, ubs, lbs = build_cbm(model)
glucose_index = model[findfirst(model.reactions, "EX_glc__D_e")]
o2_index = model[findfirst(model.reactions, "EX_o2_e")]
atpm_index = model[findfirst(model.reactions, "ATPM")]
set_bound(glucose_index, ubs, lbs; ub=-1.0, lb=-1.0)
@test normalized_rhs(ubs[glucose_index]) == -1.0
@test normalized_rhs(lbs[glucose_index]) == 1.0
set_bound(o2_index, ubs, lbs; ub=1.0, lb=1.0)
@test normalized_rhs(ubs[o2_index]) == 1.0
@test normalized_rhs(lbs[o2_index]) == -1.0
# pFBA
optimizer = OSQP.Optimizer
atts = Dict("eps_abs" => 5e-4,"eps_rel" => 5e-4, "max_iter" => 100_000, "verbose"=>false)
sol = pfba(model, biomass, optimizer; solver_attributes=atts) # just see if it works - OSQP is a terrible LP solver
......@@ -14,4 +39,6 @@
sol = pfba(model, biomass, [Tulip.Optimizer, OSQP.Optimizer]; solver_attributes=Dict("opt1" => Dict{Any, Any}(), "opt2" => atts)) # try two optimizers
@test isapprox(sol["PGM"], -14.737442319041387, atol=1e-6)
end
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment