Commit e4a6b8cb authored by St. Elmo's avatar St. Elmo
Browse files

redid sampler, now correct

parent 293f6cb5
......@@ -17,6 +17,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MATLAB = "10e44e05-a98a-55b3-a45b-ba969058deb6"
Measurements = "eff96d63-e80a-5855-80a2-b1b0885c5ab7"
PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
......
using CobraTools
using JuMP
using Gurobi # use your favourite solver
using Measurements
using LinearAlgebra
using JLD
using Plots
pyplot()
# E. coli model
modelpath = joinpath("models", "iJO1366.json")
model = CobraTools.read_model(modelpath)
decomp = JLD.load(joinpath("data", "dgzeros.jld"), "gibbs")
gibbs = Dict{String, Measurement{Float64}}()
for (k, vs) in decomp
gibbs[k] = vs[1] ± vs[2]
end
##
ecoli_kJmolCarbon = -37.36 ± 8.55 # formation of biomass kJ/mol 74.36 ± 8.67
cbmodel, v, mb, ubs, lbs = CobraTools.CBM(model)
set_optimizer(cbmodel, Gurobi.Optimizer)
set_optimizer_attribute(cbmodel, "OutputFlag", 0) # quiet
biomass_index = model[findfirst(model.rxns, "BIOMASS_Ec_iJO1366_WT_53p95M")]
glucose_index = model[findfirst(model.rxns, "EX_glc__D_e")]
o2_index = model[findfirst(model.rxns, "EX_o2_e")]
atpm_index = model[findfirst(model.rxns, "ATPM")]
# Fix glucose use 1.0 then normalization is easy. NB - if not 1 then change normalization!!
CobraTools.set_bound(glucose_index, ubs, lbs; ub=-1.0, lb=-1.0)
# Aerobic
# CobraTools.set_bound(o2_index, ubs, lbs; ub=1000.0, lb=-1000.0)
# Anaerobic
CobraTools.set_bound(o2_index, ubs, lbs; ub=1000.0, lb=0.0)
# No free ATP generation
CobraTools.set_bound(atpm_index, ubs, lbs; ub=1000.0, lb=0.0)
@objective(cbmodel, Max, v[biomass_index])
optimize!(cbmodel)
termination_status(cbmodel) != MOI.OPTIMAL && @warn "Optimization issue..."
μ = objective_value(cbmodel)
### Fix biomass as a constraint
CobraTools.set_bound(biomass_index, ubs, lbs; ub=μ, lb=0.99*μ)
wpoints = CobraTools.get_warmup_points(cbmodel, v, ubs, lbs) # very slow
Nsamples = 100_000
samples = CobraTools.achr(Nsamples, wpoints, model, ubs, lbs, burnin=0.1, keepevery=200)
ΔG_exts = Measurement{Float64}[]
for s in 1:size(samples, 2)
fluxes = CobraTools.map_fluxes(samples[:, s], model)
carbon_ex = CobraTools.atom_exchange(fluxes, model)["C"] # carbon flux
ΔG_ext, missing_ext = CobraTools.map_gibbs_external(fluxes, gibbs)
ΔG_ext -= carbon_ex*ecoli_kJmolCarbon # minus because carbons consumed
push!(ΔG_exts, ΔG_ext)
end
\ No newline at end of file
......@@ -20,6 +20,9 @@ using Measurements
using Statistics
using PyCall # for Equilibrator - ensure that it is installed
# Sampling
using Random
include("global_cobratools.jl")
include("cobra.jl")
......@@ -42,7 +45,7 @@ include("name_space.jl")
include("sampling.jl")
# Init function
# Init function - build Gibbs calling functions
function __init__()
py"""
from equilibrator_api import ComponentContribution, Q_
......@@ -108,8 +111,6 @@ function __init__()
return bals, mags, errs
"""
end
end # module
"""
get_warmup_points(cbmodel, v, mb, ubs, lbs)
get_warmup_points(cbmodel, v, mb, ubs, lbs; randomobjective=false, numstop=1e10)
Generate warmup points for all the reactions on the model that
are not fixed. Assumes you feed in a JuMP model that is already
constrained by however you want it to be.
numstop = 2*number of warmup points - to reduce the time this takes
"""
function get_warmup_points(cbmodel, v, ubs, lbs; randomobjective=false)
function get_warmup_points(cbmodel, v, ubs, lbs; randomobjective=false, numstop=1e10)
# determine which rxns should be max/min-ized
fixed_rxns = Int64[]
for i in eachindex(v)
ub_val = normalized_rhs(ubs[i])
......@@ -16,37 +19,35 @@ function get_warmup_points(cbmodel, v, ubs, lbs; randomobjective=false)
push!(fixed_rxns, i)
end
end
var_rxn_inds = filter(x->!(x in fixed_rxns), 1:length(v))
wpoints = zeros(length(v), 2*length(var_rxn_inds))
# determine number of warmup points
var_rxn_inds =shuffle!(filter(x->!(x in fixed_rxns), 1:length(v))) # shuffle the optimization points
NN = numstop > length(var_rxn_inds) ? length(var_rxn_inds) : numstop
wpoints = zeros(length(v), 2*NN)
if randomobjective
for i in 1:2:size(wpoints, 2)
@objective(cbmodel, Max, sum(rand()*v[ii] for ii in var_rxn_inds))
optimize!(cbmodel)
for j=1:size(wpoints, 1)
wpoints[j, i] = value(v[j])
end
@objective(cbmodel, Min, sum(rand()*v[ii] for ii in var_rxn_inds))
optimize!(cbmodel)
for j=1:size(wpoints, 1)
wpoints[j, i+1] = value(v[j])
end
for (i, ii) in zip(1:length(var_rxn_inds), 1:2:(2*length(var_rxn_inds)))
i > NN && break
if randomobjective
@objective(cbmodel, Max, sum(rand()*v[iii] for iii in var_rxn_inds))
else
@objective(cbmodel, Max, v[var_rxn_inds[i]])
end
else
for (ii, i) in enumerate(1:2:size(wpoints, 2))
@objective(cbmodel, Max, v[var_rxn_inds[ii]])
optimize!(cbmodel)
for j=1:size(wpoints, 1)
wpoints[j, i] = value(v[j])
end
@objective(cbmodel, Min, v[var_rxn_inds[ii]])
optimize!(cbmodel)
for j=1:size(wpoints, 1)
wpoints[j, i+1] = value(v[j])
end
optimize!(cbmodel)
for j=1:size(wpoints, 1)
wpoints[j, ii] = value(v[j])
end
if randomobjective
@objective(cbmodel, Min, sum(rand()*v[iii] for iii in var_rxn_inds))
else
@objective(cbmodel, Min, v[var_rxn_inds[i]])
end
optimize!(cbmodel)
for j=1:size(wpoints, 1)
wpoints[j, ii+1] = value(v[j])
end
end
......@@ -73,82 +74,67 @@ function get_bound_vectors(ubconref, lbconref)
return ubs, lbs
end
function achr(N::Int64, wpoints::Array{Float64, 2}, model::Model, ubcons, lbcons; burnin=0.9, keepevery=10)
S, _, _, _ = CobraTools.get_core_model(model) # assume S has not been modified from model
ubs, lbs = CobraTools.get_bound_vectors(ubcons, lbcons)
nwpts = size(wpoints, 2) # number of warmup points generated
Nkeep = round(Int64, burnin*N) # start storing from here only
samples = zeros(size(wpoints, 1), round(Int64, length(Nkeep:N)/keepevery, RoundUp)) # sample storage
center_point = mean(wpoints, dims=2)[:]
w = zeros(size(wpoints, 1)) # direction vector
λlower = zeros(size(S, 2))
λupper = zeros(size(S, 2))
δdirtol = 1e-6 # too small directions get ignored solver precision issue
"""
hit_and_run(N::Int64, wpoints::Array{Float64, 2}, model::Model, ubcons, lbcons; keepevery=10, samplesize=1000)
Perform basic hit and run sampling for N iterations. Note that N needs to be >> samplesize. Sample size is the
size of the samples kept in memory. The larger samplesize is the better the approximation becomes, but the more
memory the sampler requires. W is the warm up phase iteration length.
"""
function hit_and_run(N::Int64, wpoints::Array{Float64, 2}, ubcons, lbcons; keepevery=100, samplesize=1000, W=2000)
ubs, lbs = get_bound_vectors(ubcons, lbcons) # get bounds from model
nwpts = size(wpoints, 2) # number of warmup points generated
samples = zeros(size(wpoints, 1), samplesize) # sample storage
current_point = zeros(size(wpoints, 1))
current_point .= wpoints[:, rand(1:nwpts)] # initial point
sample_num = 0
current_point .= wpoints[:, rand(1:nwpts)]
δdirtol = 1e-6 # too small directions get ignored ≈ 0 (solver precision issue)
sample_num = 0
samplelength = 0
updatesamplesizelength = true
for n=1:N
foundit = false # found a feasible direction
λmax = 0.0
λmin = 0.0
numiters = 0
while numiters < 3*(nwpts+sample_num) # maximum time spent looking for feasible direction
# pick a random direction from samples and warmup points
if rand() < nwpts/(nwpts + sample_num)
w .= wpoints[:, rand(1:nwpts)] .- center_point
else
w .= samples[:, rand(1:(sample_num))] .- center_point
end
w .= w./norm(w) # direction
for i in eachindex(w)
δlower = lbs[i] - current_point[i]
δupper = ubs[i] - current_point[i]
if w[i] < -δdirtol
λlower[i] = δupper/w[i]
λupper[i] = δlower/w[i]
elseif w[i] > δdirtol
λlower[i] = δlower/w[i]
λupper[i] = δupper/w[i]
else
λlower[i] = -1e3
λupper[i] = 1e3
end
end
λmax = minimum(λupper)
λmin = maximum(λlower)
if λmax > λmin
foundit = true
break
if n <= W
direction_point = (@view wpoints[:, rand(1:nwpts)]) - (@view current_point[:]) # use warmup points to find direction in warmup phase
else
direction_point = (@view samples[:, rand(1:(samplelength))]) - (@view current_point[:]) # after warmup phase, only find directions in sampled space
end
λmax = 1e10
λmin = -1e10
for i in eachindex(lbs)
δlower = lbs[i] - current_point[i]
δupper = ubs[i] - current_point[i]
if direction_point[i] < -δdirtol
lower = δupper/direction_point[i]
upper = δlower/direction_point[i]
elseif direction_point[i] > δdirtol
lower = δlower/direction_point[i]
upper = δupper/direction_point[i]
else
numiters += 1
lower = -1e10
upper = 1e10
end
lower > λmin && (λmin = lower) # max min step size that satisfies all bounds
upper < λmax && (λmax = upper) # min max step size that satisfies all bounds
end
if foundit == false
@warn "Error: no feasible direction found."
break
if λmax <= λmin || λmin == -1e10 || λmax == 1e10 # this sometimes can happen
@warn "Infeasible direction at iteration $(n)..."
continue
end
λ = rand()*(λmax - λmin) + λmin # random step size
current_point .= current_point .+ λ .* direction_point # will be feasible
λ = rand()*(λmax - λmin) + λmin
current_point .= current_point .+ λ .* w
center_point .= ((nwpts + n - 1).*center_point .+ current_point) ./ (nwpts + n)
if n >= Nkeep && n % keepevery == 0
if n % keepevery == 0
sample_num += 1
samples[:, sample_num] .= current_point
if sample_num >= samplesize
updatesamplesizelength = false
sample_num = 0 # reset, start replacing the older samples
end
updatesamplesizelength && (samplelength += 1) # lags sample_num because the latter is a flag as well
end
end
......@@ -156,3 +142,99 @@ function achr(N::Int64, wpoints::Array{Float64, 2}, model::Model, ubcons, lbcons
return samples
end
"""
test_samples(samples::Array{Float64, 2}, model::Model, ubcons, lbcons)
Test if samples pass tests.
"""
function test_samples(samples::Array{Float64, 2}, model::Model, ubcons, lbcons)
S, _, _, _ = get_core_model(model) # assume S has not been modified from model
ubs, lbs = get_bound_vectors(ubcons, lbcons)
violations = Int64[]
tol = 1e-6
for i in 1:size(samples, 2)
if sum(abs, S*samples[:, i]) < tol
equality = true
else
equality = false
end
inequality = all(abs.(lbs .- samples[:, i]) .<= tol) .== all(abs.(samples[:, i] .- ubs) .<= tol)
if !all([equality, inequality])
push!(violations, i)
end
end
return violations
end
# function achr(N::Int64, wpoints::Array{Float64, 2}, model::Model, ubcons, lbcons; burnin=0.9, keepevery=10)
# # S, _, _, _ = CobraTools.get_core_model(model) # assume S has not been modified from model
# ubs, lbs = CobraTools.get_bound_vectors(ubcons, lbcons)
# nwpts = size(wpoints, 2) # number of warmup points generated
# Nkeep = round(Int64, burnin*N) # start storing from here only
# samples = zeros(size(wpoints, 1), round(Int64, length(Nkeep:N)/keepevery, RoundUp)) # sample storage
# center_point = mean(wpoints, dims=2)[:]
# w = zeros(size(wpoints, 1)) # direction vector
# λlower = zeros(size(S, 2))
# λupper = zeros(size(S, 2))
# δdirtol = 1e-6 # too small directions get ignored solver precision issue
# current_point = zeros(size(wpoints, 1))
# current_point .= center_point
# sample_num = 0
# for n=1:N
# λmax = 0.0
# λmin = 0.0
# if rand() < nwpts/(nwpts + sample_num)
# ref_point .= wpoints[:, rand(1:nwpts)] .- center_point # initial point
# else
# ref_point .= samples[:, rand(1:(sample_num))] .- center_point # initial point
# end
# w .= w./norm(w) # direction
# for i in eachindex(w)
# δlower = lbs[i] - current_point[i]
# δupper = ubs[i] - current_point[i]
# if w[i] < -δdirtol
# λlower[i] = δupper/w[i]
# λupper[i] = δlower/w[i]
# elseif w[i] > δdirtol
# λlower[i] = δlower/w[i]
# λupper[i] = δupper/w[i]
# else
# λlower[i] = -1e3
# λupper[i] = 1e3
# end
# end
# λmax = minimum(λupper)
# λmin = maximum(λlower)
# if λmax > λmin
# @warn "Infeasible direction"
# continue
# end
# λ = rand()*(λmax - λmin) + λmin
# current_point .= current_point .+ λ .* w
# center_point .= ((nwpts + n - 1).*center_point .+ current_point) ./ (nwpts + n)
# if n >= Nkeep && n % keepevery == 0
# sample_num += 1
# samples[:, sample_num] .= current_point
# # center_point .= ((nwpts + sample_num - 1).*center_point .+ current_point) ./ (nwpts + sample_num)
# end
# end
# return samples
# end
\ No newline at end of file
......@@ -6,6 +6,9 @@ using JLD
using JuMP
using Gurobi
using Plots
pyplot()
# E. coli model
modelpath = joinpath("models", "iJO1366.json")
model = CobraTools.read_model(modelpath)
......@@ -36,19 +39,12 @@ CobraTools.set_bound(biomass_index, ubs, lbs; ub=μ_max, lb=0.9*μ_max)
##################################
# Get warmup points
wpoints = CobraTools.get_warmup_points(cbmodel, v, ubs, lbs) # very slow
wpoints = CobraTools.get_warmup_points(cbmodel, v, ubs, lbs, numstop=1000) # very slow
# sample!
samples = @time CobraTools.achr(100_000, wpoints, model, ubs, lbs)
samples = @time CobraTools.hit_and_run(100_000, wpoints, ubs, lbs; keepevery=200, samplesize=5000, W=2000)
###########################
violation_inds = CobraTools.test_samples(samples, model, ubs, lbs)
plot(samples[etoh_index, :])
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment