Unverified Commit a43ad19a authored by Miroslav Kratochvil's avatar Miroslav Kratochvil
Browse files

sampling seems to work OK now

parent 0012e659
......@@ -11,7 +11,6 @@ JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MAT = "23992714-dd62-5051-b70f-ba57cb901cac"
MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
......
......@@ -13,7 +13,6 @@ using Random
using Serialization
using SparseArrays
using Statistics
using MCMCChains
import Base: findfirst, getindex, show
import Pkg
......
"""
hit_and_run(
model,
optimizer;
modifications = [],
N = 1000,
keepevery = _constants.sampling_keep_iters,
samplesize = _constants.sampling_size,
warmup_indices = collect(1:n_reactions(model)),
workerids = [myid()],
nchains = 1,
)
TODO
Perform basic hit and run sampling for `N` iterations on `model` using `optimizer` to
generate the warmup points. Here warmup points are generated by iteratively
......@@ -64,119 +54,93 @@ chains = hit_and_run(
)
```
"""
function hit_and_run(
model,
optimizer;
modifications = [],
N = 150_000,
keepevery = _constants.sampling_keep_iters,
samplesize = _constants.sampling_size,
warmup_indices = collect(1:n_reactions(model)),
workerids = [myid()],
nchains = 1,
function affine_hit_and_run(
warmup_points::Matrix{Float64},
lbs::Vector{Float64},
ubs::Vector{Float64};
sample_iters = 100 .* (1:5),
workers = [myid()],
chains = length(workers),
)
# get warmup points in parallel, similar to FVA
ws, lbs, ubs = warmup(
model,
optimizer;
modifications = modifications,
workerids = workerids, # parallel
warmup_points = warmup_indices,
)
# load warmup points to workers
save_at.(workerids, :cobrexa_ws, Ref(:($ws)))
save_at.(workerids, :cobrexa_lbs, Ref(:($lbs)))
save_at.(workerids, :cobrexa_ubs, Ref(:($ubs)))
# sample in parallel!
samples = dpmap(
x -> :($COBREXA._serial_hit_and_run(
cobrexa_ws,
cobrexa_lbs,
cobrexa_ubs,
$samplesize,
$keepevery,
$N,
)),
CachingPool(workerids),
1:nchains,
# distribute starting data to workers
save_at.(workers, :cobrexa_hit_and_run_data, Ref((warmup_points, lbs, ubs)))
# sample all chains
samples = hcat(
dpmap(
chain -> :($COBREXA._affine_hit_and_run_chain(
cobrexa_hit_and_run_data...,
$sample_iters,
$chain,
)),
CachingPool(workers),
1:chains,
)...,
)
# remove warmup points from workers
map(fetch, remove_from.(workerids, :cobrexa_ws))
map(fetch, remove_from.(workerids, :cobrexa_lbs))
map(fetch, remove_from.(workerids, :cobrexa_ubs))
# not sure how to do this better - cat/vcat doesn't work, oh well
vals = zeros(samplesize, length(lbs), nchains)
for c = 1:nchains
vals[:, :, c] = samples[c]'
end
chains = Chains(vals, reactions(model))
map(fetch, remove_from.(workers, :cobrexa_hit_and_run_data))
return chains
return samples
end
function _serial_hit_and_run(ws, lbs, ubs, samplesize, keepevery, N)
samples = zeros(length(lbs), samplesize) # sample storage
current_point = zeros(length(lbs))
current_point .= ws[1, 1] # just use the first warmup point, randomness is introduced later
direction = zeros(length(lbs))
sample_num = 0
samplelength = 0
use_warmup_points = true
for n = 1:N
if use_warmup_points
i = rand(1:size(ws, 1))
j = rand(1:size(ws, 2))
direction .= ws[i, j] - current_point # use warmup points to find direction in warmup phase
else
direction .= samples[:, rand(1:(samplelength))] - current_point # after warmup phase, only find directions in sampled space
end
λmax = Inf
λmin = -Inf
for i in eachindex(lbs)
δlower = lbs[i] - current_point[i]
δupper = ubs[i] - current_point[i]
# only consider the step size bound if the direction of travel is non-negligible
if direction[i] < -_constants.tolerance
lower = δupper / direction[i]
upper = δlower / direction[i]
elseif direction[i] > _constants.tolerance
lower = δlower / direction[i]
upper = δupper / direction[i]
else
lower = -Inf
upper = Inf
function _affine_hit_and_run_chain(warmup, lbs, ubs, iters, chain)
points = copy(warmup)
d, n_points = size(points)
result = Matrix{Float64}(undef, size(points, 1), 0)
iter = 0
for iter_target in iters
while iter < iter_target
iter += 1
new_points = copy(points)
for i = 1:n_points
mix = rand(n_points) .+ _constants.tolerance
dir = points * (mix ./ sum(mix)) - points[:, i]
# iteratively collect the maximum and minimum possible multiple
# of `dir` added to the current point
λmax = Inf
λmin = -Inf
for j = 1:d
dl = lbs[j] - points[j, i]
du = ubs[j] - points[j, i]
idir = 1 / dir[j]
if dir[j] < -_constants.tolerance
lower = du * idir
upper = dl * idir
elseif dir[j] > _constants.tolerance
lower = dl * idir
upper = du * idir
else
lower = -Inf
upper = Inf
end
λmin = max(λmin, lower)
λmax = min(λmax, upper)
end
λ = λmin + rand() * (λmax - λmin)
!isfinite(λ) && continue # avoid divergence
new_points[:, i] = points[:, i] .+ λ .* dir
# TODO normally, here we would check if sum(S*new_point) is still
# lower than the tolerance, but we shall trust the computer
# instead.
end
lower > λmin && (λmin = lower) # max min step size that satisfies all bounds
upper < λmax && (λmax = upper) # min max step size that satisfies all bounds
end
if λmax <= λmin || λmin == -Inf || λmax == Inf # this sometimes can happen
# @warn "Infeasible direction at iteration $(n)..." # TODO: make this optional/ add to a logger... very noisy otherwise
continue
end
λ = rand() * (λmax - λmin) + λmin # random step size
current_point .= current_point .+ λ .* direction # will be feasible
if n % keepevery == 0
sample_num += 1
samples[:, sample_num] .= current_point
if sample_num >= samplesize
use_warmup_points = false # once the entire memory vector filled, stop using warm up points
sample_num = 0 # reset, start replacing the older samples
end
use_warmup_points && (samplelength += 1) # lags sample_num because the latter is a flag as well
points = new_points
end
result = hcat(result, points)
end
return samples
result
end
"""
warmup_from_variability(
n_points::Int,
model::MetabolicModel,
optimizer;
modifications = [],
workers = [myid()],
optimizer,
n_points::Int;
kwargs...
)
Generates FVA-like warmup points for samplers, by selecting random points by
minimizing and maximizing reactions. Can not return more than 2 times the
number of reactions in the model.
"""
function warmup_from_variability(n_points::Int, model::MetabolicModel, optimizer; kwargs...)
function warmup_from_variability(model::MetabolicModel,
optimizer,
n_points::Int;
kwargs...)
nr = n_reactions(model)
n_points > 2 * nr && throw(
......@@ -23,36 +25,36 @@ function warmup_from_variability(n_points::Int, model::MetabolicModel, optimizer
sample = shuffle(vcat(1:nr, -(1:nr)))[begin:n_points]
warmup_from_variability(
-filter(x -> x < 0, sample),
filter(x -> x > 0, sample),
model,
optimizer;
optimizer,
-filter(x -> x < 0, sample),
filter(x -> x > 0, sample);
kwargs...,
)
end
"""
warmup_from_variability(
min_reactions::Vector{Int},
max_reactions::Vector{Int},
function warmup_from_variability(
model::MetabolicModel,
optimizer;
modifications=[],
workers::Vector{Int}=[myid()]
)::Matrix{Float64}
optimizer,
min_reactions::Vector{Int}=1:n_reactions(model),
max_reactions::Vector{Int}=1:n_reactions(model);
modifications = [],
workers::Vector{Int} = [myid()],
)::Tuple{Matrix{Float64}, Vector{Float64}, Vector{Float64}}
Generate FVA-like warmup points for samplers, by minimizing and maximizing the
specified reactions. The result is returned as a matrix, each point occupies as
single column in the result.
"""
function warmup_from_variability(
min_reactions::Vector{Int},
max_reactions::Vector{Int},
model::MetabolicModel,
optimizer;
optimizer,
min_reactions::AbstractVector{Int} = 1:n_reactions(model),
max_reactions::AbstractVector{Int} = 1:n_reactions(model);
modifications = [],
workers::Vector{Int} = [myid()],
)::Matrix{Float64}
)::Tuple{Matrix{Float64},Vector{Float64},Vector{Float64}}
# create optimization problem at workers, apply modifications
save_model = :(
......@@ -80,8 +82,14 @@ function warmup_from_variability(
)...,
)
# snatch the bounds from whatever worker is around
lbs, ubs = get_val_from(
workers[1],
:($COBREXA.get_bound_vectors(cobrexa_sampling_warmup_optmodel)),
)
# free the data on workers
map(fetch, remove_from.(workers, :cobrexa_sampling_warmup_optmodel))
return fluxes
return fluxes, lbs, ubs
end
......@@ -65,17 +65,8 @@ end
Returns vectors of the lower and upper bounds of `opt_model` constraints, where
`opt_model` is a JuMP model constructed by e.g.
[`make_optimization_model`](@ref) or [`flux_balance_analysis`](@ref).
"""
function get_bound_vectors(opt_model)
lbconref = opt_model[:lbs]
ubconref = opt_model[:ubs]
lbs = zeros(length(lbconref))
for i in eachindex(lbs)
lbs[i] = -normalized_rhs(lbconref[i])
end
ubs = [normalized_rhs(ubconref[i]) for i in eachindex(ubconref)]
return lbs, ubs
end
get_bound_vectors(opt_model) = (
[-normalized_rhs(lb) for lb in opt_model[:lbs]],
[normalized_rhs(ub) for ub in opt_model[:ubs]],
)
......@@ -7,36 +7,21 @@
model = load_model(StandardModel, model_path)
# Serial test
chains = hit_and_run(
model,
Tulip.Optimizer;
N = 10_000,
nchains = 3,
samplesize = 1000,
modifications = [
change_constraint("EX_glc__D_e", -10, -10),
change_constraint("BIOMASS_Ecoli_core_w_GAM", 0.8, 0.8),
],
workerids = W,
)
# Do not test for convergence, that requires too many samples
@test isapprox(mean(chains[[:PFL]]).nt.mean[1], 0.8, atol = 1.0)
warmup, lbs, ubs = warmup_from_variability(model, Tulip.Optimizer; workers = W)
# parallel tests (still broken, not sure why)
chainsparallel = hit_and_run(
model,
Tulip.Optimizer;
N = 10_000,
nchains = 3,
samplesize = 1000,
modifications = [
change_constraint("EX_glc__D_e", -10, -10),
change_constraint("BIOMASS_Ecoli_core_w_GAM", 0.8, 0.8),
],
workerids = W,
samples = affine_hit_and_run(
warmup,
lbs,
ubs;
sample_iters = 10 * (1:3),
workers = W,
chains = length(W),
)
@test isapprox(mean(chainsparallel[[:PFL]]).nt.mean[1], 0.8, atol = 1.0)
@test size(samples, 1) == size(warmup, 1)
@test size(samples, 2) == size(warmup, 2) * 3 * length(W)
@test all(samples .>= lbs)
@test all(samples .<= ubs)
@test all(stoichiometry(model) * samples .< TEST_TOLERANCE)
end
......@@ -6,10 +6,10 @@
)
model = load_model(StandardModel, model_path)
pts = warmup_from_variability(
100,
pts, lbs, ubs = warmup_from_variability(
model,
Tulip.Optimizer;
Tulip.Optimizer,
100;
modifications = [change_constraint("EX_glc__D_e", -2, 2)],
workers = W,
)
......@@ -18,4 +18,6 @@
@test size(pts) == (95, 100)
@test all(pts[idx, :] .>= -2)
@test all(pts[idx, :] .<= 2)
@test lbs[idx] == -2
@test ubs[idx] == 2
end
......@@ -12,7 +12,6 @@ using SHA
using SparseArrays
using Statistics
using Tulip
using MCMCChains
# tolerance for comparing analysis results (should be a bit bigger than the
# error tolerance in computations)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment