Unverified commit a43ad19a, authored by Miroslav Kratochvil

sampling seems to work OK now

parent 0012e659
@@ -11,7 +11,6 @@ JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
 JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 MAT = "23992714-dd62-5051-b70f-ba57cb901cac"
-MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d"
 MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
 OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
...
@@ -13,7 +13,6 @@ using Random
 using Serialization
 using SparseArrays
 using Statistics
-using MCMCChains
 import Base: findfirst, getindex, show
 import Pkg
...
""" """
hit_and_run( TODO
model,
optimizer;
modifications = [],
N = 1000,
keepevery = _constants.sampling_keep_iters,
samplesize = _constants.sampling_size,
warmup_indices = collect(1:n_reactions(model)),
workerids = [myid()],
nchains = 1,
)
Perform basic hit and run sampling for `N` iterations on `model` using `optimizer` to Perform basic hit and run sampling for `N` iterations on `model` using `optimizer` to
generate the warmup points. Here warmup points are generated by iteratively generate the warmup points. Here warmup points are generated by iteratively
...@@ -64,119 +54,93 @@ chains = hit_and_run( ...@@ -64,119 +54,93 @@ chains = hit_and_run(
) )
``` ```
""" """
function hit_and_run( function affine_hit_and_run(
model, warmup_points::Matrix{Float64},
optimizer; lbs::Vector{Float64},
modifications = [], ubs::Vector{Float64};
N = 150_000, sample_iters = 100 .* (1:5),
keepevery = _constants.sampling_keep_iters, workers = [myid()],
samplesize = _constants.sampling_size, chains = length(workers),
warmup_indices = collect(1:n_reactions(model)),
workerids = [myid()],
nchains = 1,
) )
# get warmup points in parallel, similar to FVA # distribute starting data to workers
ws, lbs, ubs = warmup( save_at.(workers, :cobrexa_hit_and_run_data, Ref((warmup_points, lbs, ubs)))
model,
optimizer; # sample all chains
modifications = modifications, samples = hcat(
workerids = workerids, # parallel dpmap(
warmup_points = warmup_indices, chain -> :($COBREXA._affine_hit_and_run_chain(
) cobrexa_hit_and_run_data...,
$sample_iters,
# load warmup points to workers $chain,
save_at.(workerids, :cobrexa_ws, Ref(:($ws))) )),
save_at.(workerids, :cobrexa_lbs, Ref(:($lbs))) CachingPool(workers),
save_at.(workerids, :cobrexa_ubs, Ref(:($ubs))) 1:chains,
)...,
# sample in parallel!
samples = dpmap(
x -> :($COBREXA._serial_hit_and_run(
cobrexa_ws,
cobrexa_lbs,
cobrexa_ubs,
$samplesize,
$keepevery,
$N,
)),
CachingPool(workerids),
1:nchains,
) )
# remove warmup points from workers # remove warmup points from workers
map(fetch, remove_from.(workerids, :cobrexa_ws)) map(fetch, remove_from.(workers, :cobrexa_hit_and_run_data))
map(fetch, remove_from.(workerids, :cobrexa_lbs))
map(fetch, remove_from.(workerids, :cobrexa_ubs))
# not sure how to do this better - cat/vcat doesn't work, oh well
vals = zeros(samplesize, length(lbs), nchains)
for c = 1:nchains
vals[:, :, c] = samples[c]'
end
chains = Chains(vals, reactions(model))
return chains return samples
end end
function _serial_hit_and_run(ws, lbs, ubs, samplesize, keepevery, N) function _affine_hit_and_run_chain(warmup, lbs, ubs, iters, chain)
samples = zeros(length(lbs), samplesize) # sample storage
current_point = zeros(length(lbs)) points = copy(warmup)
current_point .= ws[1, 1] # just use the first warmup point, randomness is introduced later d, n_points = size(points)
direction = zeros(length(lbs)) result = Matrix{Float64}(undef, size(points, 1), 0)
sample_num = 0 iter = 0
samplelength = 0
use_warmup_points = true for iter_target in iters
for n = 1:N
while iter < iter_target
if use_warmup_points iter += 1
i = rand(1:size(ws, 1))
j = rand(1:size(ws, 2)) new_points = copy(points)
direction .= ws[i, j] - current_point # use warmup points to find direction in warmup phase
else for i = 1:n_points
direction .= samples[:, rand(1:(samplelength))] - current_point # after warmup phase, only find directions in sampled space
end mix = rand(n_points) .+ _constants.tolerance
dir = points * (mix ./ sum(mix)) - points[:, i]
λmax = Inf
λmin = -Inf # iteratively collect the maximum and minimum possible multiple
for i in eachindex(lbs) # of `dir` added to the current point
δlower = lbs[i] - current_point[i] λmax = Inf
δupper = ubs[i] - current_point[i] λmin = -Inf
# only consider the step size bound if the direction of travel is non-negligible for j = 1:d
if direction[i] < -_constants.tolerance dl = lbs[j] - points[j, i]
lower = δupper / direction[i] du = ubs[j] - points[j, i]
upper = δlower / direction[i] idir = 1 / dir[j]
elseif direction[i] > _constants.tolerance if dir[j] < -_constants.tolerance
lower = δlower / direction[i] lower = du * idir
upper = δupper / direction[i] upper = dl * idir
else elseif dir[j] > _constants.tolerance
lower = -Inf lower = dl * idir
upper = Inf upper = du * idir
else
lower = -Inf
upper = Inf
end
λmin = max(λmin, lower)
λmax = min(λmax, upper)
end
λ = λmin + rand() * (λmax - λmin)
!isfinite(λ) && continue # avoid divergence
new_points[:, i] = points[:, i] .+ λ .* dir
# TODO normally, here we would check if sum(S*new_point) is still
# lower than the tolerance, but we shall trust the computer
# instead.
end end
lower > λmin && (λmin = lower) # max min step size that satisfies all bounds
upper < λmax && (λmax = upper) # min max step size that satisfies all bounds
end
if λmax <= λmin || λmin == -Inf || λmax == Inf # this sometimes can happen points = new_points
# @warn "Infeasible direction at iteration $(n)..." # TODO: make this optional/ add to a logger... very noisy otherwise
continue
end
λ = rand() * (λmax - λmin) + λmin # random step size
current_point .= current_point .+ λ .* direction # will be feasible
if n % keepevery == 0
sample_num += 1
samples[:, sample_num] .= current_point
if sample_num >= samplesize
use_warmup_points = false # once the entire memory vector filled, stop using warm up points
sample_num = 0 # reset, start replacing the older samples
end
use_warmup_points && (samplelength += 1) # lags sample_num because the latter is a flag as well
end end
result = hcat(result, points)
end end
return samples result
end end
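Note: the core of the new sampler is the per-coordinate step-size bound. For a current point `p = points[:, i]` and direction `dir`, it finds the widest interval `[λmin, λmax]` such that `p + λ*dir` stays inside `[lbs, ubs]`, then draws `λ` uniformly from it. A minimal self-contained sketch of that single-point update (the function name is illustrative and `tol` stands in for `_constants.tolerance`; this is not part of the commit):

```julia
# One affine hit-and-run update for point `i`, as in `_affine_hit_and_run_chain`.
function hit_and_run_step(points::Matrix{Float64}, i::Int, lbs, ubs; tol = 1e-9)
    d, n_points = size(points)

    # direction: a random affine combination of all chain points, minus point i
    mix = rand(n_points) .+ tol
    dir = points * (mix ./ sum(mix)) - points[:, i]

    # widest admissible step-size interval [λmin, λmax]
    λmin, λmax = -Inf, Inf
    for j = 1:d
        dl = lbs[j] - points[j, i]  # distance to the lower bound
        du = ubs[j] - points[j, i]  # distance to the upper bound
        if dir[j] < -tol
            λmin, λmax = max(λmin, du / dir[j]), min(λmax, dl / dir[j])
        elseif dir[j] > tol
            λmin, λmax = max(λmin, dl / dir[j]), min(λmax, du / dir[j])
        end
    end

    # uniformly random feasible step; keep the old point if the interval degenerates
    λ = λmin + rand() * (λmax - λmin)
    isfinite(λ) ? points[:, i] .+ λ .* dir : points[:, i]
end
```

Because each direction is an affine combination of points already in the chain, every proposal stays in the affine hull of the warmup set; this is why the stoichiometric equalities hold without being re-checked at each step (the `TODO` comment above notwithstanding).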
""" """
warmup_from_variability( warmup_from_variability(
n_points::Int,
model::MetabolicModel, model::MetabolicModel,
optimizer; optimizer,
modifications = [], n_points::Int;
workers = [myid()], kwargs...
) )
Generates FVA-like warmup points for samplers, by selecting random points by Generates FVA-like warmup points for samplers, by selecting random points by
minimizing and maximizing reactions. Can not return more than 2 times the minimizing and maximizing reactions. Can not return more than 2 times the
number of reactions in the model. number of reactions in the model.
""" """
function warmup_from_variability(n_points::Int, model::MetabolicModel, optimizer; kwargs...) function warmup_from_variability(model::MetabolicModel,
optimizer,
n_points::Int;
kwargs...)
nr = n_reactions(model) nr = n_reactions(model)
n_points > 2 * nr && throw( n_points > 2 * nr && throw(
...@@ -23,36 +25,36 @@ function warmup_from_variability(n_points::Int, model::MetabolicModel, optimizer ...@@ -23,36 +25,36 @@ function warmup_from_variability(n_points::Int, model::MetabolicModel, optimizer
sample = shuffle(vcat(1:nr, -(1:nr)))[begin:n_points] sample = shuffle(vcat(1:nr, -(1:nr)))[begin:n_points]
warmup_from_variability( warmup_from_variability(
-filter(x -> x < 0, sample),
filter(x -> x > 0, sample),
model, model,
optimizer; optimizer,
-filter(x -> x < 0, sample),
filter(x -> x > 0, sample);
kwargs..., kwargs...,
) )
end end
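Note: the signed-index shuffle above packs both optimization senses into one vector, with negative entries marking reactions to minimize and positive entries reactions to maximize. A small illustration (hypothetical 3-reaction case, not from the commit):

```julia
using Random

nr = 3                                      # stands in for n_reactions(model)
sample = shuffle(vcat(1:nr, -(1:nr)))[1:4]  # e.g. [-2, 1, 3, -1]

min_reactions = -filter(x -> x < 0, sample) # e.g. [2, 1], to be minimized
max_reactions = filter(x -> x > 0, sample)  # e.g. [1, 3], to be maximized
```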
""" """
warmup_from_variability( function warmup_from_variability(
min_reactions::Vector{Int},
max_reactions::Vector{Int},
model::MetabolicModel, model::MetabolicModel,
optimizer; optimizer,
modifications=[], min_reactions::Vector{Int}=1:n_reactions(model),
workers::Vector{Int}=[myid()] max_reactions::Vector{Int}=1:n_reactions(model);
)::Matrix{Float64} modifications = [],
workers::Vector{Int} = [myid()],
)::Tuple{Matrix{Float64}, Vector{Float64}, Vector{Float64}}
Generate FVA-like warmup points for samplers, by minimizing and maximizing the Generate FVA-like warmup points for samplers, by minimizing and maximizing the
specified reactions. The result is returned as a matrix, each point occupies as specified reactions. The result is returned as a matrix, each point occupies as
single column in the result. single column in the result.
""" """
function warmup_from_variability( function warmup_from_variability(
min_reactions::Vector{Int},
max_reactions::Vector{Int},
model::MetabolicModel, model::MetabolicModel,
optimizer; optimizer,
min_reactions::AbstractVector{Int} = 1:n_reactions(model),
max_reactions::AbstractVector{Int} = 1:n_reactions(model);
modifications = [], modifications = [],
workers::Vector{Int} = [myid()], workers::Vector{Int} = [myid()],
)::Matrix{Float64} )::Tuple{Matrix{Float64},Vector{Float64},Vector{Float64}}
# create optimization problem at workers, apply modifications # create optimization problem at workers, apply modifications
save_model = :( save_model = :(
...@@ -80,8 +82,14 @@ function warmup_from_variability( ...@@ -80,8 +82,14 @@ function warmup_from_variability(
)..., )...,
) )
# snatch the bounds from whatever worker is around
lbs, ubs = get_val_from(
workers[1],
:($COBREXA.get_bound_vectors(cobrexa_sampling_warmup_optmodel)),
)
# free the data on workers # free the data on workers
map(fetch, remove_from.(workers, :cobrexa_sampling_warmup_optmodel)) map(fetch, remove_from.(workers, :cobrexa_sampling_warmup_optmodel))
return fluxes return fluxes, lbs, ubs
end end
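Note: with the reordered signature, the explicit-index method would be called roughly as follows (the model, optimizer, worker list, and index choices are illustrative assumptions, not from the commit):

```julia
# Hypothetical call: minimize reactions 1 and 3, maximize reactions 2, 4, and 5.
# The variable bounds now come back alongside the flux matrix.
pts, lbs, ubs = warmup_from_variability(
    model,
    Tulip.Optimizer,
    [1, 3],     # min_reactions
    [2, 4, 5];  # max_reactions
    workers = W,
)
```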
@@ -65,17 +65,8 @@
 Returns vectors of the lower and upper bounds of `opt_model` constraints, where
 `opt_model` is a JuMP model constructed by e.g.
 [`make_optimization_model`](@ref) or [`flux_balance_analysis`](@ref).
 """
-function get_bound_vectors(opt_model)
-    lbconref = opt_model[:lbs]
-    ubconref = opt_model[:ubs]
-    lbs = zeros(length(lbconref))
-    for i in eachindex(lbs)
-        lbs[i] = -normalized_rhs(lbconref[i])
-    end
-    ubs = [normalized_rhs(ubconref[i]) for i in eachindex(ubconref)]
-    return lbs, ubs
-end
+get_bound_vectors(opt_model) = (
+    [-normalized_rhs(lb) for lb in opt_model[:lbs]],
+    [normalized_rhs(ub) for ub in opt_model[:ubs]],
+)
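Note: the sign flip on the lower bounds reflects JuMP's constraint normalization, which moves variables to the left-hand side: a constraint `lb <= x` is stored as `-x <= -lb`, so `normalized_rhs` returns `-lb`. A minimal sketch (assuming the `:lbs`/`:ubs` constraint arrays are registered the way `make_optimization_model` sets them up):

```julia
using JuMP

get_bound_vectors(opt_model) = (
    [-normalized_rhs(lb) for lb in opt_model[:lbs]],
    [normalized_rhs(ub) for ub in opt_model[:ubs]],
)

m = Model()
@variable(m, x[1:2])
@constraint(m, lbs, [-1.0, 0.0] .<= x)  # normalized by JuMP to -x[i] <= -lb[i]
@constraint(m, ubs, x .<= [5.0, 2.0])

get_bound_vectors(m)  # gives ([-1.0, 0.0], [5.0, 2.0])
```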
@@ -7,36 +7,21 @@
     model = load_model(StandardModel, model_path)

-    # Serial test
-    chains = hit_and_run(
-        model,
-        Tulip.Optimizer;
-        N = 10_000,
-        nchains = 3,
-        samplesize = 1000,
-        modifications = [
-            change_constraint("EX_glc__D_e", -10, -10),
-            change_constraint("BIOMASS_Ecoli_core_w_GAM", 0.8, 0.8),
-        ],
-        workerids = W,
-    )
-
-    # Do not test for convergence, that requires too many samples
-    @test isapprox(mean(chains[[:PFL]]).nt.mean[1], 0.8, atol = 1.0)
-
-    # parallel tests (still broken, not sure why)
-    chainsparallel = hit_and_run(
-        model,
-        Tulip.Optimizer;
-        N = 10_000,
-        nchains = 3,
-        samplesize = 1000,
-        modifications = [
-            change_constraint("EX_glc__D_e", -10, -10),
-            change_constraint("BIOMASS_Ecoli_core_w_GAM", 0.8, 0.8),
-        ],
-        workerids = W,
-    )
-
-    @test isapprox(mean(chainsparallel[[:PFL]]).nt.mean[1], 0.8, atol = 1.0)
+    warmup, lbs, ubs = warmup_from_variability(model, Tulip.Optimizer; workers = W)
+
+    samples = affine_hit_and_run(
+        warmup,
+        lbs,
+        ubs;
+        sample_iters = 10 * (1:3),
+        workers = W,
+        chains = length(W),
+    )
+
+    @test size(samples, 1) == size(warmup, 1)
+    @test size(samples, 2) == size(warmup, 2) * 3 * length(W)
+    @test all(samples .>= lbs)
+    @test all(samples .<= ubs)
+    @test all(stoichiometry(model) * samples .< TEST_TOLERANCE)
 end
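The column-count assertion follows from the chain loop: each of the `length(W)` chains re-emits all `size(warmup, 2)` points once per entry of `sample_iters`, and `10 * (1:3)` has three entries, hence `size(warmup, 2) * 3 * length(W)` columns in total.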
@@ -6,10 +6,10 @@
     )
     model = load_model(StandardModel, model_path)

-    pts = warmup_from_variability(
-        100,
+    pts, lbs, ubs = warmup_from_variability(
         model,
-        Tulip.Optimizer;
+        Tulip.Optimizer,
+        100;
         modifications = [change_constraint("EX_glc__D_e", -2, 2)],
         workers = W,
     )
@@ -18,4 +18,6 @@
     @test size(pts) == (95, 100)
     @test all(pts[idx, :] .>= -2)
     @test all(pts[idx, :] .<= 2)
+    @test lbs[idx] == -2
+    @test ubs[idx] == 2
 end
@@ -12,7 +12,6 @@ using SHA
 using SparseArrays
 using Statistics
 using Tulip
-using MCMCChains

 # tolerance for comparing analysis results (should be a bit bigger than the
 # error tolerance in computations)
...