Commit 605d8d1d authored by Miroslav Kratochvil's avatar Miroslav Kratochvil
Browse files

clean up the gapfilling code

parent f8cd84a3
Pipeline #56576 passed with stages
in 12 minutes and 1 second
...@@ -19,16 +19,18 @@ function make_optimization_model(model::MetabolicModel, optimizer; sense = MAX_S ...@@ -19,16 +19,18 @@ function make_optimization_model(model::MetabolicModel, optimizer; sense = MAX_S
xl, xu = bounds(model) xl, xu = bounds(model)
optimization_model = Model(optimizer) optimization_model = Model(optimizer)
@variable(optimization_model, x[i = 1:n]) @variable(optimization_model, x[1:n])
@objective(optimization_model, sense, objective(model)' * x) @objective(optimization_model, sense, objective(model)' * x)
@constraint(optimization_model, mb, stoichiometry(model) * x .== balance(model)) # mass balance @constraint(optimization_model, mb, stoichiometry(model) * x .== balance(model)) # mass balance
@constraint(optimization_model, lbs, xl .<= x) # lower bounds @constraint(optimization_model, lbs, xl .<= x) # lower bounds
@constraint(optimization_model, ubs, x .<= xu) # upper bounds @constraint(optimization_model, ubs, x .<= xu) # upper bounds
C = coupling(model) # empty if no coupling C = coupling(model) # empty if no coupling
cl, cu = coupling_bounds(model) isempty(C) || begin
isempty(C) || @constraint(optimization_model, c_lbs, cl .<= C * x) # coupling lower bounds cl, cu = coupling_bounds(model)
isempty(C) || @constraint(optimization_model, c_ubs, C * x .<= cu) # coupling upper bounds @constraint(optimization_model, c_lbs, cl .<= C * x) # coupling lower bounds
@constraint(optimization_model, c_ubs, C * x .<= cu) # coupling upper bounds
end
return optimization_model return optimization_model
end end
......
""" """
gapfill_minimum_reactions( function gapfill_minimum_reactions(
model::MetabolicModel, model::MetabolicModel,
universal_reactions::Vector{Reaction}, universal_reactions::Vector{Reaction},
objective_lower_bound::Float64,
optimizer; optimizer;
modifications=[], objective_bounds = (_constants.tolerance, _constants.default_reaction_bound),
maximum_new_reactions = 5,
weights = fill(1.0, length(universal_reactions)), weights = fill(1.0, length(universal_reactions)),
objective_upper_bound = COBREXA._constants.default_reaction_bound, modifications = [],
ignore_reactions = [],
max_gaps_fillable = 1000_000,
) )
Return the indices of reactions in `universal_reactions` that should be added to Find a minimal set of reactions from `universal_reactions` that should be added
`model` so that the model can carry flux through its objective function, which to `model` so that the model has a feasible solution with bounds on its
is bounded by `objective_lower_bound`. Optionally, specify `weights` that can be objective function given in `objective_bounds`. Weights of the added reactions
used to bias the reactions found through solving the underlying mixed integer may be specified in `weights` to prefer adding reactions with lower weights.
program (MILP). Also, some reactions in `universal_reactions` can be ignored by
specifying their ids in `ignore_reactions`, this is useful to, e.g., restrict Internally, this builds and solves a mixed integer program, following
which exchanges can be added. Finally, to limit the search space, it is the method of Reed et al. (Reed, Jennifer L., et al. "Systems approach to
possible to specify the maximum number of gaps that can be filled through refining genome annotation." *Proceedings of the National Academy of Sciences*
`max_gaps_fillable`. (2006)).
This gap filling algorithm is based on the one introduced in *Reed, Jennifer L., The function returns a solved JuMP optimization model, with the boolean
et al. "Systems approach to refining genome annotation." Proceedings of the reaction inclusion indicators in variable vector `y`. Use
National Academy of Sciences (2006)*. Briefly, the algorithm finds the smallest [`gapfilled_mask`](@ref) or [`gapfilled_rids`](@ref) to collect the reaction
number of reactions to add by solving the MILP: information in Julia datatypes.
```
min ∑ wᵢ * yᵢ To reduce the uncertainty in the MILP solver (and likely reduce the
s.t. S * x = 0 complexity), you may put a limit on the size of the added reaction set in
xₗ ≤ x ≤ xᵤ ∀ model reactions `maximum_new_reactions`.
y * xₗ ≤ x ≤ y * xᵤ ∀ universal reactions
lb ≤ objective(x) ≤ ub
y ∈ {0, 1}
```
where `w` is the set of optional `weights`, `x` the fluxes, and `y` the indicator
variables.
""" """
function gapfill_minimum_reactions( function gapfill_minimum_reactions(
model::MetabolicModel, model::MetabolicModel,
universal_reactions::Vector{Reaction}, universal_reactions::Vector{Reaction},
objective_lower_bound::Float64,
optimizer; optimizer;
modifications = [], objective_bounds = (_constants.tolerance, _constants.default_reaction_bound),
maximum_new_reactions = length(universal_reactions),
weights = fill(1.0, length(universal_reactions)), weights = fill(1.0, length(universal_reactions)),
objective_upper_bound = COBREXA._constants.default_reaction_bound, modifications = [],
ignore_reactions = [],
max_gaps_fillable = _constants.max_gaps_fillable,
) )
# constraints from model to be gap filled precache!(model)
S_model = stoichiometry(model)
metabolite_id_order = metabolites(model)
# constraints from universal reactions that can fill gaps # constraints from universal reactions that can fill gaps
n_universal_reactions = length(universal_reactions) univs = _universal_stoichiometry(universal_reactions, metabolites(model))
S_universal, lbs_universal, ubs_universal = COBREXA._universal_stoichiometry(
universal_reactions, # add space for additional metabolites and glue with the universal reaction
metabolite_id_order; # stoichiometry
ignore_reactions, extended_stoichiometry = [[
) stoichiometry(model)
spzeros(length(univs.new_mids), n_reactions(model))
# adjust the model stoichiometric matrix to account for additional metabolites if necessary ] univs.stoichiometry]
S = [
S_model # make the model anew (we can't really use make_optimization_model because
spzeros(size(S_universal, 1) - size(S_model, 1), size(S_model, 2)) # we need the balances and several other things completely removed. Could
] # be solved either by parametrizing make_optimization_model or by making a
# adjust the balance to account for additional metabolites # tiny temporary wrapper for this.
bal = [ # keep this in sync with src/base/solver.jl, except for adding balances.
balance(model) opt_model = Model(optimizer)
spzeros(size(S_universal, 1) - size(S_model, 1)) @variable(opt_model, x[1:n_reactions(model)])
] xl, xu = bounds(model)
@constraint(opt_model, lbs, xl .<= x)
#= @constraint(opt_model, ubs, x .<= xu)
First build standard flux balance type optimization problem, then add
specific details of the gap filling algorithm, e.g. indicator constraints, etc. C = coupling(model)
=# isempty(C) || begin
opt_model = make_optimization_model(model, optimizer; sense = COBREXA.MIN_SENSE) cl, cu = coupling_bounds(model)
delete(opt_model, opt_model[:mb]) # need to remove mass balances @constraint(opt_model, c_lbs, cl .<= C * x)
unregister(opt_model, :mb) # will re-use symbol @constraint(opt_model, c_ubs, C * x .<= cu)
end
@variable(opt_model, z[1:n_universal_reactions]) # fluxes from universal reactions # add the variables for new stuff
@variable(opt_model, y[1:n_universal_reactions], Bin) # indicators @variable(opt_model, ux[1:length(universal_reactions)]) # fluxes from universal reactions
@variable(opt_model, y[1:length(universal_reactions)], Bin) # indicators
# objective bounds # combined metabolite balances
@constraint( @constraint(
opt_model, opt_model,
obj_bounds, extended_stoichiometry * [x; ux] .==
objective_lower_bound <= objective(model)' * opt_model[:x] <= objective_upper_bound [balance(model); zeros(length(univs.new_mids))]
) )
# objective bounds
@constraint(opt_model, objective_bounds[1] <= objective(model)' * x)
@constraint(opt_model, objective_bounds[2] >= objective(model)' * x)
# flux bounds of universal reactions with indicators # flux bounds of universal reactions with indicators
@constraint(opt_model, lbs_universal, lbs_universal .* y .<= z) @constraint(opt_model, ulb, univs.lbs .* y .<= ux)
@constraint(opt_model, ubs_universal, z .<= ubs_universal .* y) @constraint(opt_model, uub, univs.ubs .* y .>= ux)
# combined mass balances # minimize the total number of indicated reactions
@constraint(opt_model, mb, S * opt_model[:x] + S_universal * z .== bal) # mass balance of all reactions @objective(opt_model, Min, weights' * y)
# constrain the maximum number of gaps that can be filled # limit the number of indicated reactions
@constrain(opt_model, max_gaps, sum(y) <= max_gaps_fillable) # (prevents the solver from exploring too far)
@constraint(opt_model, sum(y) <= maximum_new_reactions)
# make new objective
@objective(opt_model, Min, sum(weights .* y))
# apply all modifications
for mod in modifications for mod in modifications
mod(opt_model, model) mod(opt_model, model)
end end
optimize!(opt_model) optimize!(opt_model)
findall(value.(y) .> 0) return opt_model
end end
"""
    gapfilled_mask(opt_model)::Union{Nothing,BitVector}

Get a `BitVector` of added reactions from the model solved by
[`gapfill_minimum_reactions`](@ref). The bit indexes correspond to the indexes
of `universal_reactions` given to the gapfilling function. In case the model is
not solved, this returns `nothing`.

The mask is read from the binary indicator variables registered under the name
`y` in `opt_model`; a reaction counts as added when its indicator value is
greater than zero.

# Example

    gapfill_minimum_reactions(myModel, myReactions, GLPK.Optimizer) |> gapfilled_mask
"""
# The return annotation has to admit `Nothing`: a plain `::BitVector` annotation
# makes Julia call `convert(BitVector, nothing)` on the unsolved branch, which
# throws a `MethodError` instead of returning the documented `nothing`.
gapfilled_mask(opt_model)::Union{Nothing,BitVector} =
    is_solved(opt_model) ? value.(opt_model[:y]) .> 0 : nothing
"""
    gapfilled_rids(opt_model, universal_reactions::Vector{Reaction})

Collect the IDs of the reactions that the gapfilling algorithm decided to add.
`opt_model` is expected to be the optimization model returned from
[`gapfill_minimum_reactions`](@ref), and `universal_reactions` the same reaction
vector that was passed to it, so that the positions in the mask line up.

Returns a vector with the `id` of every selected reaction, or `nothing` if
[`gapfilled_mask`](@ref) yields `nothing` for `opt_model`.
"""
function gapfilled_rids(opt_model, universal_reactions::Vector{Reaction})
    selected = gapfilled_mask(opt_model)
    # no mask available, nothing to report
    isnothing(selected) && return nothing
    return [reaction.id for reaction in universal_reactions[selected]]
end
"""
    gapfilled_rids(universal_reactions::Vector{Reaction})

Curried variant of [`gapfilled_rids`](@ref): returns a one-argument function
that takes the optimization model and looks up the added reaction IDs in
`universal_reactions`. This form is convenient at the end of a `|>` pipeline.

# Example

    gapfill_minimum_reactions(myModel, myReactions, GLPK.Optimizer) |> gapfilled_rids(myReactions)
"""
function gapfilled_rids(universal_reactions::Vector{Reaction})
    # capture the reaction list; the model arrives later through the pipe
    return solved_model -> gapfilled_rids(solved_model, universal_reactions)
end
""" """
_universal_stoichiometry( _universal_stoichiometry(
universal_reactions::Vector{Reaction}, universal_reactions::Vector{Reaction},
metabolite_id_order, mids,
) )
A helper function that constructs the stoichiometric matrix of a set of A helper function that constructs the stoichiometric matrix of a set of
`universal_reactions`. The order of the metabolites is determined with `universal_reactions`. The order of the metabolites is determined with
`metabolite_id_order`, so that this stoichiometric matrix can be combined with `mids`, so that this stoichiometric matrix can be combined with
another one. another one.
""" """
function _universal_stoichiometry( function _universal_stoichiometry(urxns::Vector{Reaction}, mids::Vector{String})
universal_reactions::Vector{Reaction},
metabolite_id_order; # traversal over all elements in stoichiometry of universal_reactions
ignore_reactions = [], stoiMap(f) = [
) f(ridx, mid, stoi) for (ridx, rxn) in enumerate(urxns) for
rows = Int[] (mid, stoi) in rxn.metabolites
cols = Int[] ]
vals = Float64[]
lbs = zeros(length(universal_reactions))
ubs = zeros(length(universal_reactions))
met_id_order_lu = Dict(zip(metabolite_id_order, 1:length(metabolite_id_order)))
n_midxs = length(met_id_order_lu) # account for metabolites already in model
n_cols = 0 # counter for filtered reactions
for (col, rxn) in
enumerate(filter(x -> !in(x.id, ignore_reactions), universal_reactions))
n_cols += 1
for (mid, stoich) in rxn.metabolites
if !haskey(met_id_order_lu, mid)
n_midxs += 1
met_id_order_lu[mid] = n_midxs
end
push!(rows, met_id_order_lu[mid])
push!(cols, col)
push!(vals, stoich)
end
lbs[col] = rxn.lb
ubs[col] = rxn.ub
end
return sparse(rows, cols, vals, n_midxs, n_cols), lbs, ubs # make an index and find new metabolites
met_id_lookup = Dict(mids .=> eachindex(mids))
new_mids =
collect(Set(filter(x -> !haskey(met_id_lookup, x), stoiMap((_, mid, _) -> mid))))
all_mids = vcat(mids, new_mids)
# remake the index with all metabolites
met_id_lookup = Dict(all_mids .=> eachindex(all_mids))
# build the result
return (
stoichiometry = float.(
sparse(
stoiMap((_, mid, _) -> met_id_lookup[mid]),
stoiMap((ridx, _, _) -> ridx),
stoiMap((_, _, stoi) -> stoi),
length(all_mids),
length(urxns),
),
),
lbs = [rxn.lb for rxn in urxns],
ubs = [rxn.ub for rxn in urxns],
new_mids = new_mids,
)
end end
...@@ -2,15 +2,9 @@ ...@@ -2,15 +2,9 @@
#= #=
Implement the small model that should be gapfilled. Implement the small model that should be gapfilled.
=# =#
model = StandardModel("partialmodel") model = StandardModel("partial model")
m1 = Metabolite("m1")
m2 = Metabolite("m2") (m1, m2, m3, m4, m5, m6, m7, m8) = Metabolite.("m$i" for i = 1:8)
m3 = Metabolite("m3")
m4 = Metabolite("m4")
m5 = Metabolite("m5")
m6 = Metabolite("m6")
m7 = Metabolite("m7")
m8 = Metabolite("m8")
@add_reactions! model begin @add_reactions! model begin
"r1", nothing m1, 0, 1 "r1", nothing m1, 0, 1
...@@ -38,18 +32,16 @@ ...@@ -38,18 +32,16 @@
rB = Reaction("rB", Dict("m2" => -1, "m9" => 1), :forward) rB = Reaction("rB", Dict("m2" => -1, "m9" => 1), :forward)
rC = Reaction("rC", Dict("m9" => -1, "m10" => 1), :bidirectional) rC = Reaction("rC", Dict("m9" => -1, "m10" => 1), :bidirectional)
rD = Reaction("rC", Dict("m10" => -1), :reverse) rD = Reaction("rC", Dict("m10" => -1), :reverse)
rE = Reaction("rE", Dict("m2" => -1, "m7" => 2, "m6" => 2), :forward)
universal_reactions = [r5, r7, r10, rA, rB, rC, rD] universal_reactions = [r5, r7, r10, rA, rB, rC, rD]
optimizer = GLPK.Optimizer
rxns = gapfill_minimum_reactions( rxns =
model, gapfill_minimum_reactions(
universal_reactions, model,
0.1, universal_reactions,
optimizer; GLPK.Optimizer;
ignore_reactions = ["rE"], objective_bounds = (0.1, 1000.0),
) ) |> gapfilled_rids(universal_reactions)
@test 2 in rxns
@test 3 in rxns @test issetequal(["r7", "r10"], rxns)
@test length(rxns) == 2
end end
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment