Unverified Commit 418186b1 authored by St. Elmo's avatar St. Elmo Committed by GitHub
Browse files

Merge pull request #586 from LCSB-BioCore/mo-enzyme-constrained

Implement GECKO and SMOMENT
parents 0869d36b 38dbbed1
Pipeline #55370 passed with stages
in 19 minutes and 49 seconds
......@@ -32,6 +32,7 @@ _inc_all.(
joinpath("base", "logging"),
joinpath("base", "macros"),
joinpath("base", "types"),
joinpath("base", "types", "wrappers"),
"base",
"io",
joinpath("io", "show"),
......
......@@ -70,7 +70,6 @@ biomass_reaction_id = findfirst(model.reactions, "BIOMASS_Ecoli_core_w_GAM")
modified_solution = flux_balance_analysis(model, GLPK.optimizer;
modifications=[change_objective(biomass_reaction_id)])
```
"""
function flux_balance_analysis(
model::M,
......
"""
make_gecko_model(
model::MetabolicModel;
reaction_isozymes::Union{Function,Dict{String,Vector{Isozyme}}}
gene_product_bounds::Union{Function,Dict{String,Tuple{Float64,Float64}}},
gene_product_molar_mass::Union{Function,Dict{String,Float64}},
gene_product_mass_group::Union{Function,Dict{String,String}} = _ -> "uncategorized",
gene_product_mass_group_bound::Union{Function,Dict{String,Float64}},
)
Wrap a model into a [`GeckoModel`](@ref), following the structure given by
GECKO algorithm (see [`GeckoModel`](@ref) documentation for details).
# Arguments
- `reaction_isozymes` is a function that returns a vector of [`Isozyme`](@ref)s
for each reaction, or empty vector if the reaction is not enzymatic.
- `gene_product_bounds` is a function that returns lower and upper bound for
concentration for a given gene product (specified by the same string gene ID as in
`reaction_isozymes`), as `Tuple{Float64,Float64}`.
- `gene_product_molar_mass` is a function that returns a numeric molar mass of
a given gene product specified by string gene ID.
- `gene_product_mass_group` is a function that returns a string group identifier for a
given gene product, again specified by string gene ID. By default, all gene
products belong to group `"uncategorized"` which is the behavior of original
GECKO.
- `gene_product_mass_group_bound` is a function that returns the maximum mass for a given
mass group.
Alternatively, all function arguments may be also supplied as dictionaries that
provide the same data lookup.
"""
function make_gecko_model(
model::MetabolicModel;
reaction_isozymes::Union{Function,Dict{String,Vector{Isozyme}}},
gene_product_bounds::Union{Function,Dict{String,Tuple{Float64,Float64}}},
gene_product_molar_mass::Union{Function,Dict{String,Float64}},
gene_product_mass_group::Union{Function,Dict{String,String}} = _ -> "uncategorized",
gene_product_mass_group_bound::Union{Function,Dict{String,Float64}},
)
ris_ =
reaction_isozymes isa Function ? reaction_isozymes :
(rid -> get(reaction_isozymes, rid, []))
gpb_ =
gene_product_bounds isa Function ? gene_product_bounds :
(gid -> gene_product_bounds[gid])
gpmm_ =
gene_product_molar_mass isa Function ? gene_product_molar_mass :
(gid -> gene_product_molar_mass[gid])
gmg_ =
gene_product_mass_group isa Function ? gene_product_mass_group :
(gid -> gene_product_mass_group[gid])
gmgb_ =
gene_product_mass_group_bound isa Function ? gene_product_mass_group_bound :
(grp -> gene_product_mass_group_bound[grp])
# ...it would be nicer to have an overload for this, but kwargs can't be used for dispatch
columns = Vector{_gecko_reaction_column}()
coupling_row_reaction = Int[]
coupling_row_gene_product = Int[]
gids = genes(model)
(lbs, ubs) = bounds(model)
rids = reactions(model)
gene_name_lookup = Dict(gids .=> 1:length(gids))
gene_row_lookup = Dict{Int,Int}()
for i = 1:n_reactions(model)
isozymes = ris_(rids[i])
if isempty(isozymes)
push!(columns, _gecko_reaction_column(i, 0, 0, 0, lbs[i], ubs[i], []))
continue
end
# loop over both directions for all isozymes
for (lb, ub, kcatf, dir) in [
(-ubs[i], -lbs[i], i -> i.kcat_reverse, -1),
(lbs[i], ubs[i], i -> i.kcat_forward, 1),
]
# The coefficients in the coupling matrix will be categorized in
# separate rows for negative and positive reactions. Surprisingly,
# we do not need to explicitly remember the bounds, because the
# ones taken from the original model are perfectly okay -- the
# "reverse" direction is unreachable because of individual
# bounds on split reactions, and the "forward" direction is
# properly negated in the reverse case to work nicely with the
# global lower bound.
reaction_coupling_row =
ub > 0 && length(isozymes) > 1 ? begin
push!(coupling_row_reaction, i)
length(coupling_row_reaction)
end : 0
# all isozymes in this direction
for (iidx, isozyme) in enumerate(isozymes)
kcat = kcatf(isozyme)
if ub > 0 && kcat > _constants.tolerance
# prepare the coupling with gene product molar
gene_product_coupling = collect(
begin
gidx = gene_name_lookup[gene]
row_idx = if haskey(gene_row_lookup, gidx)
gene_row_lookup[gidx]
else
push!(coupling_row_gene_product, gidx)
gene_row_lookup[gidx] =
length(coupling_row_gene_product)
end
(row_idx, stoich / kcat)
end for (gene, stoich) in isozyme.gene_product_count if
haskey(gene_name_lookup, gene)
)
# make a new column
push!(
columns,
_gecko_reaction_column(
i,
iidx,
dir,
reaction_coupling_row,
max(lb, 0),
ub,
gene_product_coupling,
),
)
end
end
end
end
# prepare enzyme capacity constraints
mg_gid_lookup = Dict{String,Vector{String}}()
for gid in gids[coupling_row_gene_product]
mg = gmg_(gid)
if haskey(mg_gid_lookup, mg)
push!(mg_gid_lookup[mg], gid)
else
mg_gid_lookup[mg] = [gid]
end
end
coupling_row_mass_group = Vector{_gecko_capacity}()
for (grp, gs) in mg_gid_lookup
idxs = [gene_row_lookup[x] for x in Int.(indexin(gs, gids))]
mms = gpmm_.(gs)
push!(coupling_row_mass_group, _gecko_capacity(grp, idxs, mms, gmgb_(grp)))
end
# create model with dummy objective
gm = GeckoModel(
spzeros(length(columns) + length(coupling_row_gene_product)),
columns,
coupling_row_reaction,
collect(zip(coupling_row_gene_product, gpb_.(gids[coupling_row_gene_product]))),
coupling_row_mass_group,
model,
)
#=
Set objective. This is a separate field because gene products can also be objectives.
This way they can be set as objectives by the user.
=#
gm.objective .= [
_gecko_reaction_column_reactions(gm)' * objective(gm.inner)
spzeros(length(coupling_row_gene_product))
]
return gm
end
"""
make_smoment_model(
model::MetabolicModel;
reaction_isozyme::Union{Function,Dict{String,Isozyme}},
gene_product_molar_mass::Union{Function,Dict{String,Float64}},
total_enzyme_capacity::Float64,
)
Construct a model with a structure given by sMOMENT algorithm; returns a
[`SMomentModel`](@ref) (see the documentation for details).
# Arguments
- `reaction_isozyme` parameter is a function that returns a single
[`Isozyme`](@ref) for each reaction, or `nothing` if the reaction is not
enzymatic. If the reaction has multiple isozymes, use
[`smoment_isozyme_speed`](@ref) to select the fastest one, as recommended by
the sMOMENT paper.
- `gene_product_molar_mass` parameter is a function that returns a molar mass
of each gene product as specified by sMOMENT.
- `total_enzyme_capacity` is the maximum "enzyme capacity" in the model.
Alternatively, all function arguments also accept dictionaries that are used to
provide the same data lookup.
"""
function make_smoment_model(
model::MetabolicModel;
reaction_isozyme::Union{Function,Dict{String,Isozyme}},
gene_product_molar_mass::Union{Function,Dict{String,Float64}},
total_enzyme_capacity::Float64,
)
ris_ =
reaction_isozyme isa Function ? reaction_isozyme :
(rid -> get(reaction_isozyme, rid, nothing))
gpmm_ =
gene_product_molar_mass isa Function ? gene_product_molar_mass :
(gid -> gene_product_molar_mass[gid])
columns = Vector{_smoment_column}()
(lbs, ubs) = bounds(model)
rids = reactions(model)
for i = 1:n_reactions(model)
isozyme = ris_(rids[i])
if isnothing(isozyme)
# non-enzymatic reaction (or a totally ignored one)
push!(columns, _smoment_column(i, 0, lbs[i], ubs[i], 0))
continue
end
mw = sum(gpmm_(gid) * ps for (gid, ps) in isozyme.gene_product_count)
if min(lbs[i], ubs[i]) < 0 && isozyme.kcat_reverse > _constants.tolerance
# reaction can run in reverse
push!(
columns,
_smoment_column(i, -1, max(-ubs[i], 0), -lbs[i], mw / isozyme.kcat_reverse),
)
end
if max(lbs[i], ubs[i]) > 0 && isozyme.kcat_forward > _constants.tolerance
# reaction can run forward
push!(
columns,
_smoment_column(i, 1, max(lbs[i], 0), ubs[i], mw / isozyme.kcat_forward),
)
end
end
return SMomentModel(columns, total_enzyme_capacity, model)
end
......@@ -3,13 +3,15 @@
make_optimization_model(
model::MetabolicModel,
optimizer;
sense = MOI.MAX_SENSE,
sense = MAX_SENSE,
)
Convert `MetabolicModel`s to a JuMP model, place objectives and the equality
constraint.
Here coupling means inequality constraints coupling multiple variables together.
"""
function make_optimization_model(model::MetabolicModel, optimizer; sense = MOI.MAX_SENSE)
function make_optimization_model(model::MetabolicModel, optimizer; sense = MAX_SENSE)
precache!(model)
......@@ -25,8 +27,8 @@ function make_optimization_model(model::MetabolicModel, optimizer; sense = MOI.M
C = coupling(model) # empty if no coupling
cl, cu = coupling_bounds(model)
isempty(C) || @constraint(optimization_model, c_lbs, cl .<= coupling(model) * x) # coupling lower bounds
isempty(C) || @constraint(optimization_model, c_ubs, coupling(model) * x .<= cu) # coupling upper bounds
isempty(C) || @constraint(optimization_model, c_lbs, cl .<= C * x) # coupling lower bounds
isempty(C) || @constraint(optimization_model, c_ubs, C * x .<= cu) # coupling upper bounds
return optimization_model
end
......@@ -83,7 +85,6 @@ function set_optmodel_bound!(
isnothing(ub) || set_normalized_rhs(opt_model[:ubs][vidx], ub)
end
"""
solved_objective_value(opt_model)::Maybe{Float64}
......@@ -122,4 +123,16 @@ flux_dict(model, flux_balance_analysis(model, ...))
"""
flux_dict(model::MetabolicModel, opt_model)::Maybe{Dict{String,Float64}} =
is_solved(opt_model) ?
Dict(reactions(model) .=> reaction_flux(model)' * value.(opt_model[:x])) : nothing
Dict(fluxes(model) .=> reaction_flux(model)' * value.(opt_model[:x])) : nothing
"""
flux_dict(model::MetabolicModel)
A pipeable variant of `flux_dict`.
# Example
```
flux_balance_analysis(model, ...) |> flux_dict(model)
```
"""
flux_dict(model::MetabolicModel) = opt_model -> flux_dict(model, opt_model)
......@@ -7,6 +7,7 @@ id :: String
name :: Maybe{String}
notes :: Dict{String, Vector{String}}
annotation :: Dict{String, Union{Vector{String}, String}}
molar_mass :: Maybe{Float64}
````
"""
mutable struct Gene
......@@ -14,7 +15,13 @@ mutable struct Gene
name::Maybe{String}
notes::Notes
annotations::Annotations
molar_mass::Maybe{Float64}
Gene(id::String = ""; name = nothing, notes = Notes(), annotations = Annotations()) =
new(id, name, notes, annotations)
Gene(
id::String = "";
name = nothing,
notes = Notes(),
annotations = Annotations(),
molar_mass = nothing,
) = new(id, name, notes, annotations, molar_mass)
end
"""
mutable struct Isozyme
Information about isozyme composition and activity.
# Fields
- `gene_product_count :: Dict{String, Int}` assigns each gene product ID its
count in the isozyme complex (which is used to determine the total mass of
the isozyme)
- `kcat_forward`, `kcat_reverse` -- forward and reverse turnover numbers of the
isozyme
````
"""
mutable struct Isozyme
gene_product_count::Dict{String,Int}
kcat_forward::Float64
kcat_reverse::Float64
end
......@@ -89,7 +89,6 @@ function objective(a::MetabolicModel)::SparseVec
_missing_impl_error(objective, (a,))
end
"""
fluxes(a::MetabolicModel)::Vector{String}
......
"""
struct _gecko_reaction_column
A helper type for describing the contents of [`GeckoModel`](@ref)s.
"""
struct _gecko_reaction_column
reaction_idx::Int
isozyme_idx::Int
direction::Int
reaction_coupling_row::Int
lb::Float64
ub::Float64
gene_product_coupling::Vector{Tuple{Int,Float64}}
end
"""
struct _gecko_capacity
A helper struct that contains the gene product capacity terms organized by
the grouping type, e.g. metabolic or membrane groups etc.
"""
struct _gecko_capacity
group_id::String
gene_product_idxs::Vector{Int}
gene_product_molar_masses::Vector{Float64}
group_upper_bound::Float64
end
"""
struct GeckoModel <: ModelWrapper
A model with complex enzyme concentration and capacity bounds, as described in
*Sánchez, Benjamín J., et al. "Improving the phenotype predictions of a yeast
genome-scale metabolic model by incorporating enzymatic constraints." Molecular
systems biology 13.8 (2017): 935.*
Use [`make_gecko_model`](@ref) or [`with_gecko`](@ref) to construct this kind
of model.
The model wraps another "internal" model, and adds following modifications:
- enzymatic reactions with known enzyme information are split into multiple
forward and reverse variants for each isozyme,
- reaction coupling is added to ensure the groups of isozyme reactions obey the
global reaction flux bounds from the original model,
- gene concentrations specified by each reaction and its gene product stoichiometry,
can constrained by the user to reflect measurements, such as
from mass spectrometry,
- additional coupling is added to simulate total masses of different proteins
grouped by type (e.g., membrane-bound and free-floating proteins), which can
be again constrained by the user (this is slightly generalized from original
GECKO algorithm, which only considers a single group of indiscernible
proteins).
The structure contains fields `columns` that describe the contents of the
coupling columns, `coupling_row_reaction`, `coupling_row_gene_product` and
`coupling_row_mass_group` that describe correspondence of the coupling rows to
original model and determine the coupling bounds, and `inner`, which is the
original wrapped model. Note, `objective` is the objective vector of the model,
special care needs to be taken to ensure that its length is `n_reactions(model)
+ n_genes(model)` when the user modifies it, where `model` is the GeckoModel in
question.
Implementation exposes the split reactions (available as `reactions(model)`),
but retains the original "simple" reactions accessible by [`fluxes`](@ref). All
constraints are implemented using [`coupling`](@ref) and
[`coupling_bounds`](@ref), i.e., all virtual metabolites described by GECKO are
purely virtual and do not occur in [`metabolites`](@ref).
"""
struct GeckoModel <: ModelWrapper
objective::SparseVec
columns::Vector{_gecko_reaction_column}
coupling_row_reaction::Vector{Int}
coupling_row_gene_product::Vector{Tuple{Int,Tuple{Float64,Float64}}}
coupling_row_mass_group::Vector{_gecko_capacity}
inner::MetabolicModel
end
unwrap_model(model::GeckoModel) = model.inner
"""
stoichiometry(model::GeckoModel)
Return a stoichiometry of the [`GeckoModel`](@ref). The enzymatic reactions are
split into unidirectional forward and reverse ones, each of which may have
multiple variants per isozyme.
"""
function stoichiometry(model::GeckoModel)
irrevS = stoichiometry(model.inner) * COBREXA._gecko_reaction_column_reactions(model)
enzS = COBREXA._gecko_gene_product_coupling(model)
[
irrevS spzeros(size(irrevS, 1), size(enzS, 1))
-enzS I(size(enzS, 1))
]
end
"""
objective(model::GeckoModel)
Return the objective of the [`GeckoModel`](@ref). Note, the objective is with
respect to the internal variables, i.e. [`reactions(model)`](@ref) and
[`genes(model)`](@ref). To manually set the objective, index into
`model.objective` appropriately, and remember to set the previous coefficients
to zero.
"""
objective(model::GeckoModel) = model.objective
"""
reactions(model::GeckoModel)
Returns the internal reactions in a [`GeckoModel`](@ref) (these may be split
to forward- and reverse-only parts with different isozyme indexes; reactions
IDs are mangled accordingly with suffixes).
"""
reactions(model::GeckoModel) =
let inner_reactions = reactions(model.inner)
[
_gecko_reaction_name(
inner_reactions[col.reaction_idx],
col.direction,
col.isozyme_idx,
) for col in model.columns
]
end
"""
n_reactions(model::GeckoModel)
Returns the number of all irreversible reactions in `model` as well as the
number of gene products that take part in enzymatic reactions.
"""
n_reactions(model::GeckoModel) = length(model.columns)
"""
bounds(model::GeckoModel)
Return variable bounds for [`GeckoModel`](@ref).
"""
function bounds(model::GeckoModel)
lbs = [
[col.lb for col in model.columns]
[lb for (_, (lb, _)) in model.coupling_row_gene_product]
]
ubs = [
[col.ub for col in model.columns]
[ub for (_, (_, ub)) in model.coupling_row_gene_product]
]
(lbs, ubs)
end
"""
reaction_flux(model::GeckoModel)
Get the mapping of the reaction rates in [`GeckoModel`](@ref) to the original
fluxes in the wrapped model.
"""
function reaction_flux(model::GeckoModel)
rxnmat = _gecko_reaction_column_reactions(model)' * reaction_flux(model.inner)
[
rxnmat
spzeros(n_genes(model), size(rxnmat, 2))
]
end
"""
coupling(model::GeckoModel)
Return the coupling of [`GeckoModel`](@ref). That combines the coupling of the
wrapped model, coupling for split (arm) reactions, and the coupling for the total
enzyme capacity.
"""
function coupling(model::GeckoModel)
innerC = coupling(model.inner) * _gecko_reaction_column_reactions(model)
rxnC = _gecko_reaction_coupling(model)
enzcap = _gecko_mass_group_coupling(model)
[
innerC spzeros(size(innerC, 1), n_genes(model))
rxnC spzeros(size(rxnC, 1), n_genes(model))
spzeros(length(model.coupling_row_mass_group), length(model.columns)) enzcap
]
end
"""
n_coupling_constraints(model::GeckoModel)
Count the coupling constraints in [`GeckoModel`](@ref) (refer to
[`coupling`](@ref) for details).
"""
n_coupling_constraints(model::GeckoModel) =
n_coupling_constraints(model.inner) +
length(model.coupling_row_reaction) +
length(model.coupling_row_mass_group)
"""
coupling_bounds(model::GeckoModel)
The coupling bounds for [`GeckoModel`](@ref) (refer to [`coupling`](@ref) for
details).
"""
function coupling_bounds(model::GeckoModel)
(iclb, icub) = coupling_bounds(model.inner)
(ilb, iub) = bounds(model.inner)
return (
vcat(
iclb,
ilb[model.coupling_row_reaction],
[0.0 for _ in model.coupling_row_mass_group],
),
vcat(
icub,
iub[model.coupling_row_reaction],
[grp.group_upper_bound for grp in model.coupling_row_mass_group],
),
)
end
"""
balance(model::GeckoModel)
Return the balance of the reactions in the inner model, concatenated with a vector of
zeros representing the enzyme balance of a [`GeckoModel`](@ref).
"""
balance(model::GeckoModel) =
[balance(model.inner); spzeros(length(model.coupling_row_gene_product))]
"""
n_genes(model::GeckoModel)
Return the number of genes that have enzymatic constraints associated with them.
"""
n_genes(model::GeckoModel) = length(model.coupling_row_gene_product)
"""
genes(model::GeckoModel)
Return the gene ids of genes that have enzymatic constraints associated with them.
"""
genes(model::GeckoModel) =
genes(model.inner)[[idx for (idx, _) in model.coupling_row_gene_product]]
"""
metabolites(model::GeckoModel)
Return the ids of all metabolites, both real and pseudo, for a [`GeckoModel`](@ref).
"""