Commit a563a2ae authored by Miroslav Kratochvil's avatar Miroslav Kratochvil
Browse files

clean up

parent 3bcccccc
Pipeline #55137 failed with stages
in 3 minutes and 38 seconds
"""
make_gecko_model(
model::MetabolicModel;
reaction_isozymes::Union{Function,Dict{String,Isozyme}},
gene_product_limit::Union{Function,Dict{String,Tuple{Float64,Float64}}},
gene_product_molar_mass::Union{Function,Dict{String,Float64}},
gene_mass_group::Union{Function,Dict{String,String}} = _ -> "uncategorized",
group_mass_limit::Union{Function,Dict{String,Float64}},
relaxed_arm_reaction_bounds = false,
)
Wrap a model into a [`GeckoModel`](@ref), following the structure given by
GECKO algorithm (see [`GeckoModel`](@ref) documentation for details).
# Arguments
- `reaction_isozymes` is a function that returns a vector of [`Isozyme`](@ref)s
for each reaction, or empty vector if the reaction is not enzymatic.
- `gene_product_limit` is a function that returns lower and upper bound for
concentration for a given gene product (specified by the same string gene ID as in
`reaction_isozymes`), as `Tuple{Float64,Float64}`.
- `gene_product_molar_mass` is a function that returns a numeric molar mass of
a given gene product specified by string gene ID.
- `gene_mass_group` is a function that returns a string group identifier for a
given gene product, again specified by string gene ID. By default, all gene
products belong to group `"uncategorized"` which is the behavior of original
GECKO.
- `group_mass_limit` is a function that returns the maximum mass for a given
mass group.
- `relaxed_arm_reaction_bounds` is a boolean flag that relaxes the constraints
on the "arm" reactions specified by GECKO. By default (value `false`), there
is a separate constraint that limits the total flux through forward-direction
reaction for all its isozymes (ensuring that the sum of forward rates is less
than "global" upper bound), and another separate constraint that limits the
total flux through reverse-direction reaction isozymes. Value `true` groups
both forward and reverse reactions in a single constraint, allowing the total
forward flux to be actually greater than the upper bound IF the reverse flux
can balance it to fit into the upper and lower bound constraints (in turn,
more enzyme can be "wasted" by a reaction that runs in both directions).
Alternatively, all function arguments may be also supplied as dictionaries that
provide the same data lookup.
"""
function make_gecko_model(
model::StandardModel;
reaction_isozymes::Function,
gene_product_mass::Function,
gene_product_limit::Function,
gene_mass_group::Function = _ -> "uncategorized",
mass_fraction_limit::Function,
model::MetabolicModel;
reaction_isozymes::Union{Function,Dict{String,Isozyme}},
gene_product_limit::Union{Function,Dict{String,Tuple{Float64,Float64}}},
gene_product_molar_mass::Union{Function,Dict{String,Float64}},
gene_mass_group::Union{Function,Dict{String,String}} = _ -> "uncategorized",
group_mass_limit::Union{Function,Dict{String,Float64}},
relaxed_arm_reaction_bounds = false,
)
ris_ =
reaction_isozymes isa Function ? reaction_isozymes : (gid -> reaction_isozymes[gid])
gpl_ =
gene_product_limit isa Function ? gene_product_limit :
(gid -> gene_product_limit[gid])
gpmm_ =
gene_product_molar_mass isa Function ? gene_product_molar_mass :
(gid -> gene_product_molar_mass[gid])
gmg_ = gene_mass_group isa Function ? gene_mass_group : (gid -> gene_mass_group[gid])
gml_ = group_mass_limit isa Function ? group_mass_limit : (grp -> group_mass_limit[grp])
# ...it would be nicer to have an overload for this, but kwargs can't be used for dispatch
columns = Vector{_gecko_column}()
coupling_row_reaction = Int[]
......@@ -21,7 +76,7 @@ function make_gecko_model(
mass_group_lookup = Dict{String,Int}()
for i = 1:n_reactions(model)
isozymes = reaction_isozymes(rids[i])
isozymes = ris_(rids[i])
if isempty(isozymes)
push!(columns, _gecko_column(i, 0, 0, 0, lbs[i], ubs[i], [], []))
continue
......@@ -30,19 +85,40 @@ function make_gecko_model(
# if the reaction has multiple isozymes, it needs extra coupling to
# ensure that the total rate of the reaction doesn't exceed the
# "global" limit
reaction_coupling_row =
length(isozymes) > 1 ? begin
push!(coupling_row_reaction, i)
length(coupling_row_reaction)
end : 0
for (iidx, isozyme) in enumerate(isozymes)
# loop over both directions for all isozymes
for (lb, ub, kcat, dir) in [
(-ubs[i], -lbs[i], isozyme.kcat_reverse, -1),
(lbs[i], ubs[i], isozyme.kcat_forward, 1),
]
if max(lb, ub) > 0 && kcat > _constants.tolerance
if relaxed_arm_reaction_bounds
reaction_coupling_row =
length(isozymes) > 1 ? begin
push!(coupling_row_reaction, i)
length(coupling_row_reaction)
end : 0
end
# loop over both directions for all isozymes
for (lb, ub, kcatf, dir) in [
(-ubs[i], -lbs[i], i -> i.kcat_reverse, -1),
(lbs[i], ubs[i], i -> i.kcat_forward, 1),
]
if !relaxed_arm_reaction_bounds
# In this case, the coefficients in the coupling matrix will be
# the same as in the combined case, only categorized in
# separate rows for negative and positive ones. Surprisingly,
# we do not need to explicitly remember the bounds, because the
# ones taken from the original model are perfectly okay -- the
# "reverse" direction is unreachable because of individual
# bounds on split reactions, and the "forward" direction is
# properly negated in the reverse case to work nicely with the
# global lower bound.
reaction_coupling_row =
ub > 0 && length(isozymes) > 1 ? begin
push!(coupling_row_reaction, i)
length(coupling_row_reaction)
end : 0
end
# all isozymes in this direction
for (iidx, isozyme) in enumerate(isozymes)
kcat = kcatf(isozyme)
if ub > 0 && kcat > _constants.tolerance
# prepare the coupling with gene product molar
gene_product_coupling = collect(
begin
......@@ -60,8 +136,8 @@ function make_gecko_model(
)
# prepare the coupling with the mass groups
gp_groups = gene_mass_group.(keys(isozyme.gene_product_count))
gp_mass = gene_product_mass.(keys(isozyme.gene_product_count))
gp_groups = gmg_.(keys(isozyme.gene_product_count))
gp_mass = gpmm_.(keys(isozyme.gene_product_count))
groups = unique(filter(!isnothing, gp_groups))
group_idx = Dict(groups .=> 1:length(groups))
vals = [0.0 for _ in groups]
......@@ -106,15 +182,8 @@ function make_gecko_model(
GeckoModel(
columns,
coupling_row_reaction,
collect(
zip(
coupling_row_gene_product,
gene_product_limit.(gids[coupling_row_gene_product]),
),
),
collect(
zip(coupling_row_mass_group, mass_fraction_limit.(coupling_row_mass_group)),
),
collect(zip(coupling_row_gene_product, gpl_.(gids[coupling_row_gene_product]))),
collect(zip(coupling_row_mass_group, gml_.(coupling_row_mass_group))),
model,
)
end
......@@ -2,48 +2,56 @@
"""
make_smoment_model(
model::MetabolicModel;
reaction_isozymes::Function,
gene_product_molar_mass::Function,
reaction_isozymes::Union{Function, Dict{String, Isozyme}},
gene_product_molar_mass::Union{Function, Dict{String, Float64}},
total_enzyme_capacity::Float64,
)
Construct a model with a structure given by sMOMENT algorithm; returns a
[`SMomentModel`](@ref) (see the documentation for details.
[`SMomentModel`](@ref) (see the documentation for details).
`reaction_isozymes` parameter is a function that returns a single isozyme for
each reaction, or `nothing` if the reaction is not enzymatic. If the reaction
has multiple isozymes, use [`smoment_isozyme_score`](@ref) to select the "best"
one, as recommended by the sMOMENT approach.
# Arguments
`gene_product_molar_mass` parameter is a function that returns a molar mass of
each gene product (relative to `total_enzyme_capacity` and the specified
kcats), as specified by sMOMENT.
- `reaction_isozymes` parameter is a function that returns a single
[`Isozyme`](@ref) for each reaction, or `nothing` if the reaction is not
enzymatic. If the reaction has multiple isozymes, use
[`smoment_isozyme_score`](@ref) to select the "best" one, as recommended by
the sMOMENT approach.
- `gene_product_molar_mass` parameter is a function that returns a molar mass
of each gene product (relative to `total_enzyme_capacity` and the specified
kcats), as specified by sMOMENT.
- `total_enzyme_capacity` is the maximum "enzyme capacity" consumption in the
model.
`total_enzyme_capacity` is the maximum "enzyme capacity" consumption of the
model.
Alternatively, all function arguments also accept dictionaries that are used to
provide the same data lookup.
"""
function make_smoment_model(
model::MetabolicModel;
reaction_isozymes::Function,
gene_product_molar_mass::Function,
reaction_isozymes::Union{Function,Dict{String,Isozyme}},
gene_product_molar_mass::::Union{Function,Dict{String,Float64}},
total_enzyme_capacity::Float64,
)
ris_ =
reaction_isozymes isa Function ? reaction_isozymes : (gid -> reaction_isozymes[gid])
gpmm_ =
gene_product_molar_mass isa Function ? gene_product_molar_mass :
(gid -> gene_product_molar_mass[gid])
columns = Vector{_smoment_column}()
(lbs, ubs) = bounds(model)
rids = reactions(model)
for i = 1:n_reactions(model)
isozyme = reaction_isozymes(rids[i])
isozyme = ris_(rids[i])
if isnothing(isozyme)
# non-enzymatic reaction (or a totally ignored one)
push!(columns, _smoment_column(i, 0, lbs[i], ubs[i], 0))
continue
end
mw = sum(
gene_product_molar_mass(gid) * ps for (gid, ps) in isozyme.gene_product_count
)
mw = sum(gpmm_(gid) * ps for (gid, ps) in isozyme.gene_product_count)
if min(lbs[i], ubs[i]) < 0 && isozyme.kcat_reverse > _constants.tolerance
# reaction can run in reverse
......
"""
struct _gecko_column
A helper type for describing the contents of [`GeckoModel`](@ref)s.
"""
struct _gecko_column
reaction_idx::Int
isozyme_idx::Int
......@@ -10,6 +15,44 @@ struct _gecko_column
mass_group_coupling::Vector{Tuple{Int,Float64}}
end
"""
struct GeckoModel <: ModelWrapper
A model with complex enzyme concentration and capacity bounds, as described in
*Sánchez, Benjamín J., et al. "Improving the phenotype predictions of a yeast
genome‐scale metabolic model by incorporating enzymatic constraints." Molecular
systems biology 13.8 (2017): 935.*
Use [`make_gecko_model`](@ref) or [`with_gecko`](@ref) to construct this kind
of models.
The model wraps another "internal" model, and adds following modifications:
- enzymatic reactions with known enzyme information are split into multiple
forward and reverse variants for each isozyme,
- reaction coupling is added to ensure the groups of isozyme reactions obey the
global reaction flux bounds from the original model,
- coupling is added to simulate available gene concentrations as "virtual
metabolites" consumed by each reaction by its gene product stoichiometry,
which can constrained by the user (to reflect realistic measurements such as
from mass spectrometry),
- additional coupling is added to simulate total masses of different proteins
grouped by type (e.g., membrane-bound and free-floating proteins), which can
be again constrained by the user (this is slightly generalized from original
GECKO algorithm, which only considers a single group of indiscernible
proteins).
The structure contains fields `columns` that describe the contents of the
coupling columns, `coupling_row_reaction`, `coupling_row_gene_product` and
`coupling_row_mass_group` that describe correspondence of the coupling rows to
original model and determine the coupling bounds, and `inner`, which is the
original wrapped model.
Implementation exposes the split reactions (available as `reactions(model)`),
but retains the original "simple" reactions accessible by [`fluxes`](@ref). All
constraints are implemented using [`coupling`](@ref) and
[`coupling_bounds`](@ref), i.e., all virtual metabolites described by GECKO are
purely virtual and do not occur in [`metabolites`](@ref).
"""
struct GeckoModel <: ModelWrapper
columns::Vector{_gecko_column}
coupling_row_reaction::Vector{Int}
......@@ -84,8 +127,8 @@ reaction_flux(model::GeckoModel) =
"""
coupling(model::GeckoModel)
Return the coupling of [`GeckoModel`](@ref). That combines the coupling of
the wrapped model, coupling for split reactions, and the coupling for the total
Return the coupling of [`GeckoModel`](@ref). That combines the coupling of the
wrapped model, coupling for split reactions, and the coupling for the total
enzyme capacity.
"""
coupling(model::GeckoModel) = vcat(
......
......@@ -15,10 +15,9 @@ end
"""
struct SMomentModel <: ModelWrapper
Construct an enzyme-capacity constrained model using sMOMENT algorithm, as
described by *Bekiaris, Pavlos Stephanos, and Steffen Klamt, "Automatic
construction of metabolic models with enzyme constraints" BMC bioinformatics,
2020*.
An enzyme-capacity-constrained model using sMOMENT algorithm, as described by
*Bekiaris, Pavlos Stephanos, and Steffen Klamt, "Automatic construction of
metabolic models with enzyme constraints" BMC bioinformatics, 2020*.
Use [`make_smoment_model`](@ref) or [`with_smoment`](@ref) to construct the
models.
......@@ -27,10 +26,10 @@ The model is constructed as follows:
- stoichiometry of the original model is retained as much as possible, but
enzymatic reations are split into forward and reverse parts (marked by a
suffix like `...#forward` and `...#reverse`),
- stoichiometry is expanded by a virtual metabolite "enzyme capacity" which is
consumed by all enzymatic reactions at a rate given by enzyme mass divided by
the corresponding kcat,
- the total consumption of the enzyme capacity is constrained by a fixed
- coupling is added to simulate a virtual metabolite "enzyme capacity", which
is consumed by all enzymatic reactions at a rate given by enzyme mass divided
by the corresponding kcat,
- the total consumption of the enzyme capacity is constrained to a fixed
maximum.
The `SMomentModel` structure contains a worked-out representation of the
......
......@@ -15,18 +15,19 @@
total_protein_mass = 100.0
gm =
model |>
with_changed_bounds(
bounded_model =
model |> with_changed_bounds(
["EX_glc__D_e", "GLCpts"];
lower = [-1000.0, -1.0],
upper = [nothing, 12.0],
) |>
with_gecko(
)
gm =
bounded_model |> with_gecko(
reaction_isozymes = get_reaction_isozymes,
gene_product_limit = g -> g == "b2779" ? (0.01, 0.06) : (0.0, 1.0),
gene_product_mass = get_gene_product_mass,
mass_fraction_limit = _ -> total_protein_mass,
gene_product_molar_mass = get_gene_product_mass,
group_mass_limit = _ -> total_protein_mass,
)
opt_model = flux_balance_analysis(
......@@ -47,4 +48,26 @@
prot_mass = sum(ecoli_core_protein_masses[gid] * c for (gid, c) in prot_concens)
@test isapprox(prot_mass, total_protein_mass, atol = TEST_TOLERANCE)
gm =
bounded_model |> with_gecko(
reaction_isozymes = get_reaction_isozymes,
gene_product_limit = _ -> (0.0, 0.05),
gene_product_molar_mass = get_gene_product_mass,
group_mass_limit = _ -> total_protein_mass,
relaxed_arm_reaction_bounds = true,
)
rxn_fluxes = flux_balance_analysis_dict(
gm,
Tulip.Optimizer;
modifications = [change_optimizer_attribute("IPM_IterationsLimit", 1000)],
)
@test isapprox(
rxn_fluxes["BIOMASS_Ecoli_core_w_GAM"],
0.1877932315030117,
atol = TEST_TOLERANCE,
)
end
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment