Unverified Commit c240484a authored by St. Elmo's avatar St. Elmo
Browse files

make gecko resemble published version

parent 09393470
Pipeline #55272 failed with stages
in 9 minutes and 37 seconds
......@@ -6,7 +6,6 @@
gene_product_molar_mass::Union{Function,Dict{String,Float64}},
gene_mass_group::Union{Function,Dict{String,String}} = _ -> "uncategorized",
gene_mass_group_bound::Union{Function,Dict{String,Float64}},
relaxed_arm_reaction_bounds = false,
)
Wrap a model into a [`GeckoModel`](@ref), following the structure given by
......@@ -27,16 +26,6 @@ GECKO algorithm (see [`GeckoModel`](@ref) documentation for details).
GECKO.
- `gene_mass_group_bound` is a function that returns the maximum mass for a given
mass group.
- `relaxed_arm_reaction_bounds` is a boolean flag that relaxes the constraints
on the "arm" reactions specified by GECKO. By default (value `false`), there
is a separate constraint that limits the total flux through forward-direction
reaction for all its isozymes (ensuring that the sum of forward rates is less
than "global" upper bound), and another separate constraint that limits the
total flux through reverse-direction reaction isozymes. Value `true` groups
both forward and reverse reactions in a single constraint, allowing the total
forward flux to be actually greater than the upper bound IF the reverse flux
can balance it to fit into the upper and lower bound constraints (in turn,
more enzyme can be "wasted" by a reaction that runs in both directions).
Alternatively, all function arguments may be also supplied as dictionaries that
provide the same data lookup.
......@@ -50,7 +39,8 @@ function make_gecko_model(
gene_mass_group_bound::Union{Function,Dict{String,Float64}},
)
ris_ =
reaction_isozymes isa Function ? reaction_isozymes : (rid -> get(reaction_isozymes, rid, []))
reaction_isozymes isa Function ? reaction_isozymes :
(rid -> get(reaction_isozymes, rid, []))
gpb_ =
gene_product_bounds isa Function ? gene_product_bounds :
(gid -> gene_product_bounds[gid])
......@@ -58,13 +48,15 @@ function make_gecko_model(
gene_product_molar_mass isa Function ? gene_product_molar_mass :
(gid -> gene_product_molar_mass[gid])
gmg_ = gene_mass_group isa Function ? gene_mass_group : (gid -> gene_mass_group[gid])
gmgb_ = gene_mass_group_bound isa Function ? gene_mass_group_bound : (grp -> gene_mass_group_bound[grp])
gmgb_ =
gene_mass_group_bound isa Function ? gene_mass_group_bound :
(grp -> gene_mass_group_bound[grp])
# ...it would be nicer to have an overload for this, but kwargs can't be used for dispatch
columns = Vector{_gecko_column}()
coupling_row_reaction = Int[]
coupling_row_gene_product = Int[]
coupling_row_mass_group = String[]
# coupling_row_mass_group = String[]
gids = genes(model)
(lbs, ubs) = bounds(model)
......@@ -72,12 +64,11 @@ function make_gecko_model(
gene_name_lookup = Dict(gids .=> 1:length(gids))
gene_row_lookup = Dict{Int,Int}()
mass_group_lookup = Dict{String,Int}()
for i = 1:n_reactions(model)
isozymes = ris_(rids[i])
if isempty(isozymes)
push!(columns, _gecko_column(i, 0, 0, 0, lbs[i], ubs[i], [], []))
push!(columns, _gecko_column(i, 0, 0, 0, lbs[i], ubs[i], []))
continue
end
......@@ -99,7 +90,7 @@ function make_gecko_model(
push!(coupling_row_reaction, i)
length(coupling_row_reaction)
end : 0
# all isozymes in this direction
for (iidx, isozyme) in enumerate(isozymes)
kcat = kcatf(isozyme)
......@@ -120,31 +111,6 @@ function make_gecko_model(
haskey(gene_name_lookup, gene)
)
# prepare the coupling with the mass groups
gp_groups = gmg_.(keys(isozyme.gene_product_count))
gp_mass = gpmm_.(keys(isozyme.gene_product_count))
groups = unique(filter(!isnothing, gp_groups))
group_idx = Dict(groups .=> 1:length(groups))
vals = [0.0 for _ in groups]
for (gpg, mass) in zip(gp_groups, gp_mass)
if !isnothing(gpg)
vals[group_idx[gpg]] += mass / kcat
end
end
mass_group_coupling = collect(
isnothing(group) ? 0 :
begin
if !haskey(mass_group_lookup, group)
push!(coupling_row_mass_group, group)
mass_group_lookup[group] =
length(coupling_row_mass_group)
end
(mass_group_lookup[group], val)
end for (group, val) in zip(groups, vals)
)
# make a new column
push!(
columns,
......@@ -156,7 +122,6 @@ function make_gecko_model(
max(lb, 0),
ub,
gene_product_coupling,
mass_group_coupling,
),
)
end
......@@ -164,11 +129,37 @@ function make_gecko_model(
end
end
GeckoModel(
# prepare enzyme capacity constraints
mg_gid_lookup = Dict{String, Vector{String}}()
for gid in gids[coupling_row_gene_product]
mg = gmg_(gid)
if haskey(mg_gid_lookup, mg)
push!(mg_gid_lookup[mg], gid)
else
mg_gid_lookup[mg] = [gid]
end
end
coupling_row_mass_group = Vector{Tuple{Vector{Int}, Vector{Float64}, Float64}}()
for (grp, gs) in mg_gid_lookup
idxs = Int.(indexin(gs, gids))
mms = gpmm_.(gs)
push!(coupling_row_mass_group, (idxs, mms, gmgb_(grp)))
end
gm = GeckoModel(
spzeros(length(columns) + length(coupling_row_gene_product)),
columns,
coupling_row_reaction,
collect(zip(coupling_row_gene_product, gpb_.(gids[coupling_row_gene_product]))),
collect(zip(coupling_row_mass_group, gmgb_.(coupling_row_mass_group))),
coupling_row_mass_group,
model,
)
# set objective (do separately because gene products can also be objectives)
gm.objective .= [
_gecko_column_reactions(gm)' * objective(gm.inner)
spzeros(length(coupling_row_gene_product))
]
return gm
end
"""
struct _gecko_column
......@@ -12,7 +11,6 @@ struct _gecko_column
lb::Float64
ub::Float64
gene_product_coupling::Vector{Tuple{Int,Float64}}
mass_group_coupling::Vector{Tuple{Int,Float64}}
end
"""
......@@ -20,7 +18,7 @@ end
A model with complex enzyme concentration and capacity bounds, as described in
*Sánchez, Benjamín J., et al. "Improving the phenotype predictions of a yeast
genomescale metabolic model by incorporating enzymatic constraints." Molecular
genome-scale metabolic model by incorporating enzymatic constraints." Molecular
systems biology 13.8 (2017): 935.*
Use [`make_gecko_model`](@ref) or [`with_gecko`](@ref) to construct this kind
......@@ -31,10 +29,9 @@ The model wraps another "internal" model, and adds following modifications:
forward and reverse variants for each isozyme,
- reaction coupling is added to ensure the groups of isozyme reactions obey the
global reaction flux bounds from the original model,
- coupling is added to simulate available gene concentrations as "virtual
metabolites" consumed by each reaction by its gene product stoichiometry,
which can constrained by the user (to reflect realistic measurements such as
from mass spectrometry),
- gene concentrations specified by each reaction and its gene product stoichiometry,
can constrained by the user to reflect measurements, such as
from mass spectrometry,
- additional coupling is added to simulate total masses of different proteins
grouped by type (e.g., membrane-bound and free-floating proteins), which can
be again constrained by the user (this is slightly generalized from original
......@@ -54,10 +51,11 @@ constraints are implemented using [`coupling`](@ref) and
purely virtual and do not occur in [`metabolites`](@ref).
"""
struct GeckoModel <: ModelWrapper
objective::SparseVec
columns::Vector{_gecko_column}
coupling_row_reaction::Vector{Int}
coupling_row_gene_product::Vector{Tuple{Int,Tuple{Float64,Float64}}}
coupling_row_mass_group::Vector{Tuple{String,Float64}}
coupling_row_mass_group::Vector{Tuple{Vector{Int}, Vector{Float64}, Float64}}
inner::MetabolicModel
end
......@@ -71,26 +69,33 @@ Return a stoichiometry of the [`GeckoModel`](@ref). The enzymatic reactions are
split into unidirectional forward and reverse ones, each of which may have
multiple variants per isozyme.
"""
stoichiometry(model::GeckoModel) =
stoichiometry(model.inner) * _gecko_column_reactions(model)
function stoichiometry(model::GeckoModel)
irrevS = stoichiometry(model.inner) * COBREXA._gecko_column_reactions(model)
enzS = COBREXA._gecko_gene_product_coupling(model)
[
irrevS spzeros(size(irrevS, 1), size(enzS, 1))
-enzS I(size(enzS, 1))
]
end
"""
objective(model::GeckoModel)
Reconstruct an objective of the [`GeckoModel`](@ref), following the objective
of the inner model.
Reconstruct an objective of the [`GeckoModel`](@ref).
"""
objective(model::GeckoModel) = _gecko_column_reactions(model)' * objective(model.inner)
objective(model::GeckoModel) = model.objective
"""
reactions(model::GeckoModel)
Returns the internal reactions in a [`GeckoModel`](@ref) (these may be split
to forward- and reverse-only parts with different isozyme indexes; reactions
IDs are mangled accordingly with suffixes).
IDs are mangled accordingly with suffixes), as well as the genes associated
with enzymatic reactions. In the context of a GeckoModel, this is better described
as the variables in the problem.
"""
reactions(model::GeckoModel) =
let inner_reactions = reactions(model.inner)
function reactions(model::GeckoModel)
rxnnames = let inner_reactions = reactions(model.inner)
[
_gecko_reaction_name(
inner_reactions[col.reaction_idx],
......@@ -99,21 +104,35 @@ reactions(model::GeckoModel) =
) for col in model.columns
]
end
[rxnnames; genes(model)]
end
"""
reactions(model::GeckoModel)
n_reactions(model::GeckoModel)
Returns the number of all irreversible reactions in `model`.
Returns the number of all irreversible reactions in `model` as well as the number of gene products
that take part in enzymatic reactions. In the context of a GeckoModel, this is better described
as the number of variables in the problem.
"""
n_reactions(model::GeckoModel) = length(model.columns)
n_reactions(model::GeckoModel) =
length(model.columns) + length(model.coupling_row_gene_product)
"""
bounds(model::GeckoModel)
Return variable bounds for [`GeckoModel`](@ref).
"""
bounds(model::GeckoModel) =
([col.lb for col in model.columns], [col.ub for col in model.columns])
function bounds(model::GeckoModel)
lbs = [
[col.lb for col in model.columns]
[lb for (_, (lb, _)) in model.coupling_row_gene_product]
]
ubs = [
[col.ub for col in model.columns]
[ub for (_, (_, ub)) in model.coupling_row_gene_product]
]
(lbs, ubs)
end
"""
reaction_flux(model::GeckoModel)
......@@ -121,22 +140,32 @@ bounds(model::GeckoModel) =
Get the mapping of the reaction rates in [`GeckoModel`](@ref) to the original
fluxes in the wrapped model.
"""
reaction_flux(model::GeckoModel) =
_gecko_column_reactions(model)' * reaction_flux(model.inner)
function reaction_flux(model::GeckoModel)
gecko_mat = _gecko_column_reactions(model)'
inner_mat = reaction_flux(model.inner)
[
gecko_mat*inner_mat spzeros(size(gecko_mat, 1), n_genes(model))
spzeros(n_genes(model), size(inner_mat, 2)) I(n_genes(model))
]
end
"""
coupling(model::GeckoModel)
Return the coupling of [`GeckoModel`](@ref). That combines the coupling of the
wrapped model, coupling for split reactions, and the coupling for the total
wrapped model, coupling for split (arm) reactions, and the coupling for the total
enzyme capacity.
"""
coupling(model::GeckoModel) = vcat(
coupling(model.inner) * _gecko_column_reactions(model),
_gecko_reaction_coupling(model),
_gecko_gene_product_coupling(model),
_gecko_mass_group_coupling(model),
)
function coupling(model::GeckoModel)
innerC = coupling(model.inner) * _gecko_column_reactions(model)
rxnC = _gecko_reaction_coupling(model)
enzcap = _gecko_mass_group_coupling(model)
[
innerC spzeros(size(innerC, 1), n_genes(model))
rxnC spzeros(size(rxnC, 1), n_genes(model))
spzeros(length(model.coupling_row_mass_group), length(model.columns)) enzcap
]
end
"""
n_coupling_constraints(model::GeckoModel)
......@@ -147,7 +176,6 @@ Count the coupling constraints in [`GeckoModel`](@ref) (refer to
n_coupling_constraints(model::GeckoModel) =
n_coupling_constraints(model.inner) +
length(model.coupling_row_reaction) +
length(model.coupling_row_gene_product) +
length(model.coupling_row_mass_group)
"""
......@@ -162,15 +190,43 @@ function coupling_bounds(model::GeckoModel)
return (
vcat(
iclb,
ilb[model.coupling_row_reaction], #! fix bound
[lb for (_, (lb, _)) in model.coupling_row_gene_product],
ilb[model.coupling_row_reaction],
[0.0 for _ in model.coupling_row_mass_group],
),
vcat(
icub,
iub[model.coupling_row_reaction], #! fix bound
[ub for (_, (_, ub)) in model.coupling_row_gene_product],
[ub for (_, ub) in model.coupling_row_mass_group],
iub[model.coupling_row_reaction],
[ub for (_, _, ub) in model.coupling_row_mass_group],
),
)
end
"""
balance(model::GeckoModel)
Return the balance of the inner model, concatenated with a vector of
zeros representing the enzyme balance of a [`GeckoModel`](@ref).
"""
balance(model::GeckoModel) = [balance(model.inner); spzeros(length(model.coupling_row_gene_product))]
"""
n_genes(model::GeckoModel)
Return the number of genes that have enzymatic constraints associated with them.
"""
n_genes(model::GeckoModel) = length(model.coupling_row_gene_product)
"""
genes(model::GeckoModel)
Return the gene ids of genes that have enzymatic constraints associated with them.
"""
genes(model::GeckoModel) = genes(model.inner)[[idx for (idx, _) in model.coupling_row_gene_product]]
"""
fluxes(model::GeckoModel)
Return the original reaction ids and the gene ids that were variables
in a [`GeckoModel`](@ref).
"""
fluxes(model::GeckoModel) = [reactions(model.inner); genes(model)]
\ No newline at end of file
......@@ -70,17 +70,16 @@ _gecko_gene_product_coupling(model::GeckoModel) =
Compute the part of the coupling for [`GeckoModel`](@ref) that limits the total
mass of each group of gene products.
"""
_gecko_mass_group_coupling(model::GeckoModel) =
let
tmp = [
(row, i, val) for (i, col) in enumerate(model.columns) for
(row, val) in col.mass_group_coupling
]
sparse(
[row for (row, _, _) in tmp],
[col for (_, col, _) in tmp],
[val for (_, _, val) in tmp],
length(model.coupling_row_mass_group),
length(model.columns),
)
end
function _gecko_mass_group_coupling(model::GeckoModel)
tmp = [
(i, j, mm) for (i, mg) in enumerate(model.coupling_row_mass_group) for
(j, mm) in zip(mg[1], mg[2])
]
sparse(
[i for (i, _, _) in tmp],
[j for (_, j, _) in tmp],
[mm for (_, _, mm) in tmp],
length(model.coupling_row_mass_group),
n_genes(model),
)
end
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment