Commit ea25d314 authored by Miroslav Kratochvil's avatar Miroslav Kratochvil
Browse files

gecko stage 1 (still TODO: the mass fraction coupling)

parent d8a69f72
Pipeline #55023 failed with stages
in 10 minutes and 9 seconds
"""
make_geckomodel(
model::StandardModel;
rid_isozymes = Dict{String, Vector{Isozyme}}(),
enzyme_capacities = [(),],
)
Construct a `GeckoModel` based on `model` using the kinetic data encoded by
`rid_isozymes`. Enzyme capacity constraints can be added through `enzyme_capacities`,
which is a vector of tuples. In the first position of the tuple is a list of gene ids,
and the second position is mass upperbound of the sum of these gene ids.
The units of the fluxes and protein concentration depend on those used in
`rid_isozymes` for the kcats and the molar masses encoded in the genes of
`model`. Currently only `modifications` that change attributes of the
`optimizer` are supported.
# Example
```
gm = make_geckomodel(
model;
rid_isozymes,
enzyme_capacities = [(get_genes_with_kcats(rid_isozymes), total_protein_mass)],
)
opt_model = flux_balance_analysis(
gm,
Tulip.Optimizer
)
rxn_fluxes = flux_dict(gm, opt_model)
prot_concens = protein_dict(gm, opt_model)
```
"""
function make_geckomodel(
function make_gecko_model(
model::StandardModel;
rid_isozymes = Dict{String,Vector{Isozyme}}(),
enzyme_capacities = [()],
reaction_isozymes::Function,
reaction_isozyme_masses::Function,
gene_product_limit::Function,
reaction_mass_group::Function = _ -> "uncategorized",
mass_faction_limit::Function,
)
S, lb_fluxes, ub_fluxes, reaction_map, metabolite_map =
_build_irreversible_stoichiometric_matrix(model, rid_isozymes)
#: find all gene products that have kcats associated with them
gene_ids = get_genes_with_kcats(rid_isozymes)
#: size of resultant model
num_reactions = size(S, 2)
num_genes = length(gene_ids)
num_metabolites = size(S, 1)
num_vars = num_reactions + num_genes
#: equality lhs
E_components = ( #TODO add size hints if possible
row_idxs = Vector{Int}(),
col_idxs = Vector{Int}(),
coeffs = Vector{Float64}(),
)
columns = Vector{_gecko_column}()
coupling_row_reaction = Int[]
coupling_row_gene_product = Int[]
coupling_row_mass_group = String[]
for (rid, col_idx) in reaction_map
original_rid = string(split(rid, "§")[1])
gene_name_lookup = Dict(genes(model) .=> 1:n_genes(model))
gene_row_lookup = Dict{Int,Int}()
mass_group_lookup = Dict{String,Int}()
# skip these entries
contains(rid, "§ARM") && continue
!haskey(rid_isozymes, original_rid) && continue
(lbs, ubs) = bounds(model)
rids = reactions(model)
# these entries have kcats
if contains(rid, "§ISO")
iso_num = parse(
Int,
replace(
first(filter(startswith("ISO"), split(rid, "§")[2:end])),
"ISO" => "",
),
)
else # only one enzyme
iso_num = 1
for i = 1:n_reactions(model)
isozymes = reaction_isozymes(rids[i])
if isempty(isozymes)
push!(columns, _gecko_column(i, 0, 0, 0, lbs[i], ubs[i], 0, 0, 0, 0))
continue
end
# add all entries to column of matrix
_add_enzyme_variable(
rid_isozymes,
iso_num, # only one enzyme
rid,
original_rid,
E_components,
col_idx,
gene_ids,
)
group = reaction_mass_group(rids[i])
mass_group_row =
isnothing(group) ? 0 :
haskey(massgroup_lookup, group) ? mass_group_lookup[group] :
begin
push!(coupling_row_mass_group, group)
mass_group_lookup[group] = length(coupling_row_mass_group)
end
push!(coupling_row_reaction, i)
reaction_coupling_row = length(coupling_row_reaction)
masses = group > 0 ? reaction_isozyme_masses(rids[i]) : zeros(length(isozymes))
for (iidx, isozyme) in enumerate(isozymes)
if min(lbs[i], ubs[i]) < 0 && isozyme.kcat_reverse > _constants.tolerance
# reaction can run in reverse
push!(
columns,
_gecko_column(
i,
iidx,
-1,
reaction_coupling_row,
max(-ubs[i], 0),
-lbs[i],
_gecko_make_gene_product_coupling(
isozyme.gene_product_count,
isozyme.kcat_reverse,
gene_name_lookup,
gene_row_lookup,
coupling_row_gene_product,
),
mass_group_row,
masses[iidx] / isozyme.kcat_reverse,
),
)
end
if max(lbs[i], ubs[i]) > 0 && isozyme.kcat_forward > _constants.tolerance
# reaction can run forward
push!(
columns,
_gecko_column(
i,
iidx,
1,
reaction_coupling_row,
max(lbs[i], 0),
ubs[i],
_gecko_make_gene_product_coupling(
isozyme.gene_product_count,
isozyme.kcat_forward,
gene_name_lookup,
gene_row_lookup,
coupling_row_gene_product,
),
mass_group_row,
masses[iidx] / isozyme.kcat_forward,
),
)
end
end
end
Se = sparse(
E_components.row_idxs,
E_components.col_idxs,
E_components.coeffs,
num_genes,
num_reactions,
)
coupling_row_mass_group =
collect(zip(coupling_row_mass_group, mass_fraction_limit.(coupling_row_mass_group)))
stoich_mat = sparse([
S zeros(num_metabolites, num_genes)
Se I(num_genes)
])
#: equality rhs
b = spzeros(num_metabolites + num_genes)
#: find objective (assume objective is forward)
obj_idx_orig = first(findnz(objective(model))[1])
obj_id_orig = reactions(model)[obj_idx_orig]
obj_id = obj_id_orig * "§FOR"
c = spzeros(num_vars)
obj_idx = reaction_map[obj_id]
c[obj_idx] = 1.0
#: inequality constraints
xl = sparse([lb_fluxes; fill(0.0, num_genes)])
xu = sparse([ub_fluxes; fill(1000.0, num_genes)])
#: enzyme capacity constraints
mw_proteins = [model.genes[pid].molar_mass for pid in gene_ids]
C = spzeros(length(enzyme_capacities), num_vars)
cl = spzeros(length(enzyme_capacities))
cu = spzeros(length(enzyme_capacities))
for (i, enz_cap) in enumerate(enzyme_capacities)
enz_idxs = indexin(first(enz_cap), gene_ids)
C[i, num_reactions.+enz_idxs] .= mw_proteins[enz_idxs]
cu[i] = last(enz_cap)
end
coupling_row_gene_product = collect(
zip(coupling_row_gene_product, gene_product_limit.(coupling_row_gene_product)),
)
return GeckoModel(
reactions(model),
_order_id_to_idx_dict(reaction_map),
_order_id_to_idx_dict(metabolite_map),
gene_ids,
c,
stoich_mat,
b,
xl,
xu,
C,
cl,
cu,
columns,
coupling_row_reaction,
coupling_row_gene_product,
coupling_row_mass_group,
model,
)
end
"""
_add_enzyme_variable(
rid_isozymes,
iso_num,
rid,
original_rid,
E_components,
col_idx,
gene_ids,
)
Helper function to add an column into the enzyme stoichiometric matrix.
"""
function _add_enzyme_variable(
rid_isozymes,
iso_num,
rid,
original_rid,
E_components,
col_idx,
gene_ids,
_gecko_make_gene_product_coupling(
gene_product_count::Dict{String,Int},
kcat::Float64,
name_lookup::Dict{String,Int},
row_lookup::Dict{Int,Int},
rows::Vector{Int},
) = collect(
begin
gidx = name_lookup[gene]
row_idx = if haskey(row_lookup, gidx)
row_lookup[gidx]
else
push!(rows, gidx)
row_lookup[gidx] = length(rows)
end
(row_idx, 1 / kcat)
end for (gene, count) in gene_product_count if haskey(name_lookup, gene)
)
pstoich = rid_isozymes[original_rid][iso_num].stoichiometry
kcat =
contains(rid, "§FOR") ? rid_isozymes[original_rid][iso_num].kcats[1] :
rid_isozymes[original_rid][iso_num].kcats[2]
for (pid, pst) in pstoich
push!(E_components.row_idxs, first(indexin([pid], gene_ids)))
push!(E_components.col_idxs, col_idx)
push!(E_components.coeffs, -pst / kcat)
end
end
......@@ -39,45 +39,44 @@ function make_smoment_model(
if isnothing(isozyme)
# non-enzymatic reaction (or a totally ignored one)
push!(columns, _smoment_column(i, 0, 0, lbs[i], ubs[i], 0))
else
# pick a new row for "arm reaction" coupling
coupling_row = length(coupling_row_reaction) + 1
push!(coupling_row_reaction, i)
continue
end
# pick a new row for "arm reaction" coupling
push!(coupling_row_reaction, i)
coupling_row = length(coupling_row_reaction)
mw = sum(
gene_product_molar_mass(gid) * ps for
(gid, ps) in isozyme.gene_product_count
)
mw = sum(
gene_product_molar_mass(gid) * ps for (gid, ps) in isozyme.gene_product_count
)
if min(lbs[i], ubs[i]) < 0 && isozyme.kcat_reverse > _constants.tolerance
# reaction can run in reverse
push!(
columns,
_smoment_column(
i,
-1,
coupling_row,
max(-ubs[i], 0),
-lbs[i],
mw / isozyme.kcat_reverse,
),
)
end
if min(lbs[i], ubs[i]) < 0 && isozyme.kcat_reverse > _constants.tolerance
# reaction can run in reverse
push!(
columns,
_smoment_column(
i,
-1,
coupling_row,
max(-ubs[i], 0),
-lbs[i],
mw / isozyme.kcat_reverse,
),
)
end
if max(lbs[i], ubs[i]) > 0 && isozyme.kcat_forward > _constants.tolerance
# reaction can run forward
push!(
columns,
_smoment_column(
i,
1,
coupling_row,
max(lbs[i], 0),
ubs[i],
mw / isozyme.kcat_forward,
),
)
end
if max(lbs[i], ubs[i]) > 0 && isozyme.kcat_forward > _constants.tolerance
# reaction can run forward
push!(
columns,
_smoment_column(
i,
1,
coupling_row,
max(lbs[i], 0),
ubs[i],
mw / isozyme.kcat_forward,
),
)
end
end
......
"""
mutable struct GeckoModel <: MetabolicModel
A model that incorporates enzyme capacity and kinetic constraints via the GECKO
formulation. See `Sánchez, Benjamín J., et al. "Improving the phenotype
predictions of a yeast genome‐scale metabolic model by incorporating enzymatic
constraints." Molecular systems biology, 2017.` for implementation details.
Note, since the model uses irreversible reactions internally, `"§FOR"` (for the
forward direction) and `"§REV"` (for the reverse direction) is appended to each
reaction internally. Hence, `"§"` is reserved for internal use as a delimiter,
no reaction id should contain this character.
To actually run GECKO, call [`flux_balance_analysis`](@ref) on a `GeckoModel`.
# Fields
```
reaction_ids::Vector{String}
irrev_reaction_ids::Vector{String}
metabolites::Vector{String}
gene_ids::Vector{String}
c::SparseVec
S::SparseMat
b::SparseVec
xl::SparseVec
xu::SparseVec
C::SparseMat
cl::Vector{Float64}
cu::Vector{Float64}
```
"""
mutable struct GeckoModel <: MetabolicModel
reaction_ids::Vector{String}
irrev_reaction_ids::Vector{String}
metabolites::Vector{String}
gene_ids::Vector{String}
# gecko
c::SparseVec
S::SparseMat
b::SparseVec
xl::SparseVec
xu::SparseVec
# enzyme capacity constraints
C::SparseMat
cl::Vector{Float64}
cu::Vector{Float64}
end
"""
stoichiometry(model::GeckoModel)
Return stoichiometry matrix that includes enzymes as metabolites.
"""
stoichiometry(model::GeckoModel) = model.S
"""
balance(model::GeckoModel)
struct _gecko_column
reaction_idx::Int
isozyme_idx::Int
direction::Int
reaction_coupling_row::Int
lb::Float64
ub::Float64
gene_product_coupling::Vector{Tuple{Int,Float64}}
mass_group_row::Int
mass_required::Float64
end
Return stoichiometric balance.
"""
balance(model::GeckoModel) = model.b
struct GeckoModel <: ModelWrapper
columns::Vector{_gecko_column}
coupling_row_reaction::Vector{Int}
coupling_row_gene_product::Vector{Tuple{Int,Float64}}
coupling_row_mass_group::Vector{Tuple{String,Float64}} #TODO add to matrices
"""
objective(model::GeckoModel)
inner::MetabolicModel
end
Return objective of `model`.
"""
objective(model::GeckoModel) = model.c
unwrap_model(model::GeckoModel) = model.inner
"""
fluxes(model::GeckoModel)
stoichiometry(model::GeckoModel)
Returns the reversible reactions in `model`. For
the irreversible reactions, use [`reactions`][@ref].
Return a stoichiometry of the [`GeckoModel`](@ref). The enzymatic reactions are
split into unidirectional forward and reverse ones, each of which may have
multiple variants per isozyme.
"""
fluxes(model::GeckoModel) = model.reaction_ids
stoichiometry(model::GeckoModel) =
stoichiometry(model.inner) * _gecko_column_reactions(model)
"""
n_reactions(model::GeckoModel)
objective(model::GeckoModel)
Returns the number of reversible reactions in the model.
Reconstruct an objective of the [`GeckoModel`](@ref), following the objective
of the inner model.
"""
n_fluxes(model::GeckoModel) = length(model.reaction_ids)
objective(model::GeckoModel) = _gecko_column_reactions(model)' * objective(model.inner)
"""
reactions(model::GeckoModel)
Returns the irreversible reactions in `model`.
"""
reactions(model::GeckoModel) = model.irrev_reaction_ids
Returns the internal reactions in a [`GeckoModel`](@ref) (these may be split
to forward- and reverse-only parts with different isozyme indexes; reactions
IDs are mangled accordingly with suffixes).
"""
reactions(model::GeckoModel) =
let inner_reactions = reactions(model.inner)
[
_gecko_reaction_name(
inner_reactions[col.reaction_idx],
col.direction,
col.isozyme_idx,
) for col in model.columns
]
end
"""
reactions(model::GeckoModel)
Returns the number of all irreversible reactions in `model`.
"""
n_reactions(model::GeckoModel) = length(model.irrev_reaction_ids)
"""
genes(model::GeckoModel)
Returns the genes (proteins) in the order as they appear as variables in the
model.
"""
genes(model::GeckoModel) = model.gene_ids
"""
n_genes(model::GeckoModel)
n_reactions(model::GeckoModel) = length(model.columns)
Returns the number of genes in the model.
"""
n_genes(model::GeckoModel) = length(model.gene_ids)
"""
metabolites(model::GeckoModel)
bounds(model::GeckoModel)
Return the metabolites in `model`.
Return variable bounds for [`GeckoModel`](@ref).
"""
metabolites(model::GeckoModel) = model.metabolites
bounds(model::GeckoModel) =
([col.lb for col in model.columns], [col.ub for col in model.columns])
"""
n_metabolites(model::GeckoModel) =
reaction_flux(model::GeckoModel)
Return the number of metabolites in `model`.
Get the mapping of the reaction rates in [`GeckoModel`](@ref) to the original
fluxes in the wrapped model.
"""
n_metabolites(model::GeckoModel) = length(metabolites(model))
reaction_flux(model::GeckoModel) =
_gecko_column_reactions(model)' * reaction_flux(model.inner)
"""
bounds(model::GeckoModel)
coupling(model::GeckoModel)
Return variable bounds for `GeckoModel`.
Return the coupling of [`GeckoModel`](@ref). That combines the coupling of
the wrapped model, coupling for split reactions, and the coupling for the total
enzyme capacity.
"""
bounds(model::GeckoModel) = (model.xl, model.xu)
coupling(model::GeckoModel) = vcat(
coupling(model.inner) * _gecko_column_reactions(model),
_gecko_reaction_coupling(model),
_gecko_gene_product_coupling(model),
)
"""
coupling(model::GeckoModel)
n_coupling_constraints(model::GeckoModel)
Coupling constraint matrix for a `GeckoModel`.
Count the coupling constraints in [`GeckoModel`](@ref) (refer to
[`coupling`](@ref) for details).
"""
coupling(model::GeckoModel) = model.C
n_coupling_constraints(model::GeckoModel) =
n_coupling_constraints(model.inner) +
length(model.coupling_row_reaction) +
length(model.coupling_row_gene_product)
"""
coupling_bounds(model::GeckoModel)
Coupling bounds for a `GeckoModel`.
"""
coupling_bounds(model::GeckoModel) = (model.cl, model.cu)
The coupling bounds for [`GeckoModel`](@ref) (refer to [`coupling`](@ref) for
details).
"""
function coupling_bounds(model::GeckoModel)
(iclb, icub) = coupling_bounds(model.inner)
(ilb, iub) = bounds(model.inner)
return (
vcat(
iclb,
ilb[model.coupling_row_reaction],
[0.0 for _ in model.coupling_row_gene_product],
),
vcat(
icub,
rub[model.coupling_row_reaction],
[c for (i, c) in model.coupling_row_gene_product],
),
)
end
"""
reaction_flux(model::MetabolicModel)
reaction_flux(model::GeckoModel)
Helper function to get fluxes from optimization problem.
"""
......
......@@ -5,7 +5,7 @@
A helper type that describes the contents of [`SMomentModel`](@ref)s.
"""
struct _smoment_column
reaction_id::Int # number of the corresponding reaction in the inner model
reaction_idx::Int # number of the corresponding reaction in the inner model