Unverified Commit 9671d5aa authored by St. Elmo's avatar St. Elmo Committed by GitHub
Browse files

Merge pull request #519 from LCSB-BioCore/mo-update-maxmin

Extend max-min driving force analysis
parents c8b2fca6 d21cde80
Pipeline #50107 passed with stages
in 21 minutes and 58 seconds
......@@ -11,130 +11,152 @@
# strategy as a tradeoff between energy yield and protein cost.", Proceedings
# of the National Academy of Sciences 110.24 (2013): 10039-10044.
using COBREXA, Tulip
# Let's first make a model of glycolysis fermentation.
mets = [
"13dpg",
"2pg",
"3pg",
"adp",
"atp",
"dhap",
"f6p",
"fdp",
"g3p",
"g6p",
"glc__D",
"h",
"h2o",
"lac__D",
"nad",
"nadh",
"pep",
"pi",
"pyr",
]
rxns = Dict(
"ENO" => Dict("2pg" => -1.0, "h2o" => 1.0, "pep" => 1.0),
"FBA" => Dict("fdp" => -1.0, "dhap" => 1.0, "g3p" => 1.0),
"GAPD" => Dict(
"g3p" => -1.0,
"nad" => -1.0,
"pi" => -1.0,
"h" => 1.0,
"nadh" => 1.0,
"13dpg" => 1.0,
),
"HEX" =>
Dict("atp" => -1.0, "glc__D" => -1.0, "g6p" => 1.0, "adp" => 1.0, "h" => 1.0),
"LDH" =>
Dict("pyr" => -1.0, "nadh" => -1.0, "h" => -1.0, "nad" => 1.0, "lac__D" => 1.0),
"PFK" => Dict("f6p" => -1.0, "atp" => -1.0, "adp" => 1.0, "h" => 1.0, "fdp" => 1.0),
"PGI" => Dict("g6p" => -1.0, "f6p" => 1.0),
"PGK" => Dict("13dpg" => -1.0, "adp" => -1.0, "atp" => 1.0, "3pg" => 1.0),
"PGM" => Dict("3pg" => -1.0, "2pg" => 1.0),
"PYK" =>
Dict("pep" => -1.0, "adp" => -1.0, "h" => -1.0, "atp" => 1.0, "pyr" => 1.0),
"TPI" => Dict("dhap" => -1.0, "g3p" => 1.0),
)
using COBREXA, GLPK, Tulip
model = StandardModel("Glycolysis")
# Let's load the core E. coli model
add_metabolites!(model, Metabolite.(mets))
add_reactions!(model, collect(Reaction(rid; metabolites = mets) for (rid, mets) in rxns))
model = load_model("e_coli_core.json")
model
# We need some thermodynamic data. You can get Gibbs free energies (ΔG⁰) e.g.
# from [eQuilibrator](https://equilibrator.weizmann.ac.il/), possibly using the
# We need some thermodynamic data. You can get reaction Gibbs free energies (ΔG⁰) from
# e.g. [eQuilibrator](https://equilibrator.weizmann.ac.il/), possibly using the
# [Julia wrapper](https://github.com/stelmo/eQuilibrator.jl) that allows you to
# automate this step. Here, we make a dictionary that maps the reaction IDs to
# calculated Gibbs free energies of reactions.
gibbs_free_energies = Dict( # ΔG⁰ in kJ/mol
"TPI" => 5.57535,
"PGK" => -19.32,
"PFK" => -14.5988,
"ENO" => -3.81089,
"PYK" => -27.5833,
"LDH" => -23.6803,
"FBA" => 22.3932,
"PGI" => 2.6617,
"GAPD" => 4.60271,
"PGM" => -4.52041,
"HEX" => -17.90,
# calculated Gibbs free energy of reaction for each reaction (including the transporters).
reaction_standard_gibbs_free_energies = Dict( # kJ/mol
"ACALD" => -21.26,
"PTAr" => 8.65,
"ALCD2x" => 17.47,
"PDH" => -34.24,
"PYK" => -24.48,
"CO2t" => 0.00,
"MALt2_2" => -6.83,
"CS" => -39.33,
"PGM" => -4.47,
"TKT1" => -1.49,
"ACONTa" => 8.46,
"GLNS" => -15.77,
"ICL" => 9.53,
"FBA" => 23.37,
"SUCCt3" => -43.97,
"FORt2" => -3.42,
"G6PDH2r" => -7.39,
"AKGDH" => -28.23,
"TKT2" => -10.31,
"FRD7" => 73.61,
"SUCOAS" => -1.15,
"FBP" => -11.60,
"ICDHyr" => 5.39,
"AKGt2r" => 10.08,
"GLUSy" => -47.21,
"TPI" => 5.62,
"FORt" => 13.50,
"ACONTb" => -1.62,
"GLNabc" => -30.19,
"RPE" => -3.38,
"ACKr" => 14.02,
"THD2" => -33.84,
"PFL" => -19.81,
"RPI" => 4.47,
"D_LACt2" => -3.42,
"TALA" => -0.94,
"PPCK" => 10.65,
"ACt2r" => -3.41,
"NH4t" => -13.60,
"PGL" => -25.94,
"NADTRHD" => -0.01,
"PGK" => 19.57,
"LDH_D" => 20.04,
"ME1" => 12.08,
"PIt2r" => 10.41,
"ATPS4r" => -37.57,
"PYRt2" => -3.42,
"GLCpts" => -45.42,
"GLUDy" => 32.83,
"CYTBD" => -59.70,
"FUMt2_2" => -6.84,
"FRUpts2" => -42.67,
"GAPD" => 0.53,
"H2Ot" => 0.00,
"PPC" => -40.81,
"NADH16" => -80.37,
"PFK" => -18.54,
"MDH" => 25.91,
"PGI" => 2.63,
"O2t" => 0.00,
"ME2" => 12.09,
"GND" => 10.31,
"SUCCt2_2" => -6.82,
"GLUN" => -14.38,
"ETOHt2r" => -16.93,
"ADK1" => 0.38,
"ACALDt" => 0.00,
"SUCDi" => -73.61,
"ENO" => -3.81,
"MALS" => -39.22,
"GLUt2r" => -3.49,
"PPS" => -6.05,
"FUM" => -3.42,
)
# In general you cannot be certain that all fluxes will be positive. This poses problems
# for systematically enforcing that ΔᵣG ≤ 0 as done in the original formulation. In COBREXA
# ΔᵣG ⋅ vᵢ ≤ 0 is instead enforced, where vᵢ is the flux of reaction i. By default all fluxes
# are assumed to be positive, but by supplying thermodynamically consistent flux solution
# it is possible to drop this implicit assumption and makes it easier to directly incorporate
# the max min driving force into non-customized models.
flux_solution = flux_balance_analysis_dict(
model,
GLPK.Optimizer;
modifications = [add_loopless_constraints()],
)
# Run max min driving force analysis with some reasonable constraints. Protons
# and water are removed from the concentration calculation of the optimization
# problem, thus we specify their IDs explicitly. The reason for this is that
# the standard Gibbs free energy change of biochemical reactions take place at
# constant pH, so proton concentration should not change to make the analysis
# behave reasonably; likewise we just assume that reactions occur in relatively
# stable aqueous environments, hence water excluded too.
# Run max min driving force analysis with some reasonable constraints. Protons and water are
# removed from the concentration calculation of the optimization problem, thus we specify
# their IDs explicitly. The reason for this is that the Gibbs free energies of biochemical
# reactions is measured at constant pH, so proton concentrations is fixed; likewise we
# assume that reactions occur in aqueous environments, hence water is excluded too.
df, dgs, concens = max_min_driving_force(
sol = max_min_driving_force(
model,
gibbs_free_energies,
reaction_standard_gibbs_free_energies,
Tulip.Optimizer;
ignore_metabolites = ["h", "h2o"],
concentration_ratios = Dict(("atp", "adp") => 10.0, ("nadh", "nad") => 0.1),
constant_concentrations = Dict("pi" => 1e-2), # constant phosphate concentration set to 10 mM
concentration_lb = 1e-6, # minimum 1 μM for all metabolites
concentration_ub = 1e-2, # maximum 10 mM or all metabolites
flux_solution = flux_solution,
proton_ids = ["h_c", "h_e"],
water_ids = ["h2o_c", "h2o_e"],
concentration_ratios = Dict(
("atp_c", "adp_c") => 10.0,
("nadh_c", "nad_c") => 0.13,
("nadph_c", "nadp_c") => 1.3,
),
concentration_lb = 1e-6,
concentration_ub = 100e-3,
ignore_reaction_ids = [
"H2Ot", # ignore water transport because water's concentration CANNOT change in the implementation of this function (also protons)
],
)
sol.mmdf
# Plot the results to show how the concentrations can be used to ensure that
# each reach proceeds "down hill" (ΔᵣG < 0) and that the driving force is as
# large as possible across all the reactions in the model. Compare this to the
# driving forces at standard conditions.
# driving forces at standard conditions. Note, we only plot glycolysis for simplicity.
# We additionally scale the fluxes according to their stoichiometry in the
# pathway. From the output, it is clear that that metabolite concentrations
# play a large role in ensuring the thermodynamic consistency of in vivo enzyme
# reactions.
relative_flux = Dict(
"HEX" => 1.0,
"PGI" => 1.0,
"PFK" => 1.0,
"FBA" => 1.0,
"TPI" => 1.0,
"GAPD" => 2.0,
"PGK" => 2.0,
"PGM" => 2.0,
"ENO" => 2.0,
"PYK" => 2.0,
"LDH" => 2.0,
)
rids = ["HEX", "PGI", "PFK", "FBA", "TPI", "GAPD", "PGK", "PGM", "ENO", "PYK", "LDH"] # in order of pathway
rid_rf = [relative_flux[rid] for rid in rids]
dg_standard = [gibbs_free_energies[rid] for rid in rids]
dg_opt = [dgs[rid] for rid in rids]
rids = ["GLCpts", "PGI", "PFK", "FBA", "TPI", "GAPD", "PGK", "PGM", "ENO", "PYK"] # glycolysis
rid_rf = [flux_solution[rid] for rid in rids]
dg_standard = cumsum([
reaction_standard_gibbs_free_energies[rid] * flux_solution[rid] for rid in rids
])
dg_standard .-= first(dg_standard)
dg_opt = cumsum([sol.dg_reactions[rid] * flux_solution[rid] for rid in rids])
dg_opt .-= first(dg_opt)
using CairoMakie
......@@ -146,29 +168,13 @@ ax = Axis(
ylabel = "Cumulative ΔG [kJ/mol]",
);
lines!(
ax,
1:length(rids),
(cumsum(dg_standard) .- first(dg_standard)) .* rid_rf;
color = :red,
label = "Standard",
)
lines!(
ax,
1:length(rids),
(cumsum(dg_opt) .- first(dg_opt)) .* rid_rf;
color = :blue,
label = "Optimized",
)
lines!(ax, 1:length(rids), dg_standard; color = :red, label = "Standard")
lines!(ax, 1:length(rids), dg_opt, color = :blue, label = "Optimized")
ax.xticks = (1:length(rids), rids)
fig[1, 2] = Legend(fig, ax, "ΔG'", framevisible = false)
fig
#md # !!! tip "Directions of reactions"
#md # Be careful when constructing models for MMDFA, the reaction directions in the model
#md # and thermodynamic data need to be consistent with the overall flux
#md # direction implied by the model. For example, in BiGG, `LDH_D` is written
#md # `lac__D + nad ⟷ h + nadh + pyr` and the associated ΔrG'⁰ is 23.6803 kJ/mol.
#md # For MMDFA no flux is calculated, so you need to write the reaction
#md # in the direction of flux, i.e. `h + nadh + pyr ⟶ lac__D + nad` with ΔrG'⁰ as
#md # -23.6803 kJ/mol.
#md # !!! tip "Tip: Thermodynamic variability"
#md # Much like flux variability, thermodynamic constraints in a model are also
#md # degenerate. Check out [`max_min_driving_force_variability`](@ref) for ways
#md # to explore the thermodynamic solution space.
"""
max_min_driving_force(
model::MetabolicModel,
gibbs_free_energies::Dict{String,Float64},
reaction_standard_gibbs_free_energies::Dict{String,Float64},
optimizer;
ignore_metabolites::Vector{String} = ["h", "h2o"],
flux_solution::Dict{String,Float64} = Dict{String,Float64}(),
proton_ids::Vector{String} = ["h_c", "h_e"],
water_ids::Vector{String} = ["h2o_c", "h2o_e"],
constant_concentrations::Dict{String,Float64} = Dict{String,Float64}(),
concentration_ratios::Dict{Tuple{String,String},Float64} = Dict{
Tuple{String,String},
Float64,
}(),
concentration_lb = 1e-6,
concentration_ub = 10e-3,
concentration_lb = 1e-9,
concentration_ub = 100e-3,
T = _constants.T,
R = _constants.R,
small_flux_tol = 1e-6,
modifications = [],
ignore_reaction_ids = [],
)
Perform a max-min driving force analysis on the `model`, as defined by Noor, et al.,
"Pathway thermodynamics highlights kinetic obstacles in central metabolism.", PLoS
computational biology, 2014.
The analysis uses the supplied `optimizer` and Gibbs free energies of the
reactions (in `gibbs_free_energies`) to find the max-min driving force, Gibbs
free energy of the reactions and the concentrations of metabolites that
optimize the following problem:
The function uses the supplied `optimizer` and `reaction_standard_gibbs_free_energies`.
Optionally, `flux_solution` can be used to set the directions of each reaction in `model`
(all reactions are assumed to proceed forward and are active by default). The supplied
`flux_solution` should be free of internal cycles i.e. thermodynamically consistent. This
optional input is important if a reaction in `model` normally runs in reverse (negative
flux). Note, reactions in `flux_solution` that are smaller than `small_flux_tol` are also
ignored in the analysis function (for numerical stability).
The max-min driving force algorithm returns the Gibbs free energy of the reactions, the
concentrations of metabolites and the actual maximum minimum driving force. The optimization
problem solved is:
```
max min -ΔᵣG
s.t. ΔᵣG = ΔG⁰ + R T S' ln(C)
ΔᵣG ≤ 0 (∀r)
s.t. ΔᵣG = ΔrG⁰ + R T S' ln(C)
ΔᵣG ≤ 0
ln(Cₗ) ≤ ln(C) ≤ ln(Cᵤ)
```
where `ΔᵣG` are the Gibbs energies dissipated by the reactions, `ΔᵣG⁰` are the
Gibbs free energies of the reactions, R is the gas constant, T is the
temperature, S is the stoichiometry of the model, and C is the vector of
metabolite concentrations (and their respective lower and upper bounds).
where `ΔrG` are the Gibbs energies dissipated by the reactions, R is the gas constant, T is
the temperature, S is the stoichiometry of the model, and C is the vector of metabolite
concentrations (and their respective lower and upper bounds).
In case no feasible solution exists, `nothing` is returned.
Metabolites specified in `ignore_metabolites` are internally ignored -- that
allows to specify e.g. removal of protons and water, thus allowing the
thermodynamic calculations to assume constant pH and aqueous conditions. Note, if using
biochemical thermodynamic data then you _must_ include the ids of protons and water here.
Reactions specified in `ignore_reaction_ids` are internally ignored when calculating the
max-min driving force. This should include water and proton importers.
Since biochemical thermodynamics are assumed, the `proton_ids` and `water_ids` need to be
specified so that they can be ignored in the calculations. Effectively this assumes an
aqueous environment at constant pH is used.
`constant_concentrations` is used to fix the concentrations of certain metabolites (such as
CO₂). `concentration_ratios` is used to specify additional constraints on metabolite pair
......@@ -55,56 +67,81 @@ optimization problems.
"""
function max_min_driving_force(
model::MetabolicModel,
gibbs_free_energies::Dict{String,Float64},
reaction_standard_gibbs_free_energies::Dict{String,Float64},
optimizer;
ignore_metabolites::Vector{String} = ["h", "h2o"],
flux_solution::Dict{String,Float64} = Dict{String,Float64}(),
proton_ids::Vector{String} = ["h_c", "h_e"],
water_ids::Vector{String} = ["h2o_c", "h2o_e"],
constant_concentrations::Dict{String,Float64} = Dict{String,Float64}(),
concentration_ratios::Dict{Tuple{String,String},Float64} = Dict{
Tuple{String,String},
Float64,
}(),
concentration_lb = 1e-6,
concentration_ub = 10e-3,
concentration_lb = 1e-9,
concentration_ub = 100e-3,
T = _constants.T,
R = _constants.R,
small_flux_tol = 1e-6,
modifications = [],
ignore_reaction_ids = [],
)
# get ΔGs in order of reactions
dg0s = [gibbs_free_energies[rid] for rid in reactions(model)]
# get all metabolites that participate in the ΔG calculations (no protons, no water)
midxs = [
midx for
(midx, mid) in enumerate(metabolites(model)) if !(mid in ignore_metabolites)
] # idx in original model
mids = metabolites(model)[midxs]
# find the corresponding metabolites
S = stoichiometry(model)[midxs, :]
# start building the optimization model
opt_model = Model(optimizer)
@variables opt_model begin
minDF
logcs[1:length(midxs)]
dgs[1:n_reactions(model)]
mmdf
logcs[1:n_metabolites(model)]
dgrs[1:n_reactions(model)]
end
# set proton log concentration to zero so that it won't impact any calculations (biothermodynamics assumption)
proton_idxs = Int.(indexin(proton_ids, metabolites(model)))
for idx in proton_idxs
JuMP.fix(logcs[idx], 0.0)
end
@constraints opt_model begin
minDF .<= -dgs
dgs .<= 0
dgs .== dg0s .+ R * T .* S' * logcs
# set water concentrations to zero (aqueous condition assumptions)
water_idxs = Int.(indexin(water_ids, metabolites(model)))
for idx in water_idxs
JuMP.fix(logcs[idx], 0.0)
end
@objective(opt_model, Max, minDF)
# only consider reactions with supplied thermodynamic data AND have a flux bigger than
# small_flux_tol => finds a thermodynamic profile that explains flux_solution
active_rids = filter(
rid ->
haskey(reaction_standard_gibbs_free_energies, rid) &&
abs(get(flux_solution, rid, small_flux_tol / 2)) > small_flux_tol &&
!(rid in ignore_reaction_ids),
reactions(model),
)
active_ridxs = Int.(indexin(active_rids, reactions(model)))
# give dummy dG0 for reactions that don't have data
dg0s =
[get(reaction_standard_gibbs_free_energies, rid, 0.0) for rid in reactions(model)]
S = stoichiometry(model)
@constraint(opt_model, dgrs .== dg0s .+ (R * T) * S' * logcs)
# thermodynamics should correspond to the fluxes
flux_signs = [sign(get(flux_solution, rid, 1.0)) for rid in reactions(model)]
# only constrain reactions that have thermo data
@constraint(opt_model, dgrs[active_ridxs] .* flux_signs[active_ridxs] .<= 0)
# add the absolute bounds
for (midx, mid) in enumerate(mids) # idx in opt_model (missing ignore_metabolites)
missing_mets =
[mid for mid in keys(constant_concentrations) if !(mid in metabolites(model))]
!isempty(missing_mets) &&
throw(DomainError(missing_mets, "metabolite(s) not found in model."))
for (midx, mid) in enumerate(metabolites(model)) # idx in opt_model (missing ignore_metabolites)
midx in water_idxs && continue
midx in proton_idxs && continue
if haskey(constant_concentrations, mid)
# we have an exact bound for this metabolite
@constraint(opt_model, logcs[midx] == log(constant_concentrations[mid]))
JuMP.fix(logcs[midx], log(constant_concentrations[mid]))
else
# this metabolite needs default bounds
# this metabolite needs bounds
@constraint(
opt_model,
log(concentration_lb) <= logcs[midx] <= log(concentration_ub)
......@@ -114,13 +151,16 @@ function max_min_driving_force(
# add the relative bounds
for ((mid1, mid2), val) in concentration_ratios
idxs = indexin([mid1, mid2], mids)
any(isnothing.(idxs)) && throw(
DomainError((mid1, mid2), "metabolite pair not found in relevant reactions"),
)
idxs = indexin([mid1, mid2], metabolites(model)) # TODO: this is not performant
any(isnothing.(idxs)) &&
throw(DomainError((mid1, mid2), "metabolite pair not found in model."))
@constraint(opt_model, logcs[idxs[1]] == log(val) + logcs[idxs[2]])
end
@constraint(opt_model, mmdf .<= -dgrs[active_ridxs] .* flux_signs[active_ridxs])
@objective(opt_model, Max, mmdf)
# apply the modifications, if any
for mod in modifications
mod(model, opt_model)
......@@ -131,8 +171,139 @@ function max_min_driving_force(
is_solved(opt_model) || return nothing
return (
mmdf = objective_value(opt_model),
dgs = Dict(rid => value(dgs[i]) for (i, rid) in enumerate(reactions(model))),
cs = Dict(mid => exp(value(logcs[i])) for (i, mid) in enumerate(mids)),
mmdf = value(opt_model[:mmdf]),
dg_reactions = Dict(
rid => value(opt_model[:dgrs][i]) for (i, rid) in enumerate(reactions(model))
),
concentrations = Dict(
mid => exp(value(opt_model[:logcs][i])) for
(i, mid) in enumerate(metabolites(model))
),
)
end
"""
max_min_driving_force_variability(
model::MetabolicModel,
reaction_standard_gibbs_free_energies::Dict{String,Float64},
optimizer;
workers =[myid()],
optimal_objective_value = nothing,
bounds = z -> (z, Inf),
flux_solution::Dict{String,Float64} = Dict{String,Float64}(),
proton_ids::Vector{String} = ["h_c", "h_e"],
water_ids::Vector{String} = ["h2o_c", "h2o_e"],
constant_concentrations::Dict{String,Float64} = Dict{String,Float64}(),
concentration_ratios::Dict{Tuple{String,String},Float64} = Dict{
Tuple{String,String},
Float64,
}(),
concentration_lb = 1e-9,
concentration_ub = 100e-3,
T = _constants.T,
R = _constants.R,
small_flux_tol = 1e-6,
modifications = [],
ignore_reaction_ids = [],
)
Perform a variant of flux variability analysis on a max min driving force type problem.
Arguments are forwarded to [`max_min_driving_force`](@ref). Calls [`screen`](@ref)
internally and possibly distributes computation across `workers`. If
`optimal_objective_value = nothing`, the function first performs regular max min driving
force analysis to find the max min driving force of the model and sets this to
`optimal_objective_value`. Then iteratively maximizes and minimizes the driving force across
each reaction, and then the concentrations while staying close to the original max min
driving force as specified in `bounds`.
The `bounds` is a user-supplied function that specifies the max min driving force bounds for
the variability optimizations, by default it restricts the flux objective value to the
precise optimum reached in the normal max min driving force analysis. It can return `-Inf`
and `Inf` in first and second pair to remove the limit. Use [`gamma_bounds`](@ref) and
[`objective_bounds`](@ref) for simple bounds.
Returns a matrix of solutions to [`max_min_driving_force`](@ref) additionally constrained as
described above, where the rows are in the order of the reactions and then the metabolites
of the `model`. For the reaction rows the first column is the maximum dG of that reaction,
and the second column is the minimum dG of that reaction subject to the above constraints.
For the metabolite rows, the first column is the maximum concentration, and the second column
is the minimum concentration subject to the constraints above.
"""
function max_min_driving_force_variability(
model::MetabolicModel,
reaction_standard_gibbs_free_energies::Dict{String,Float64},
optimizer;
workers = [myid()],
optimal_objective_value = nothing,
bounds = z -> (z, Inf),
modifications = [],
kwargs...,
)
if isnothing(optimal_objective_value)
initsol = max_min_driving_force(
model,
reaction_standard_gibbs_free_energies,
optimizer;
modifications,
kwargs...,
)
mmdf = initsol.mmdf
else
mmdf = optimal_objective_value
end
lb, ub = bounds(mmdf)
dgr_variants = [
[[_mmdf_add_df_bound(lb, ub), _mmdf_dgr_objective(ridx, sense)]] for
ridx = 1:n_reactions(model), sense in [MOI.MAX_SENSE, MOI.MIN_SENSE]
]
concen_variants = [
[[_mmdf_add_df_bound(lb, ub), _mmdf_concen_objective(midx, sense)]] for
midx = 1:n_metabolites(model), sense in [MOI.MAX_SENSE, MOI.MIN_SENSE]