Unverified Commit d21cde80 authored by St. Elmo's avatar St. Elmo
Browse files

Extended max min driving force to general models and implement variability analysis

parent 0cb74ae7
Pipeline #50106 passed with stages
in 9 minutes and 19 seconds
...@@ -11,130 +11,152 @@ ...@@ -11,130 +11,152 @@
# strategy as a tradeoff between energy yield and protein cost.", Proceedings # strategy as a tradeoff between energy yield and protein cost.", Proceedings
# of the National Academy of Sciences 110.24 (2013): 10039-10044. # of the National Academy of Sciences 110.24 (2013): 10039-10044.
using COBREXA, Tulip using COBREXA, GLPK, Tulip
# Let's first make a model of glycolysis fermentation.
mets = [
"13dpg",
"2pg",
"3pg",
"adp",
"atp",
"dhap",
"f6p",
"fdp",
"g3p",
"g6p",
"glc__D",
"h",
"h2o",
"lac__D",
"nad",
"nadh",
"pep",
"pi",
"pyr",
]
rxns = Dict(
"ENO" => Dict("2pg" => -1.0, "h2o" => 1.0, "pep" => 1.0),
"FBA" => Dict("fdp" => -1.0, "dhap" => 1.0, "g3p" => 1.0),
"GAPD" => Dict(
"g3p" => -1.0,
"nad" => -1.0,
"pi" => -1.0,
"h" => 1.0,
"nadh" => 1.0,
"13dpg" => 1.0,
),
"HEX" =>
Dict("atp" => -1.0, "glc__D" => -1.0, "g6p" => 1.0, "adp" => 1.0, "h" => 1.0),
"LDH" =>
Dict("pyr" => -1.0, "nadh" => -1.0, "h" => -1.0, "nad" => 1.0, "lac__D" => 1.0),
"PFK" => Dict("f6p" => -1.0, "atp" => -1.0, "adp" => 1.0, "h" => 1.0, "fdp" => 1.0),
"PGI" => Dict("g6p" => -1.0, "f6p" => 1.0),
"PGK" => Dict("13dpg" => -1.0, "adp" => -1.0, "atp" => 1.0, "3pg" => 1.0),
"PGM" => Dict("3pg" => -1.0, "2pg" => 1.0),
"PYK" =>
Dict("pep" => -1.0, "adp" => -1.0, "h" => -1.0, "atp" => 1.0, "pyr" => 1.0),
"TPI" => Dict("dhap" => -1.0, "g3p" => 1.0),
)
model = StandardModel("Glycolysis") # Let's load the core E. coli model
add_metabolites!(model, Metabolite.(mets)) model = load_model("e_coli_core.json")
add_reactions!(model, collect(Reaction(rid; metabolites = mets) for (rid, mets) in rxns))
model # We need some thermodynamic data. You can get reaction Gibbs free energies (ΔG⁰) from
# e.g. [eQuilibrator](https://equilibrator.weizmann.ac.il/), possibly using the
# We need some thermodynamic data. You can get Gibbs free energies (ΔG⁰) e.g.
# from [eQuilibrator](https://equilibrator.weizmann.ac.il/), possibly using the
# [Julia wrapper](https://github.com/stelmo/eQuilibrator.jl) that allows you to # [Julia wrapper](https://github.com/stelmo/eQuilibrator.jl) that allows you to
# automate this step. Here, we make a dictionary that maps the reaction IDs to # automate this step. Here, we make a dictionary that maps the reaction IDs to
# calculated Gibbs free energies of reactions. # calculated Gibbs free energy of reaction for each reaction (including the transporters).
gibbs_free_energies = Dict( # ΔG⁰ in kJ/mol reaction_standard_gibbs_free_energies = Dict( # kJ/mol
"TPI" => 5.57535, "ACALD" => -21.26,
"PGK" => -19.32, "PTAr" => 8.65,
"PFK" => -14.5988, "ALCD2x" => 17.47,
"ENO" => -3.81089, "PDH" => -34.24,
"PYK" => -27.5833, "PYK" => -24.48,
"LDH" => -23.6803, "CO2t" => 0.00,
"FBA" => 22.3932, "MALt2_2" => -6.83,
"PGI" => 2.6617, "CS" => -39.33,
"GAPD" => 4.60271, "PGM" => -4.47,
"PGM" => -4.52041, "TKT1" => -1.49,
"HEX" => -17.90, "ACONTa" => 8.46,
"GLNS" => -15.77,
"ICL" => 9.53,
"FBA" => 23.37,
"SUCCt3" => -43.97,
"FORt2" => -3.42,
"G6PDH2r" => -7.39,
"AKGDH" => -28.23,
"TKT2" => -10.31,
"FRD7" => 73.61,
"SUCOAS" => -1.15,
"FBP" => -11.60,
"ICDHyr" => 5.39,
"AKGt2r" => 10.08,
"GLUSy" => -47.21,
"TPI" => 5.62,
"FORt" => 13.50,
"ACONTb" => -1.62,
"GLNabc" => -30.19,
"RPE" => -3.38,
"ACKr" => 14.02,
"THD2" => -33.84,
"PFL" => -19.81,
"RPI" => 4.47,
"D_LACt2" => -3.42,
"TALA" => -0.94,
"PPCK" => 10.65,
"ACt2r" => -3.41,
"NH4t" => -13.60,
"PGL" => -25.94,
"NADTRHD" => -0.01,
"PGK" => 19.57,
"LDH_D" => 20.04,
"ME1" => 12.08,
"PIt2r" => 10.41,
"ATPS4r" => -37.57,
"PYRt2" => -3.42,
"GLCpts" => -45.42,
"GLUDy" => 32.83,
"CYTBD" => -59.70,
"FUMt2_2" => -6.84,
"FRUpts2" => -42.67,
"GAPD" => 0.53,
"H2Ot" => 0.00,
"PPC" => -40.81,
"NADH16" => -80.37,
"PFK" => -18.54,
"MDH" => 25.91,
"PGI" => 2.63,
"O2t" => 0.00,
"ME2" => 12.09,
"GND" => 10.31,
"SUCCt2_2" => -6.82,
"GLUN" => -14.38,
"ETOHt2r" => -16.93,
"ADK1" => 0.38,
"ACALDt" => 0.00,
"SUCDi" => -73.61,
"ENO" => -3.81,
"MALS" => -39.22,
"GLUt2r" => -3.49,
"PPS" => -6.05,
"FUM" => -3.42,
)
# In general you cannot be certain that all fluxes will be positive. This poses problems
# for systematically enforcing that ΔᵣG ≤ 0 as done in the original formulation. In COBREXA
# ΔᵣG ⋅ vᵢ ≤ 0 is instead enforced, where vᵢ is the flux of reaction i. By default all fluxes
# are assumed to be positive, but by supplying thermodynamically consistent flux solution
# it is possible to drop this implicit assumption and makes it easier to directly incorporate
# the max min driving force into non-customized models.
flux_solution = flux_balance_analysis_dict(
model,
GLPK.Optimizer;
modifications = [add_loopless_constraints()],
) )
# Run max min driving force analysis with some reasonable constraints. Protons # Run max min driving force analysis with some reasonable constraints. Protons and water are
# and water are removed from the concentration calculation of the optimization # removed from the concentration calculation of the optimization problem, thus we specify
# problem, thus we specify their IDs explicitly. The reason for this is that # their IDs explicitly. The reason for this is that the Gibbs free energies of biochemical
# the standard Gibbs free energy change of biochemical reactions take place at # reactions is measured at constant pH, so proton concentrations is fixed; likewise we
# constant pH, so proton concentration should not change to make the analysis # assume that reactions occur in aqueous environments, hence water is excluded too.
# behave reasonably; likewise we just assume that reactions occur in relatively
# stable aqueous environments, hence water excluded too.
df, dgs, concens = max_min_driving_force( sol = max_min_driving_force(
model, model,
gibbs_free_energies, reaction_standard_gibbs_free_energies,
Tulip.Optimizer; Tulip.Optimizer;
ignore_metabolites = ["h", "h2o"], flux_solution = flux_solution,
concentration_ratios = Dict(("atp", "adp") => 10.0, ("nadh", "nad") => 0.1), proton_ids = ["h_c", "h_e"],
constant_concentrations = Dict("pi" => 1e-2), # constant phosphate concentration set to 10 mM water_ids = ["h2o_c", "h2o_e"],
concentration_lb = 1e-6, # minimum 1 μM for all metabolites concentration_ratios = Dict(
concentration_ub = 1e-2, # maximum 10 mM or all metabolites ("atp_c", "adp_c") => 10.0,
("nadh_c", "nad_c") => 0.13,
("nadph_c", "nadp_c") => 1.3,
),
concentration_lb = 1e-6,
concentration_ub = 100e-3,
ignore_reaction_ids = [
"H2Ot", # ignore water transport because water's concentration CANNOT change in the implementation of this function (also protons)
],
) )
sol.mmdf
# Plot the results to show how the concentrations can be used to ensure that # Plot the results to show how the concentrations can be used to ensure that
# each reach proceeds "down hill" (ΔᵣG < 0) and that the driving force is as # each reach proceeds "down hill" (ΔᵣG < 0) and that the driving force is as
# large as possible across all the reactions in the model. Compare this to the # large as possible across all the reactions in the model. Compare this to the
# driving forces at standard conditions. # driving forces at standard conditions. Note, we only plot glycolysis for simplicity.
# We additionally scale the fluxes according to their stoichiometry in the # We additionally scale the fluxes according to their stoichiometry in the
# pathway. From the output, it is clear that that metabolite concentrations # pathway. From the output, it is clear that that metabolite concentrations
# play a large role in ensuring the thermodynamic consistency of in vivo enzyme # play a large role in ensuring the thermodynamic consistency of in vivo enzyme
# reactions. # reactions.
relative_flux = Dict( rids = ["GLCpts", "PGI", "PFK", "FBA", "TPI", "GAPD", "PGK", "PGM", "ENO", "PYK"] # glycolysis
"HEX" => 1.0, rid_rf = [flux_solution[rid] for rid in rids]
"PGI" => 1.0, dg_standard = cumsum([
"PFK" => 1.0, reaction_standard_gibbs_free_energies[rid] * flux_solution[rid] for rid in rids
"FBA" => 1.0, ])
"TPI" => 1.0, dg_standard .-= first(dg_standard)
"GAPD" => 2.0, dg_opt = cumsum([sol.dg_reactions[rid] * flux_solution[rid] for rid in rids])
"PGK" => 2.0, dg_opt .-= first(dg_opt)
"PGM" => 2.0,
"ENO" => 2.0,
"PYK" => 2.0,
"LDH" => 2.0,
)
rids = ["HEX", "PGI", "PFK", "FBA", "TPI", "GAPD", "PGK", "PGM", "ENO", "PYK", "LDH"] # in order of pathway
rid_rf = [relative_flux[rid] for rid in rids]
dg_standard = [gibbs_free_energies[rid] for rid in rids]
dg_opt = [dgs[rid] for rid in rids]
using CairoMakie using CairoMakie
...@@ -146,29 +168,13 @@ ax = Axis( ...@@ -146,29 +168,13 @@ ax = Axis(
ylabel = "Cumulative ΔG [kJ/mol]", ylabel = "Cumulative ΔG [kJ/mol]",
); );
lines!( lines!(ax, 1:length(rids), dg_standard; color = :red, label = "Standard")
ax, lines!(ax, 1:length(rids), dg_opt, color = :blue, label = "Optimized")
1:length(rids),
(cumsum(dg_standard) .- first(dg_standard)) .* rid_rf;
color = :red,
label = "Standard",
)
lines!(
ax,
1:length(rids),
(cumsum(dg_opt) .- first(dg_opt)) .* rid_rf;
color = :blue,
label = "Optimized",
)
ax.xticks = (1:length(rids), rids) ax.xticks = (1:length(rids), rids)
fig[1, 2] = Legend(fig, ax, "ΔG'", framevisible = false) fig[1, 2] = Legend(fig, ax, "ΔG'", framevisible = false)
fig fig
#md # !!! tip "Directions of reactions" #md # !!! tip "Tip: Thermodynamic variability"
#md # Be careful when constructing models for MMDFA, the reaction directions in the model #md # Much like flux variability, thermodynamic constraints in a model are also
#md # and thermodynamic data need to be consistent with the overall flux #md # degenerate. Check out [`max_min_driving_force_variability`](@ref) for ways
#md # direction implied by the model. For example, in BiGG, `LDH_D` is written #md # to explore the thermodynamic solution space.
#md # `lac__D + nad ⟷ h + nadh + pyr` and the associated ΔrG'⁰ is 23.6803 kJ/mol.
#md # For MMDFA no flux is calculated, so you need to write the reaction
#md # in the direction of flux, i.e. `h + nadh + pyr ⟶ lac__D + nad` with ΔrG'⁰ as
#md # -23.6803 kJ/mol.
""" """
max_min_driving_force( max_min_driving_force(
model::MetabolicModel, model::MetabolicModel,
gibbs_free_energies::Dict{String,Float64}, reaction_standard_gibbs_free_energies::Dict{String,Float64},
optimizer; optimizer;
ignore_metabolites::Vector{String} = ["h", "h2o"], flux_solution::Dict{String,Float64} = Dict{String,Float64}(),
proton_ids::Vector{String} = ["h_c", "h_e"],
water_ids::Vector{String} = ["h2o_c", "h2o_e"],
constant_concentrations::Dict{String,Float64} = Dict{String,Float64}(), constant_concentrations::Dict{String,Float64} = Dict{String,Float64}(),
concentration_ratios::Dict{Tuple{String,String},Float64} = Dict{ concentration_ratios::Dict{Tuple{String,String},Float64} = Dict{
Tuple{String,String}, Tuple{String,String},
Float64, Float64,
}(), }(),
concentration_lb = 1e-6, concentration_lb = 1e-9,
concentration_ub = 10e-3, concentration_ub = 100e-3,
T = _constants.T, T = _constants.T,
R = _constants.R, R = _constants.R,
small_flux_tol = 1e-6,
modifications = [], modifications = [],
ignore_reaction_ids = [],
) )
Perform a max-min driving force analysis on the `model`, as defined by Noor, et al., Perform a max-min driving force analysis on the `model`, as defined by Noor, et al.,
"Pathway thermodynamics highlights kinetic obstacles in central metabolism.", PLoS "Pathway thermodynamics highlights kinetic obstacles in central metabolism.", PLoS
computational biology, 2014. computational biology, 2014.
The analysis uses the supplied `optimizer` and Gibbs free energies of the The function uses the supplied `optimizer` and `reaction_standard_gibbs_free_energies`.
reactions (in `gibbs_free_energies`) to find the max-min driving force, Gibbs Optionally, `flux_solution` can be used to set the directions of each reaction in `model`
free energy of the reactions and the concentrations of metabolites that (all reactions are assumed to proceed forward and are active by default). The supplied
optimize the following problem: `flux_solution` should be free of internal cycles i.e. thermodynamically consistent. This
optional input is important if a reaction in `model` normally runs in reverse (negative
flux). Note, reactions in `flux_solution` that are smaller than `small_flux_tol` are also
ignored in the analysis function (for numerical stability).
The max-min driving force algorithm returns the Gibbs free energy of the reactions, the
concentrations of metabolites and the actual maximum minimum driving force. The optimization
problem solved is:
``` ```
max min -ΔᵣG max min -ΔᵣG
s.t. ΔᵣG = ΔG⁰ + R T S' ln(C) s.t. ΔᵣG = ΔrG⁰ + R T S' ln(C)
ΔᵣG ≤ 0 (∀r) ΔᵣG ≤ 0
ln(Cₗ) ≤ ln(C) ≤ ln(Cᵤ) ln(Cₗ) ≤ ln(C) ≤ ln(Cᵤ)
``` ```
where `ΔᵣG` are the Gibbs energies dissipated by the reactions, `ΔᵣG⁰` are the where `ΔrG` are the Gibbs energies dissipated by the reactions, R is the gas constant, T is
Gibbs free energies of the reactions, R is the gas constant, T is the the temperature, S is the stoichiometry of the model, and C is the vector of metabolite
temperature, S is the stoichiometry of the model, and C is the vector of concentrations (and their respective lower and upper bounds).
metabolite concentrations (and their respective lower and upper bounds).
In case no feasible solution exists, `nothing` is returned. In case no feasible solution exists, `nothing` is returned.
Metabolites specified in `ignore_metabolites` are internally ignored -- that Reactions specified in `ignore_reaction_ids` are internally ignored when calculating the
allows to specify e.g. removal of protons and water, thus allowing the max-min driving force. This should include water and proton importers.
thermodynamic calculations to assume constant pH and aqueous conditions. Note, if using
biochemical thermodynamic data then you _must_ include the ids of protons and water here. Since biochemical thermodynamics are assumed, the `proton_ids` and `water_ids` need to be
specified so that they can be ignored in the calculations. Effectively this assumes an
aqueous environment at constant pH is used.
`constant_concentrations` is used to fix the concentrations of certain metabolites (such as `constant_concentrations` is used to fix the concentrations of certain metabolites (such as
CO₂). `concentration_ratios` is used to specify additional constraints on metabolite pair CO₂). `concentration_ratios` is used to specify additional constraints on metabolite pair
...@@ -55,56 +67,81 @@ optimization problems. ...@@ -55,56 +67,81 @@ optimization problems.
""" """
function max_min_driving_force( function max_min_driving_force(
model::MetabolicModel, model::MetabolicModel,
gibbs_free_energies::Dict{String,Float64}, reaction_standard_gibbs_free_energies::Dict{String,Float64},
optimizer; optimizer;
ignore_metabolites::Vector{String} = ["h", "h2o"], flux_solution::Dict{String,Float64} = Dict{String,Float64}(),
proton_ids::Vector{String} = ["h_c", "h_e"],
water_ids::Vector{String} = ["h2o_c", "h2o_e"],
constant_concentrations::Dict{String,Float64} = Dict{String,Float64}(), constant_concentrations::Dict{String,Float64} = Dict{String,Float64}(),
concentration_ratios::Dict{Tuple{String,String},Float64} = Dict{ concentration_ratios::Dict{Tuple{String,String},Float64} = Dict{
Tuple{String,String}, Tuple{String,String},
Float64, Float64,
}(), }(),
concentration_lb = 1e-6, concentration_lb = 1e-9,
concentration_ub = 10e-3, concentration_ub = 100e-3,
T = _constants.T, T = _constants.T,
R = _constants.R, R = _constants.R,
small_flux_tol = 1e-6,
modifications = [], modifications = [],
ignore_reaction_ids = [],
) )
# get ΔGs in order of reactions
dg0s = [gibbs_free_energies[rid] for rid in reactions(model)]
# get all metabolites that participate in the ΔG calculations (no protons, no water)
midxs = [
midx for
(midx, mid) in enumerate(metabolites(model)) if !(mid in ignore_metabolites)
] # idx in original model
mids = metabolites(model)[midxs]
# find the corresponding metabolites
S = stoichiometry(model)[midxs, :]
# start building the optimization model
opt_model = Model(optimizer) opt_model = Model(optimizer)
@variables opt_model begin @variables opt_model begin
minDF mmdf
logcs[1:length(midxs)] logcs[1:n_metabolites(model)]
dgs[1:n_reactions(model)] dgrs[1:n_reactions(model)]
end
# set proton log concentration to zero so that it won't impact any calculations (biothermodynamics assumption)
proton_idxs = Int.(indexin(proton_ids, metabolites(model)))
for idx in proton_idxs
JuMP.fix(logcs[idx], 0.0)
end end
@constraints opt_model begin # set water concentrations to zero (aqueous condition assumptions)
minDF .<= -dgs water_idxs = Int.(indexin(water_ids, metabolites(model)))
dgs .<= 0 for idx in water_idxs
dgs .== dg0s .+ R * T .* S' * logcs JuMP.fix(logcs[idx], 0.0)
end end
@objective(opt_model, Max, minDF) # only consider reactions with supplied thermodynamic data AND have a flux bigger than
# small_flux_tol => finds a thermodynamic profile that explains flux_solution
active_rids = filter(
rid ->
haskey(reaction_standard_gibbs_free_energies, rid) &&
abs(get(flux_solution, rid, small_flux_tol / 2)) > small_flux_tol &&
!(rid in ignore_reaction_ids),
reactions(model),
)
active_ridxs = Int.(indexin(active_rids, reactions(model)))
# give dummy dG0 for reactions that don't have data
dg0s =
[get(reaction_standard_gibbs_free_energies, rid, 0.0) for rid in reactions(model)]
S = stoichiometry(model)
@constraint(opt_model, dgrs .== dg0s .+ (R * T) * S' * logcs)
# thermodynamics should correspond to the fluxes
flux_signs = [sign(get(flux_solution, rid, 1.0)) for rid in reactions(model)]
# only constrain reactions that have thermo data
@constraint(opt_model, dgrs[active_ridxs] .* flux_signs[active_ridxs] .<= 0)
# add the absolute bounds # add the absolute bounds
for (midx, mid) in enumerate(mids) # idx in opt_model (missing ignore_metabolites) missing_mets =
[mid for mid in keys(constant_concentrations) if !(mid in metabolites(model))]
!isempty(missing_mets) &&
throw(DomainError(missing_mets, "metabolite(s) not found in model."))
for (midx, mid) in enumerate(metabolites(model)) # idx in opt_model (missing ignore_metabolites)
midx in water_idxs && continue
midx in proton_idxs && continue
if haskey(constant_concentrations, mid) if haskey(constant_concentrations, mid)
# we have an exact bound for this metabolite JuMP.fix(logcs[midx], log(constant_concentrations[mid]))
@constraint(opt_model, logcs[midx] == log(constant_concentrations[mid]))
else else
# this metabolite needs default bounds # this metabolite needs bounds
@constraint( @constraint(
opt_model, opt_model,
log(concentration_lb) <= logcs[midx] <= log(concentration_ub) log(concentration_lb) <= logcs[midx] <= log(concentration_ub)
...@@ -114,13 +151,16 @@ function max_min_driving_force( ...@@ -114,13 +151,16 @@ function max_min_driving_force(
# add the relative bounds # add the relative bounds
for ((mid1, mid2), val) in concentration_ratios for ((mid1, mid2), val) in concentration_ratios
idxs = indexin([mid1, mid2], mids) idxs = indexin([mid1, mid2], metabolites(model)) # TODO: this is not performant
any(isnothing.(idxs)) && throw( any(isnothing.(idxs)) &&
DomainError((mid1, mid2), "metabolite pair not found in relevant reactions"), throw(DomainError((mid1, mid2), "metabolite pair not found in model."))
)
@constraint(opt_model, logcs[idxs[1]] == log(val) + logcs[idxs[2]]) @constraint(opt_model, logcs[idxs[1]] == log(val) + logcs[idxs[2]])
end end
@constraint(opt_model, mmdf .<= -dgrs[active_ridxs] .* flux_signs[active_ridxs])
@objective(opt_model, Max, mmdf)
# apply the modifications, if any # apply the modifications, if any
for mod in modifications for mod in modifications
mod(model, opt_model) mod(model, opt_model)
...@@ -131,8 +171,139 @@ function max_min_driving_force( ...@@ -131,8 +171,139 @@ function max_min_driving_force(
is_solved(opt_model) || return nothing is_solved(opt_model) || return nothing
return ( return (
mmdf = objective_value(opt_model), mmdf = value(opt_model[:mmdf]),
dgs = Dict(rid => value(dgs[i]) for (i, rid) in enumerate(reactions(model))), dg_reactions = Dict(
cs = Dict(mid => exp(value(logcs[i])) for (i, mid) in enumerate(mids)), rid => value(opt_model[:dgrs][i]) for (i, rid) in enumerate(reactions(model))
),
concentrations = Dict(
mid => exp(value(opt_model[:logcs][i])) for
(i, mid) in enumerate(metabolites(model))
),
)
end
"""
max_min_driving_force_variability(
model::MetabolicModel,
reaction_standard_gibbs_free_energies::Dict{String,Float64},
optimizer;
workers =[myid()],
optimal_objective_value = nothing,
bounds = z -> (z, Inf),
flux_solution::Dict{String,Float64} = Dict{String,Float64}(),
proton_ids::Vector{String} = ["h_c", "h_e"],
water_ids::Vector{String} = ["h2o_c", "h2o_e"],
constant_concentrations::Dict{String,Float64} = Dict{String,Float64}(),
concentration_ratios::Dict{Tuple{String,String},Float64} = Dict{
Tuple{String,String},
Float64,
}(),
concentration_lb = 1e-9,
concentration_ub = 100e-3,
T = _constants.T,