Unverified Commit da5e807e authored by Laurent Heirendt's avatar Laurent Heirendt Committed by GitHub
Browse files

Merge pull request #537 from LCSB-BioCore/develop

Regular merge develop→master
parents c0dcab54 181120f3
Pipeline #50425 passed with stages
in 14 minutes and 35 seconds
......@@ -44,9 +44,9 @@ variables:
# Test environment & platform settings
#
.global_julia15: &global_julia15
.global_julia17: &global_julia17
variables:
JULIA_VER: "v1.5.3"
JULIA_VER: "v1.7.0"
.global_julia16: &global_julia16
variables:
......@@ -85,7 +85,7 @@ variables:
# any available docker and current julia
#
docker:julia1.6:
docker:julia1.7:
stage: test
image: $CI_REGISTRY/r3/docker/julia-custom
script:
......@@ -99,12 +99,12 @@ docker:julia1.6:
# built & deployed
#
linux:julia1.5:
linux:julia1.7:
stage: test
tags:
- slave01
<<: *global_trigger_full_tests
<<: *global_julia15
<<: *global_julia17
<<: *global_env_linux
linux:julia1.6:
......@@ -119,22 +119,22 @@ linux:julia1.6:
# Additional platform&environment compatibility tests
#
windows8:julia1.5:
windows8:julia1.7:
stage: test-compat
<<: *global_trigger_compat_tests
<<: *global_julia15
<<: *global_julia17
<<: *global_env_win8
windows10:julia1.5:
windows10:julia1.7:
stage: test-compat
<<: *global_trigger_compat_tests
<<: *global_julia15
<<: *global_julia17
<<: *global_env_win10
mac:julia1.5:
mac:julia1.7:
stage: test-compat
<<: *global_trigger_compat_tests
<<: *global_julia15
<<: *global_julia17
<<: *global_env_mac
windows8:julia1.6:
......@@ -221,6 +221,7 @@ documentation-assets:gource:
- docker run -v "$PWD":/visualization $CI_REGISTRY/r3/docker/gource
artifacts:
paths: ['output.gif']
expire_in: 1 year
<<: *global_trigger_build_doc
#
......@@ -238,12 +239,12 @@ documentation-assets:gource:
- julia --project=@. -e 'import Pkg; Pkg.instantiate();'
- julia --project=@. --color=yes test/doctests.jl
doc-tests-pr:julia1.6:
doc-tests-pr:julia1.7:
stage: documentation
<<: *global_doctests
<<: *global_trigger_pull_request
doc-tests:julia1.6:
doc-tests:julia1.7:
stage: test
<<: *global_doctests
<<: *global_trigger_full_tests
......
name = "COBREXA"
uuid = "babc4406-5200-4a30-9033-bf5ae714c842"
authors = ["The developers of COBREXA.jl"]
version = "1.1"
version = "1.1.1"
[deps]
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
DistributedData = "f6a0035f-c5ac-4ad0-b410-ad102ced35df"
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
......@@ -23,29 +22,25 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
[compat]
DistributedData = "0.1.4"
Documenter = "0.26"
JSON = "0.21"
JuMP = "0.21"
Literate = "2.8"
MAT = "0.10"
MacroTools = "0.5.6"
OSQP = "0.6"
OrderedCollections = "1.4"
SBML = "0.7, 0.8"
SBML = "0.7, 0.8.2"
StableRNGs = "1.0"
Tulip = "0.7"
julia = "1"
julia = "1.5, 1.6, 1.7"
[extras]
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
GLPK = "60bf3e95-4087-53dc-ae20-288a0d20c6a6"
Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306"
OSQP = "ab2f91bb-94b4-55e3-9ba0-7f65df51de79"
SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Tulip = "6dd1b50a-3aae-11e9-10b5-ef983d2400fa"
[targets]
test = ["Aqua", "Downloads", "OSQP", "SHA", "Test", "Tulip", "GLPK"]
test = ["Aqua", "Downloads", "GLPK", "OSQP", "SHA", "Test", "Tulip"]
......@@ -12,3 +12,4 @@ Tulip = "6dd1b50a-3aae-11e9-10b5-ef983d2400fa"
[compat]
Documenter = "0.26"
Literate = "2.8"
......@@ -11,130 +11,163 @@
# strategy as a tradeoff between energy yield and protein cost.", Proceedings
# of the National Academy of Sciences 110.24 (2013): 10039-10044.
using COBREXA, Tulip
# Let's first make a model of glycolysis fermentation.
mets = [
"13dpg",
"2pg",
"3pg",
"adp",
"atp",
"dhap",
"f6p",
"fdp",
"g3p",
"g6p",
"glc__D",
"h",
"h2o",
"lac__D",
"nad",
"nadh",
"pep",
"pi",
"pyr",
]
rxns = Dict(
"ENO" => Dict("2pg" => -1.0, "h2o" => 1.0, "pep" => 1.0),
"FBA" => Dict("fdp" => -1.0, "dhap" => 1.0, "g3p" => 1.0),
"GAPD" => Dict(
"g3p" => -1.0,
"nad" => -1.0,
"pi" => -1.0,
"h" => 1.0,
"nadh" => 1.0,
"13dpg" => 1.0,
),
"HEX" =>
Dict("atp" => -1.0, "glc__D" => -1.0, "g6p" => 1.0, "adp" => 1.0, "h" => 1.0),
"LDH" =>
Dict("pyr" => -1.0, "nadh" => -1.0, "h" => -1.0, "nad" => 1.0, "lac__D" => 1.0),
"PFK" => Dict("f6p" => -1.0, "atp" => -1.0, "adp" => 1.0, "h" => 1.0, "fdp" => 1.0),
"PGI" => Dict("g6p" => -1.0, "f6p" => 1.0),
"PGK" => Dict("13dpg" => -1.0, "adp" => -1.0, "atp" => 1.0, "3pg" => 1.0),
"PGM" => Dict("3pg" => -1.0, "2pg" => 1.0),
"PYK" =>
Dict("pep" => -1.0, "adp" => -1.0, "h" => -1.0, "atp" => 1.0, "pyr" => 1.0),
"TPI" => Dict("dhap" => -1.0, "g3p" => 1.0),
)
using COBREXA, GLPK, Tulip
model = StandardModel("Glycolysis")
# Let's load the core E. coli model
add_metabolites!(model, Metabolite.(mets))
add_reactions!(model, collect(Reaction(rid; metabolites = mets) for (rid, mets) in rxns))
model = load_model("e_coli_core.json")
model
# We need some thermodynamic data. You can get Gibbs free energies (ΔG⁰) e.g.
# from [eQuilibrator](https://equilibrator.weizmann.ac.il/), possibly using the
# We need some thermodynamic data. You can get reaction Gibbs free energies (ΔG⁰) from
# e.g. [eQuilibrator](https://equilibrator.weizmann.ac.il/), possibly using the
# [Julia wrapper](https://github.com/stelmo/eQuilibrator.jl) that allows you to
# automate this step. Here, we make a dictionary that maps the reaction IDs to
# calculated Gibbs free energies of reactions.
gibbs_free_energies = Dict( # ΔG⁰ in kJ/mol
"TPI" => 5.57535,
"PGK" => -19.32,
"PFK" => -14.5988,
"ENO" => -3.81089,
"PYK" => -27.5833,
"LDH" => -23.6803,
"FBA" => 22.3932,
"PGI" => 2.6617,
"GAPD" => 4.60271,
"PGM" => -4.52041,
"HEX" => -17.90,
# calculated Gibbs free energy of reaction for each reaction (including the transporters).
reaction_standard_gibbs_free_energies = Dict( # kJ/mol
"ACALD" => -21.26,
"PTAr" => 8.65,
"ALCD2x" => 17.47,
"PDH" => -34.24,
"PYK" => -24.48,
"CO2t" => 0.00,
"MALt2_2" => -6.83,
"CS" => -39.33,
"PGM" => -4.47,
"TKT1" => -1.49,
"ACONTa" => 8.46,
"GLNS" => -15.77,
"ICL" => 9.53,
"FBA" => 23.37,
"SUCCt3" => -43.97,
"FORt2" => -3.42,
"G6PDH2r" => -7.39,
"AKGDH" => -28.23,
"TKT2" => -10.31,
"FRD7" => 73.61,
"SUCOAS" => -1.15,
"FBP" => -11.60,
"ICDHyr" => 5.39,
"AKGt2r" => 10.08,
"GLUSy" => -47.21,
"TPI" => 5.62,
"FORt" => 13.50,
"ACONTb" => -1.62,
"GLNabc" => -30.19,
"RPE" => -3.38,
"ACKr" => 14.02,
"THD2" => -33.84,
"PFL" => -19.81,
"RPI" => 4.47,
"D_LACt2" => -3.42,
"TALA" => -0.94,
"PPCK" => 10.65,
"ACt2r" => -3.41,
"NH4t" => -13.60,
"PGL" => -25.94,
"NADTRHD" => -0.01,
"PGK" => 19.57,
"LDH_D" => 20.04,
"ME1" => 12.08,
"PIt2r" => 10.41,
"ATPS4r" => -37.57,
"PYRt2" => -3.42,
"GLCpts" => -45.42,
"GLUDy" => 32.83,
"CYTBD" => -59.70,
"FUMt2_2" => -6.84,
"FRUpts2" => -42.67,
"GAPD" => 0.53,
"H2Ot" => 0.00,
"PPC" => -40.81,
"NADH16" => -80.37,
"PFK" => -18.54,
"MDH" => 25.91,
"PGI" => 2.63,
"O2t" => 0.00,
"ME2" => 12.09,
"GND" => 10.31,
"SUCCt2_2" => -6.82,
"GLUN" => -14.38,
"ETOHt2r" => -16.93,
"ADK1" => 0.38,
"ACALDt" => 0.00,
"SUCDi" => -73.61,
"ENO" => -3.81,
"MALS" => -39.22,
"GLUt2r" => -3.49,
"PPS" => -6.05,
"FUM" => -3.42,
);
# In general we cannot be certain that all fluxes will be positive for a given flux
# solution. This poses problems for systematically enforcing that ΔᵣG ≤ 0 for each reaction,
# because it implicitly assumes that all fluxes are positive, as done in the original
# formulation of MMDF. In `max_min_driving_force` we instead enforce ΔᵣG ⋅ vᵢ ≤ 0, where vᵢ
# is the flux of reaction i. By default all fluxes are assumed to be positive, but by
# supplying thermodynamically consistent flux solution it is possible to drop this implicit
# assumption and makes it easier to directly incorporate the max min driving force into
# non-customized models. Here, customized model means a model written such that a negative
# ΔᵣG is associated with each positive flux in the model, and only positive fluxes are used
# by the model.
flux_solution = flux_balance_analysis_dict( # find a thermodynamically consistent solution
model,
GLPK.Optimizer;
modifications = [add_loopless_constraints()],
)
# Run max min driving force analysis with some reasonable constraints. Protons
# and water are removed from the concentration calculation of the optimization
# problem, thus we specify their IDs explicitly. The reason for this is that
# the standard Gibbs free energy change of biochemical reactions take place at
# constant pH, so proton concentration should not change to make the analysis
# behave reasonably; likewise we just assume that reactions occur in relatively
# stable aqueous environments, hence water excluded too.
# Run max min driving force analysis with some reasonable constraints on metabolite
# concentration bounds. To remove protons and water from the concentration calculations, we
# explicitly specify their IDs. Note, protons and water need to be removed from the
# concentration calculation of the optimization problem, because the Gibbs free energies of
# biochemical reactions are measured at constant pH, so proton concentration is fixed, and
# reactions occur in aqueous environments, hence water concentration does not change.
df, dgs, concens = max_min_driving_force(
sol = max_min_driving_force(
model,
gibbs_free_energies,
reaction_standard_gibbs_free_energies,
Tulip.Optimizer;
ignore_metabolites = ["h", "h2o"],
concentration_ratios = Dict(("atp", "adp") => 10.0, ("nadh", "nad") => 0.1),
constant_concentrations = Dict("pi" => 1e-2), # constant phosphate concentration set to 10 mM
concentration_lb = 1e-6, # minimum 1 μM for all metabolites
concentration_ub = 1e-2, # maximum 10 mM or all metabolites
flux_solution = flux_solution,
proton_ids = ["h_c", "h_e"],
water_ids = ["h2o_c", "h2o_e"],
concentration_ratios = Dict(
("atp_c", "adp_c") => 10.0,
("nadh_c", "nad_c") => 0.13,
("nadph_c", "nadp_c") => 1.3,
),
concentration_lb = 1e-6, # M
concentration_ub = 100e-3, # M
ignore_reaction_ids = [
"H2Ot", # ignore water transporter
],
)
# Plot the results to show how the concentrations can be used to ensure that
sol.mmdf
#md # !!! note "Note: transporters"
#md # Transporters can be included in MMDF analysis, however water and proton
#md # transporters must be excluded explicitly in `ignore_reaction_ids`. Due to
#md # the way the method is implemented, the ΔᵣG for these transport reactions
#md # will always be 0. If not excluded, the MMDF will only have a zero solution (if
#md # these reactions are used in the flux solution).
# Next, we plot the results to show how the concentrations can be used to ensure that
# each reach proceeds "down hill" (ΔᵣG < 0) and that the driving force is as
# large as possible across all the reactions in the model. Compare this to the
# driving forces at standard conditions.
# driving forces at standard conditions. Note, we only plot glycolysis for simplicity.
# We additionally scale the fluxes according to their stoichiometry in the
# pathway. From the output, it is clear that that metabolite concentrations
# play a large role in ensuring the thermodynamic consistency of in vivo enzyme
# reactions.
relative_flux = Dict(
"HEX" => 1.0,
"PGI" => 1.0,
"PFK" => 1.0,
"FBA" => 1.0,
"TPI" => 1.0,
"GAPD" => 2.0,
"PGK" => 2.0,
"PGM" => 2.0,
"ENO" => 2.0,
"PYK" => 2.0,
"LDH" => 2.0,
)
rids = ["HEX", "PGI", "PFK", "FBA", "TPI", "GAPD", "PGK", "PGM", "ENO", "PYK", "LDH"] # in order of pathway
rid_rf = [relative_flux[rid] for rid in rids]
dg_standard = [gibbs_free_energies[rid] for rid in rids]
dg_opt = [dgs[rid] for rid in rids]
# pathway. From the output, we can clearly see that that metabolite concentrations
# play a large role in ensuring the thermodynamic consistency of in vivo reactions.
rids = ["GLCpts", "PGI", "PFK", "FBA", "TPI", "GAPD", "PGK", "PGM", "ENO", "PYK"] # glycolysis
rid_rf = [flux_solution[rid] for rid in rids]
dg_standard = cumsum([
reaction_standard_gibbs_free_energies[rid] * flux_solution[rid] for rid in rids
])
dg_standard .-= first(dg_standard)
dg_opt = cumsum([sol.dg_reactions[rid] * flux_solution[rid] for rid in rids])
dg_opt .-= first(dg_opt)
using CairoMakie
......@@ -146,29 +179,13 @@ ax = Axis(
ylabel = "Cumulative ΔG [kJ/mol]",
);
lines!(
ax,
1:length(rids),
(cumsum(dg_standard) .- first(dg_standard)) .* rid_rf;
color = :red,
label = "Standard",
)
lines!(
ax,
1:length(rids),
(cumsum(dg_opt) .- first(dg_opt)) .* rid_rf;
color = :blue,
label = "Optimized",
)
lines!(ax, 1:length(rids), dg_standard; color = :red, label = "Standard")
lines!(ax, 1:length(rids), dg_opt, color = :blue, label = "Optimized")
ax.xticks = (1:length(rids), rids)
fig[1, 2] = Legend(fig, ax, "ΔG'", framevisible = false)
fig
#md # !!! tip "Directions of reactions"
#md # Be careful when constructing models for MMDFA, the reaction directions in the model
#md # and thermodynamic data need to be consistent with the overall flux
#md # direction implied by the model. For example, in BiGG, `LDH_D` is written
#md # `lac__D + nad ⟷ h + nadh + pyr` and the associated ΔrG'⁰ is 23.6803 kJ/mol.
#md # For MMDFA no flux is calculated, so you need to write the reaction
#md # in the direction of flux, i.e. `h + nadh + pyr ⟶ lac__D + nad` with ΔrG'⁰ as
#md # -23.6803 kJ/mol.
#md # !!! tip "Tip: Thermodynamic variability"
#md # Much like flux variability, thermodynamic constraints in a model are also
#md # degenerate. Check out [`max_min_driving_force_variability`](@ref) for ways
#md # to explore the thermodynamic solution space.
module COBREXA
using Dates
using Distributed
using DistributedData
using JSON
......@@ -19,11 +18,9 @@ import Base: findfirst, getindex, show
import Pkg
import SBML # conflict with Reaction struct name
include("banner.jl")
function __init__()
_print_banner()
end
include("banner.jl")
_print_banner()
# autoloading
const _inc(path...) = include(joinpath(path...))
......
"""
max_min_driving_force(
model::MetabolicModel,
gibbs_free_energies::Dict{String,Float64},
reaction_standard_gibbs_free_energies::Dict{String,Float64},
optimizer;
ignore_metabolites::Vector{String} = ["h", "h2o"],
flux_solution::Dict{String,Float64} = Dict{String,Float64}(),
proton_ids::Vector{String} = ["h_c", "h_e"],
water_ids::Vector{String} = ["h2o_c", "h2o_e"],
constant_concentrations::Dict{String,Float64} = Dict{String,Float64}(),
concentration_ratios::Dict{Tuple{String,String},Float64} = Dict{
Tuple{String,String},
Float64,
}(),
concentration_lb = 1e-6,
concentration_ub = 10e-3,
concentration_lb = 1e-9,
concentration_ub = 100e-3,
T = _constants.T,
R = _constants.R,
small_flux_tol = 1e-6,
modifications = [],
ignore_reaction_ids = [],
)
Perform a max-min driving force analysis on the `model`, as defined by Noor, et al.,
"Pathway thermodynamics highlights kinetic obstacles in central metabolism.", PLoS
computational biology, 2014.
The analysis uses the supplied `optimizer` and Gibbs free energies of the
reactions (in `gibbs_free_energies`) to find the max-min driving force, Gibbs
free energy of the reactions and the concentrations of metabolites that
optimize the following problem:
The function uses the supplied `optimizer` and `reaction_standard_gibbs_free_energies`.
Optionally, `flux_solution` can be used to set the directions of each reaction in `model`
(all reactions are assumed to proceed forward and are active by default). The supplied
`flux_solution` should be free of internal cycles i.e. thermodynamically consistent. This
optional input is important if a reaction in `model` normally runs in reverse (negative
flux). Note, reactions in `flux_solution` that are smaller than `small_flux_tol` are also
ignored in the analysis function (for numerical stability).
The max-min driving force algorithm returns the Gibbs free energy of the reactions, the
concentrations of metabolites and the actual maximum minimum driving force. The optimization
problem solved is:
```
max min -ΔᵣG
s.t. ΔᵣG = ΔG⁰ + R T S' ln(C)
ΔᵣG ≤ 0 (∀r)
s.t. ΔᵣG = ΔrG⁰ + R T S' ln(C)
ΔᵣG ≤ 0
ln(Cₗ) ≤ ln(C) ≤ ln(Cᵤ)
```
where `ΔᵣG` are the Gibbs energies dissipated by the reactions, `ΔᵣG⁰` are the
Gibbs free energies of the reactions, R is the gas constant, T is the
temperature, S is the stoichiometry of the model, and C is the vector of
metabolite concentrations (and their respective lower and upper bounds).
where `ΔrG` are the Gibbs energies dissipated by the reactions, R is the gas constant, T is
the temperature, S is the stoichiometry of the model, and C is the vector of metabolite
concentrations (and their respective lower and upper bounds).
In case no feasible solution exists, `nothing` is returned.
Metabolites specified in `ignore_metabolites` are internally ignored -- that
allows to specify e.g. removal of protons and water, thus allowing the
thermodynamic calculations to assume constant pH and aqueous conditions. Note, if using
biochemical thermodynamic data then you _must_ include the ids of protons and water here.
Reactions specified in `ignore_reaction_ids` are internally ignored when calculating the
max-min driving force. This should include water and proton importers.
Since biochemical thermodynamics are assumed, the `proton_ids` and `water_ids` need to be
specified so that they can be ignored in the calculations. Effectively this assumes an
aqueous environment at constant pH is used.
`constant_concentrations` is used to fix the concentrations of certain metabolites (such as
CO₂). `concentration_ratios` is used to specify additional constraints on metabolite pair
......@@ -55,56 +67,81 @@ optimization problems.
"""
function max_min_driving_force(
model::MetabolicModel,
gibbs_free_energies::Dict{String,Float64},
reaction_standard_gibbs_free_energies::Dict{String,Float64},
optimizer;
ignore_metabolites::Vector{String} = ["h", "h2o"],
flux_solution::Dict{String,Float64} = Dict{String,Float64}(),
proton_ids::Vector{String} = ["h_c", "h_e"],
water_ids::Vector{String} = ["h2o_c", "h2o_e"],
constant_concentrations::Dict{String,Float64} = Dict{String,Float64}(),
concentration_ratios::Dict{Tuple{String,String},Float64} = Dict{
Tuple{String,String},
Float64,
}(),
concentration_lb = 1e-6,
concentration_ub = 10e-3,
concentration_lb = 1e-9,
concentration_ub = 100e-3,
T = _constants.T,
R = _constants.R,
small_flux_tol = 1e-6,
modifications = [],
ignore_reaction_ids = [],
)
# get ΔGs in order of reactions
dg0s = [gibbs_free_energies[rid] for rid in reactions(model)]
# get all metabolites that participate in the ΔG calculations (no protons, no water)
midxs = [
midx for
(midx, mid) in enumerate(metabolites(model)) if !(mid in ignore_metabolites)
] # idx in original model
mids = metabolites(model)[midxs]
# find the corresponding metabolites
S = stoichiometry(model)[midxs, :]
# start building the optimization model
opt_model = Model(optimizer)
@variables opt_model begin
minDF
logcs[1:length(midxs)]
dgs[1:n_reactions(model)]
mmdf
logcs[1:n_metabolites(model)]
dgrs[1:n_reactions(model)]
end
# set proton log concentration to zero so that it won't impact any calculations (biothermodynamics assumption)
proton_idxs = Int.(indexin(proton_ids, metabolites(model)))
for idx in proton_idxs
JuMP.fix(logcs[idx], 0.0)
end
@constraints opt_model begin
minDF .<= -dgs
dgs .<= 0
dgs .== dg0s .+ R * T .* S' * logcs
# set water concentrations to zero (aqueous condition assumptions)
water_idxs = Int.(indexin(water_ids, metabolites(model)))
for idx in water_idxs
JuMP.fix(logcs[idx], 0.0)
end
@objective(opt_model, Max, minDF)