Commit c60830fd authored by St. Elmo's avatar St. Elmo
Browse files

reorganize structure

parent 021302ea
......@@ -15,7 +15,6 @@ Gurobi = "2e9cd046-0924-5485-92f1-d5272153d98b"
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
HypothesisTests = "09f84164-cd44-5f33-b23f-e6b0d136a0d5"
Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9"
JLD = "4138dd39-2aa7-5051-a626-17a0bb65d9c8"
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d"
......
using CobraTools
using Measurements
using Statistics
using PyCall
using StringDistances
using Plots
gr()
modelpath = joinpath("models", "iML1515.json")
model = CobraTools.read_model(modelpath)
gibbs = CobraTools.map_gibbs_rxns(model.rxns)
brenda = CobraTools.get_ec_turnover(joinpath("data", "brenda_download.brenda.txt"))
kcats = Dict{String, Union{Float64, Measurement{Float64}}}()
for rxnid in keys(gibbs)
# rxnid = "GHBDHx"
metnames = String[]
rxn = findfirst(model.rxns, rxnid)
ec = get(rxn.annotation, "ec-code", [""])[1]
metnames = [m.name for m in keys(rxn.metabolites)]
brs = get(brenda, ec, [])
ks = Float64[]
for br in brs
scomps = [compare(lowercase(metname), lowercase(br[2]), Levenshtein()) for metname in metnames]
if maximum(scomps) > 0.65 # could probably make this stricter
push!(ks, br[1])
# ind = argmax(scomps)
# println(br[2], " <-> ", metnames[ind], " = ", maximum(scomps))
end
end
if !isempty(ks)
if length(ks) == 1
kcats[rxnid] = ks[1]
else
kcats[rxnid] = mean(ks) ± std(ks)
end
end
end
dkeys = intersect(keys(gibbs), keys(kcats))
plot(xlabel="|ΔG'⁰|", ylabel="Turnover [s⁻¹]")
pvs = []
for dk in dkeys
if gibbs[dk] == 0.0 # skip default values
continue
end
ΔG = log10(abs(gibbs[dk]))
kcat = log10(kcats[dk])
push!(pvs, [ΔG, kcat])
if typeof(kcat) == Float64
scatter!([ΔG], [kcat], color="blue")
else
if abs(ΔG.err) < 5.0 && abs(kcat.err) < 5.0
scatter!([ΔG], [kcat], color="blue")
end
end
end
plot!(legend=false)
plot!(xlim=(-2, 3))
\ No newline at end of file
using CobraTools
using Plots
gr()
# iML1515.json = E. coli K12
# iJN678.json = Synechocystis sp. PCC 6803
# iJN746.json = Pseudomonas putida KT2440
# iRC1080.json = Chlamydomonas reinhardtii
# iYO844.json = Bacillus subtilis subsp. subtilis str. 168
# iMM904.json = S. cerevisiae S288C
# iLJ478.json = Thermotoga maritima MSB8
models = ["iML1515.json", "iJN678.json", "iJN746.json", "iRC1080.json", "iYO844.json", "iMM904.json", "iLJ478.json"]
mnames = Dict("iML1515" => "E. coli K12",
"iJN678" => "Synechocystis\nsp. PCC6803",
"iJN746" => "P. putida",
"iRC1080" => "C. reinhardtii",
"iYO844" => "B. subtilis",
"iMM904" => "S. cerevisiae",
"iLJ478" => "T. maritima")
md = Dict{String, Float64}()
for modelname in models
modelpath = joinpath("models", modelname)
model = CobraTools.read_model(modelpath)
gibbs = CobraTools.map_gibbs_rxns(model.rxns)
nkeys = length([k for k in keys(gibbs) if !startswith(k, "EX_") && gibbs[k] != 0.0])
nrxns = length(model.rxns)
md[modelname[1:end-5]] = nkeys/nrxns
end
mdks = keys(md)
vs = [md[k] for k in mdks]
xnms = [k*"\n"*mnames[k] for k in mdks]
bar(vs)
plot!(xticks=(1:7,xnms), legend=false, ylabel="Fraction of reactions with ΔG⁰", xrotation=60)
# CobraTools.jl
This is package aims to provide basic constraint based reconstruction analysis (COBRA) tools in the Julia environment.
This is package aims to provide constraint based reconstruction and analysis (COBRA) tools in the Julia environment.
This package provides basic convenience functions, e.g. FBA, pFBA, sampling, model construction, etc.
More importantly, it also exposes the user to the core machinary used in this type of analysis, e.g. the stoichiometric matrix, so that custom optimization routines can be written as painlessly as possible (due in large part to JuMP).
## Installation
......@@ -29,6 +31,7 @@ To install this package: `] add CobraTools`.
- [ ] Double gene knockout
- [ ] Model construction tools
- [x] Gibbs integration
- [x] Brenda integration (basic)
- [ ] Distributed analysis (COBRA.jl integration?)
## Usage
......
......@@ -17,15 +17,20 @@ using Tulip
using GLPK
using Ipopt
include("cobra_base.jl")
include("parse_models.jl")
include("rxn_tools.jl")
include("met_tools.jl")
include("rxn_tools.jl")
include("gene_tools.jl")
include("io_tools.jl")
include("model_tools.jl")
include("construction_tools.jl")
include("basic_analysis.jl")
include("gibbs_tools.jl")
include("sampling.jl")
include("brenda_tools.jl")
include("sampling.jl")
# export
= Metabolite("∅") # for exchange reactions
export , , , , , ,
export Reaction, Metabolite, Gene
......
"""
ReactionFluxes holds references to the objective, reactions and fluxes from analysis (e.g. FBA)
"""
mutable struct ReactionFluxes
objective_id :: String
objective :: Float64
rxns :: Array{Reaction, 1}
fluxes :: Array{Float64, 1}
end
"""
getindex(reactionfluxes, rxn)
Return the index of rxn in reactionfluxes and -1 if it is not found.
Note, this is slightly different from the normal getindex function.
"""
function Base.getindex(rfs::ReactionFluxes, rxn::Reaction)
for i in eachindex(rfs.rxns)
if rxn.id == rfs.rxns[i].id
return i
end
end
return -1
end
"""
Pretty printing of ReactionFluxes objects.
"""
function Base.show(io::IO, rfs::ReactionFluxes)
println(io, "Optimum for $(rfs.objective_id) = ", round(rfs.objective, digits=4))
println()
inds = sortperm(rfs.fluxes) # consuming fluxes
println("Consuming fluxes:")
counter = 0
for i in inds
if startswith(rfs.rxns[i].id, "EX_")
println(io, rfs.rxns[i].name, " = ", round(rfs.fluxes[i], digits=4), " mmol/gDW/h")
counter += 1
end
if counter > 8 # only display top 10
break
end
end
println()
inds = sortperm(rfs.fluxes, rev=true) # consuming fluxes
println("Producing fluxes:")
counter = 0
for i in inds
if startswith(rfs.rxns[i].id, "EX_")
println(io, rfs.rxns[i].name, " = ", round(rfs.fluxes[i], digits=4), " mmol/gDW/h")
counter += 1
end
if counter > 8 # only display top 10
break
end
end
end
"""
cbmodel = CBM(model::Model)
cbmodel, v, mb, ubs, lbs = CBM(model::Model)
Initialize a constraint based model. Creates a model that satisfies the mass balance
and flux constraints but no objective or optimizer is set. Returns the JuMP model.
This is useful if you want to write your own optimization problem.
cbmodel is the JuMP model. v are the fluxes, mb is S*v == 0, and lbs <= v <= ubs.
"""
function CBM(model::Model)
S, b, ubs, lbs = get_core_model(model) # Construct S, b, lbs, ubs from model
......@@ -76,10 +19,10 @@ function CBM(model::Model)
end
"""
reactionfluxes = fba(model::Model, objective_rxns; weights = Float64[], optimizer="gubori")
reactionfluxes = fba(model::Model, objective_rxns::Union{Reaction, Array{Reaction, 1}}; weights = Float64[], optimizer="gubori")
Run flux balance analysis on the model using objective_rxn(s) and optionally specifying their weights.
Optimiser can also be set here. Uses the constraints implied by the model object. Objective is set separately.
Run flux balance analysis (FBA) on the model using objective_rxn(s) and optionally specifying their weights (empty weights mean equal weighting per reaction).
Optimiser can also be set here. Uses the constraints implied by the model object.
"""
function fba(model::Model, objective_rxns::Union{Reaction, Array{Reaction, 1}}; weights=Float64[], optimizer="gurobi")
cbm, _, _, _, _ = CBM(model) # get the base constraint based model
......@@ -130,11 +73,9 @@ function fba(model::Model, objective_rxns::Union{Reaction, Array{Reaction, 1}};
status = termination_status(cbm) == MOI.OPTIMAL
if status
objective_id = length(objective_indices) == 1 ? model.rxns[objective_indices[1]].id : "multiple reactions"
fluxes = [value(v[i]) for i in eachindex(model.rxns)]
return ReactionFluxes(objective_id, objective_value(cbm), model.rxns, fluxes)
return map_fluxes(v, model.rxns)
else
return ReactionFluxes("Optimization issues", 0.0, Reaction[], Float64[])
return Dict{String, Float64}()
end
end
......@@ -220,33 +161,18 @@ function pfba(model::Model, objective_rxns::Union{Reaction, Array{Reaction, 1}};
pfba_status = termination_status(cbm) == MOI.OPTIMAL
if fba_status && pfba_status
fluxes = [value(v[i]) for i in eachindex(model.rxns)]
return ReactionFluxes("Σᵢ||vᵢ||", objective_value(cbm), model.rxns, fluxes)
return map_fluxes(v, model.rxns)
else
return ReactionFluxes("Optimization issues", 0.0, Reaction[], Float64[])
@warn "Optimization issues occurred."
return Dict{String, Float64}() # something went wrong
end
end
"""
atom_balance_dict = atom_exchange(reactionfluxes)
atom_balance_dict = atom_exchange(fluxdict::Dict{String, Float64}, model::Model)
Return the composition of atoms consumed or produced by the model according to reactionfluxes.
Return the composition of atoms consumed or produced by the model according to fluxdict.
"""
function atom_exchange(rfs::ReactionFluxes)
# find exchange reactions
ex_inds = [i for i in eachindex(rfs.rxns) if startswith(rfs.rxns[i].id, "EX_")]
atom_balance = Dict{String, Float64}()
for ex_ind in ex_inds
for (met, w) in rfs.rxns[ex_ind].metabolites
for (atom, stoich) in get_atoms(met)
atom_balance[atom] = get(atom_balance, atom, 0.0) + stoich*w*value(rfs.fluxes[ex_ind])
end
end
end
return atom_balance
end
function atom_exchange(fluxdict::Dict{String, Float64}, model::Model)
exrxns = [k for k in keys(fluxdict) if startswith(k, "EX_")] # get exchange reactions
......@@ -265,7 +191,7 @@ end
"""
map_fluxes(v, model::Model)
Map fluxes from an optimization problem (v) to rxns in a model.
Map fluxes from an optimization problem (v) to rxns in a model. v can be a JuMP object (fluxes) or an array of Float64 fluxes.
Assumes they are in order, which they should be since they are constructed from model.
"""
function map_fluxes(v::Array{VariableRef,1}, model::Model)
......@@ -288,7 +214,8 @@ end
setbound(index, ubconstaintref, lbconstaintref; ub=1000, lb=-1000)
Helper function to set the bounds of variables.
The JuMP set_normalized_rhs function is a little confusing...
The JuMP set_normalized_rhs function is a little confusing, so this function simplifies setting
constraints. Just supply the constraint index and the bound objects and they will be set appropriately.
"""
function set_bound(vind, ubs, lbs; ub=1000, lb=-1000)
if lb <= 0
......@@ -300,9 +227,9 @@ function set_bound(vind, ubs, lbs; ub=1000, lb=-1000)
end
"""
get_exchanges(rxndict::Dict{String, Float64})
get_exchanges(rxndict::Dict{String, Float64}; topN=8, ignorebound=1000)
Display the top producing and consuming exchange fluxes. Ignores infinite (problem upper/lower bound) fluxes.
Display the topN producing and consuming exchange fluxes. Ignores infinite (problem upper/lower bound) fluxes (set with ignorebound).
"""
function get_exchanges(rxndict::Dict{String, Float64}; topN=8, ignorebound=1000)
fluxes = Float64[]
......
......@@ -72,7 +72,7 @@ end
"""
brenda_data = parse_brenda(brenda_txt_file_path)
Parse the Brenda txt file.
Parse the Brenda txt file line by line.
Download it from https://www.brenda-enzymes.org/download_brenda_without_registration.php.
"""
function parse_brenda(brenda_loc)
......
# Structs and printing
"""
Metabolite struct (mutable)
"""
mutable struct Metabolite
id :: String
name :: String
formula :: String
charge :: Int64
compartment :: String
notes :: Dict{String, Array{String, 1}}
annotation :: Dict{String, Union{Array{String, 1}, String}}
end
"""
metabolite = Metabolite()
Empty metabolite constructor.
"""
function Metabolite()
id = ""
name = ""
formula = ""
charge = 0
compartment = ""
notes = Dict{String, Array{String, 1}}()
annotation = Dict{String, Union{Array{String, 1}, String}}()
Metabolite(id, name, formula, charge, compartment, notes, annotation)
end
"""
Metabolite(id::String)
Assigns only the id field to a metabolite struct.
"""
function Metabolite(id::String)
name = ""
formula = ""
charge = 0
compartment = ""
notes = Dict{String, Array{String, 1}}()
annotation = Dict{String, Union{Array{String, 1}, String}}()
Metabolite(id, name, formula, charge, compartment, notes, annotation)
end
"""
metabolite = Metabolite(field_dict::Dict{String, Any})
Assign a metabolite using fields contained in d.
"""
function Metabolite(d::Dict{String, Any})
id = ""
name = ""
formula = ""
charge = 0
compartment = ""
notes = Dict{String, Array{String, 1}}()
annotation = Dict{String, Union{Array{String, 1}, String}}()
for (k, v) in d
if k == "id"
id = v
elseif k == "name"
name = v
elseif k == "formula"
formula = v
elseif k == "charge"
charge = v
elseif k == "compartment"
compartment = v
elseif k == "notes"
notes = Dict{String, Array{String, 1}}(kk=>vv for (kk, vv) in v)
elseif k == "annotation"
annotation = Dict{String, Union{Array{String, 1}, String}}()
for (kk, vv) in v
if typeof(vv) == String
annotation[kk] = vv
else
annotation[kk] = convert(Array{String, 1}, vv)
end
end
else
@warn "Unrecognized reaction field: $k"
end
end
Metabolite(id, name, formula, charge, compartment, notes, annotation)
end
"""
Reaction struct (mutable)
"""
mutable struct Reaction
id :: String
name :: String
metabolites :: Dict{Metabolite, Float64}
lb :: Float64
ub :: Float64
grr :: String
subsystem :: String
notes :: Dict{String, Array{String, 1}}
annotation :: Dict{String, Union{Array{String, 1}, String}} # SBO is a single string term
objective_coefficient :: Float64
end
"""
reaction = Reaction()
Empty reaction constructor.
"""
function Reaction()
id = ""
name = ""
metabolites = Dict{Metabolite, Float64}()
lb = -1000.0
ub = 1000.0
grr = ""
subsystem = ""
notes = Dict{String, Array{String, 1}}()
annotation = Dict{String, Union{Array{String, 1}, String}}()
objective_coefficient = 0.0
Reaction(id, name, metabolites, lb, ub, grr, subsystem, notes, annotation, objective_coefficient)
end
"""
reaction = Reaction(metabolite_dict::Dict{Metabolite, Float64}, dir::String)
Assign metabolites and their associated stoichiometries from metabolite_dict to a reaction struct.
Directionality is specified using "for" (forward), "rev" (reverse), or "" for bidirectional reactions.
All other fields are left unassigned.
"""
function Reaction(metabolites::Dict{Metabolite, Float64}, dir::String)
id = ""
name = ""
if dir == "for"
lb = 0.0
ub = 1000.0
elseif dir == "rev"
lb = -1000.0
ub = 0.0
else
lb = -1000.0
ub = 1000.0
end
grr = ""
subsystem = ""
notes = Dict{String, Array{String, 1}}()
annotation = Dict{String, Union{Array{String, 1}, String}}()
objective_coefficient = 0.0
Reaction(id, name, metabolites, lb, ub, grr, subsystem, notes, annotation, objective_coefficient)
end
"""
reaction = Reaction(rxn_dict :: Dict{String, Any}, mets::Array{Metabolite, 1})
Assign a reaction struct using rxn_dict and also check that metabolites in this struct exist in the model.
If not a warning is issued and that metabolite is not added to the reaction.
"""
function Reaction(d :: Dict{String, Any}, mets::Array{Metabolite, 1})
id = ""
name = ""
metabolites = Dict{Metabolite, Float64}()
lb = -1000.0
ub = 1000.0
grr = ""
subsystem = ""
notes = Dict{String, Array{String, 1}}()
annotation = Dict{String, Union{Array{String, 1}, String}}()
objective_coefficient = 0.0
for (k, v) in d
if k == "id"
id = v
elseif k == "name"
name = v
elseif k == "metabolites"
metabolites = Dict{Metabolite, Float64}()
for (kk, vv) in v
ind = findfirst(x->x.id == kk, mets)
isnothing(ind) ? (@warn "Metabolite $kk not found in reaction assignment."; continue) : nothing
metabolites[mets[ind]] = vv
end
elseif k == "lower_bound"
lb = v
elseif k == "upper_bound"
ub = v
elseif k == "gene_reaction_rule"
grr = v
elseif k == "subsystem"
subsystem = v
elseif k == "notes"
notes = Dict{String, Array{String, 1}}(kk=>vv for (kk, vv) in v)
elseif k == "annotation"
annotation = Dict{String, Union{Array{String, 1}, String}}()
for (kk, vv) in v
if typeof(vv) == String
annotation[kk] = vv
else
annotation[kk] = convert(Array{String, 1}, vv)
end
end
elseif k == "objective_coefficient"
objective_coefficient = v
else
@warn "Unrecognized reaction field: $k"
end
end
Reaction(id, name, metabolites, lb, ub, grr, subsystem, notes, annotation, objective_coefficient)
end
"""
Gene struct (mutable)
"""
mutable struct Gene
id :: String
name :: String
notes :: Dict{String, Array{String, 1}}
annotation :: Dict{String, Union{Array{String, 1}, String}}
end
"""
gene = Gene()
Empty gene constructor.
"""
function Gene()
id = ""
name = ""
notes = Dict{String, Array{String, 1}}()
annotation = Dict{String, Union{Array{String, 1}, String}}()
Gene(id, name, notes, annotation)
end
"""
gene = Gene(gene_dict :: Dict{String, Any},)
Assign a gene based on the fields contained in gene_dict.
"""
function Gene(d)
id = ""
name = ""
notes = Dict{String, Array{String, 1}}()
annotation = Dict{String, Union{Array{String, 1}, String}}()
for (k, v) in d
if k == "id"
id = v
elseif k == "name"
name = v
elseif k == "notes"
notes = Dict{String, Array{String, 1}}(kk=>vv for (kk, vv) in v)
elseif k == "annotation"
annotation = Dict{String, Union{Array{String, 1}, String}}()
for (kk, vv) in v
if typeof(vv) == String
annotation[kk] = vv
else
annotation[kk] = convert(Array{String, 1}, vv)
end
end
else
@warn "Unrecognized reaction field: $k"
end
end
Gene(id, name, notes, annotation)
end
"""
Model struct of a constraint based metabolic model.
"""
struct Model
id :: String # model name
rxns :: Array{Reaction, 1} # reaction metadata
mets :: Array{Metabolite, 1} # metabolite metadata
genes :: Array{Gene, 1} # gene metadata
grrs :: Dict{String, Array{Array{String, 1}, 1}} # reaction -> [[gene & gene...] or [gene & gene...] or [gene & gene...]]
end
"""
Model()
Empty model constructor.
"""
function Model()
Model("blank", Array{Reaction, 1}(), Array{Metabolite, 1}(), Array{Gene, 1}(), Dict{String, Array{Array{String, 1}, 1}}())
end