Unverified Commit d4509bae authored by Miroslav Kratochvil's avatar Miroslav Kratochvil Committed by GitHub
Browse files

Merge pull request #270 from LCSB-BioCore/mo-community-model-join-function

 Basic community model construction functions
parents 1011dcb5 d5afd324
Pipeline #42910 passed with stages
in 20 minutes and 22 seconds
......@@ -271,12 +271,62 @@ function verify_consistency(
end
"""
remove_reactions(m::CoreModel, rxns::Vector{Int})
remove_metabolites(model::CoreModel, metabolites)
Removes a set of `metabolites` from the `model` of type `CoreModel` and returns
a new `CoreModel` without those metabolites. Here, `metabolites` can be either a
string, a vector of strings, an index or a vector of indices. Also removes any
reactions that have no associated metabolites after the metabolites have been
removed.
# Example
```
model = load_model(CoreModel, "e_coli_core.json")
m1 = remove_metabolites(model, ["glc__D_e", "for_c"])
m2 = remove_metabolites(model, "glc__D_e")
m3 = remove_metabolites(model, indexin(["glc__D_e", "for_c"], metabolites(model)))
m4 = remove_metabolites(model, first(indexin(["glc__D_e"], metabolites(model))))
```
"""
function remove_metabolites(model::CoreModel, mets)
mets_to_keep = filter(x -> x mets, 1:n_metabolites(model))
temp_S = model.S[mets_to_keep, :]
(I, rxns_to_keep, val) = findnz(temp_S)
sort!(rxns_to_keep)
unique!(rxns_to_keep)
new_S = model.S[mets_to_keep, rxns_to_keep]
new_b = model.b[mets_to_keep]
new_c = model.c[rxns_to_keep]
new_lbs = model.xl[rxns_to_keep]
new_ubs = model.xu[rxns_to_keep]
new_rxns = model.rxns[rxns_to_keep]
new_mets = model.mets[mets_to_keep]
return CoreModel(new_S, new_b, new_c, new_lbs, new_ubs, new_rxns, new_mets)
end
Remove reaction(s) from a `CoreModel`.
function remove_metabolites(model::CoreModel, met::Integer)
return remove_metabolites(model, [met])
end
Also removes any metabolites not involved in any reaction after the deletion.
function remove_metabolites(model::CoreModel, met::String)
return remove_metabolites(model, [met])
end
function remove_metabolites(model::CoreModel, mets::Vector{String})
met_indices = filter(!isnothing, indexin(mets, metabolites(model)))
if isempty(met_indices)
return model
else
return remove_metabolites(model, met_indices)
end
end
"""
Removes a set of reactions from a CoreModel.
Also removes the metabolites not involved in any reaction.
"""
function remove_reactions(m::CoreModel, rxns::Vector{Int})
rxns_to_keep = filter(e -> e rxns, 1:n_reactions(m))
......
"""
add_objective!(
community::CoreModel,
objective_mets::Vector{String};
objective_weights = Float64[],
objective_column_index = 0,
)
Add an objective to the `community` model. Supply the string names of the
objective metabolites in `objective_mets`. Optionally specify the weight to
assign each metabolite in the objective function, if unassigned then equal
weight is assumed. Also, optionally specify whether the objective already exists
in the model by assigning `objective_column_index`. If unassigned then an
objective column will be added, otherwise the column at `objective_column_index`
will be updated.
Note, the weights are negated inside the function so that the objective metabolites
are seen as reagents/substrates, not products in the reaction equation.
# Example
```
add_objective!(model, ["met1", "met2"]) # adds a new column with weights = [1,1]
add_objective!(model, ["met1", "met2"]; objective_weights=[0.1, 0.9]) # adds a new column
add_objective!(model, ["met1", "met2"]; objective_weights=[0.1, 0.9], objective_column_index=10) # updates column 10
```
"""
function add_objective!(
community::CoreModel,
objective_mets::Vector{String};
objective_weights = Float64[],
objective_column_index = 0,
)
obj_inds = indexin(objective_mets, metabolites(community))
if isempty(objective_weights)
objective_weights = repeat([1.0], inner = length(objective_mets))
end
if objective_column_index == 0 # needs to be created
nr, _ = size(community.S)
objcol = spzeros(nr)
objcol[obj_inds] .= -objective_weights
# extend model by one reaction
community.S = hcat(community.S, objcol)
community.xl = [community.xl; 0.0]
community.xu = [community.xu; 1000.0]
community.rxns = [community.rxns; "community_biomass"]
community.c = spzeros(size(community.S, 2))
community.c[end] = 1.0
else # only update
community.S[:, objective_column_index] .= 0.0 # reset
community.S[obj_inds, objective_column_index] .= -objective_weights
dropzeros!(community.S)
community.c = spzeros(size(community.S, 2))
community.c[objective_column_index] = 1.0
community.xl[objective_column_index] = 0.0
community.xu[objective_column_index] = 1000.0
end
end
"""
add_model_with_exchanges(
community::CoreModel,
model::MetabolicModel,
exchange_rxn_ids::Vector{String},
exchange_met_ids::Vector{String};
model_name = "unknown_species",
biomass_id = nothing,
)::CoreModel
Add `model` to `community`, which is a pre-existing community model with
`exchange_rxn_ids` and `exchange_met_ids`. The `model_name` is appended to
each reaction and metabolite, see [`join`](@ref). If `biomass_id` is specified
then a biomass metabolite for `model` is also added to the resulting model. The
column corresponding to the `biomass_id` reaction then produces this new biomass
metabolite with unit coefficient. Note, `exchange_rxn_ids` and
`exchange_met_ids` must already exist in the `community` model.
# Example
```
community = add_model_with_exchanges(community, model, exchange_rxn_ids, exchange_met_ids; model_name="species_2", biomass_id="BIOMASS_Ecoli_core_w_GAM")
```
"""
function add_model_with_exchanges(
community::CoreModel,
model::MetabolicModel,
exchange_rxn_ids::Vector{String},
exchange_met_ids::Vector{String};
model_name = "unknown_species",
biomass_id = nothing,
)::CoreModel
exchange_met_community_inds = indexin(exchange_met_ids, metabolites(community))
exchange_rxn_community_inds = indexin(exchange_rxn_ids, reactions(community))
if any(isnothing.(exchange_met_community_inds)) ||
any(isnothing.(exchange_rxn_community_inds))
throw(
DomainError(
"exchange metabolite/reaction not found.",
"Exchange metabolites/reactions must already be contained in the community model.",
),
)
end
n_cmodel_rows, n_cmodel_cols = size(stoichiometry(community))
n_model_rows, n_model_cols = size(stoichiometry(model))
# A note on the variable names here.Suppose M is some sparse matrix, then I
# = row indices, J = column indices and V = values at the associated
# indices. So I[a] = i, J[a]=j and then M[i,j] = V[a]
Iadd, Jadd, Vadd = findnz(stoichiometry(model))
# shift to fit into community
Iadd .+= n_cmodel_rows
Jadd .+= n_cmodel_cols
# when adding a single model not that many reactions, push! okay?
exchange_rxn_model_idxs = indexin(exchange_rxn_ids, reactions(model))
for i = 1:length(exchange_rxn_ids)
isnothing(exchange_rxn_model_idxs[i]) && continue
push!(Iadd, exchange_met_community_inds[i]) # already present ex met in community model
push!(Jadd, n_cmodel_cols + exchange_rxn_model_idxs[i]) # new column hence the offset
push!(Vadd, 1.0)
end
biomass_met = 0.0
if biomass_id != "" # add biomass metabolite
biomass_rxn = first(indexin([biomass_id], reactions(model)))
push!(Iadd, n_model_rows + n_cmodel_rows + 1)
push!(Jadd, biomass_rxn + n_cmodel_cols)
push!(Vadd, 1.0)
biomass_met = 1
end
n_metabolites_total = n_model_rows + n_cmodel_rows + biomass_met
n_reactions_total = n_cmodel_cols + n_model_cols
I, J, V = findnz(stoichiometry(community))
I = [I; Iadd]
J = [J; Jadd]
V = [V; Vadd]
S = sparse(I, J, V, n_metabolites_total, n_reactions_total)
# A note on the variables here. The bounds are vectors of upper and lower
# bounds for each reaction. So lbs = [lb_1, lb_2, lb_i, ...], ubs = [ub_1,
# ub_2, ub_i, ...] for reaction i. See the bounds function for more
# information
lbsadd, ubsadd = bounds(model)
lbs, ubs = bounds(community)
lbs = [lbs; lbsadd]
ubs = [ubs; ubsadd]
rxnsadd = "$(model_name)_" .* reactions(model)
if !isnothing(biomass_id)
metsadd =
["$(model_name)_" .* metabolites(model); "$(model_name)_" * biomass_id]
else
metsadd = "$(model_name)_" .* metabolites(model)
end
rxns = [reactions(community); rxnsadd]
mets = [metabolites(community); metsadd]
# adds to the original community data, could possibly reset?
I, V = findnz(balance(community))
b = sparsevec(I, V, n_metabolites_total)
I, V = findnz(objective(community))
c = sparsevec(I, V, n_reactions_total)
return CoreModel(S, b, c, lbs, ubs, rxns, mets)
end
"""
join_with_exchanges(models::Vector{M},
exchange_rxn_ids::Vector{String},
exchange_met_ids::Vector{String};
add_biomass_objective=false,
biomass_ids::Vector{String},
model_names=String[]
)
Return a `CoreModel` representing the community model of `models` joined through
their `exchange_rxn_ids` and `exchange_met_ids`. These exchange reactions and
metabolites link to environmental metabolites and reactions.
Optionally specify `model_names` to append a specific name to each reaction
and metabolite of an organism for easier reference (default is `species_i` for
each model index i in `models`). Note, the bounds of the environmental variables
are all set to zero. Thus, to run a simulation you need to constrain them
appropriately. All the other bounds are inherited from the models used to
construct the community model.
If `add_biomass_objective` is true then `biomass_ids` needs to be supplied as
well. This creates a model with an extra reaction added to the end of the
stoichiometric matrix (last column) that can be assigned as the objective
reaction. It also creates biomass "metabolites" that can be used in this
objective reaction. Note, this reaction is unspecified, further action needs to
be taken to specify it, e.g. assign weights to the last column of the
stoichiometric matrix in the rows corresponding to the biomass metabolites.
To further clarify how this `join` works. Suppose you have 2 organisms with
stoichiometric matrices S₁ and S₂ and you want to link them with
`exchange_met_ids = [em₁, em₂, em₃, ...]` and `exchange_rxn_ids = [er₁, er₂,
er₃, ...]`. Then a new community stoichiometric matrix is constructed that looks
like this:
```
_ er₁ er₂ er₃ ... b_
|S₁ |
| S₂ |
em₁| |
S = em₂| |
em₃| |
...| |
bm₁| |
bm₂|_ _|
```
The exchange reactions in each model get linked to environmental metabolites,
`emᵢ`, and these get linked to environmental exchanges, `erᵢ`. These `erᵢ`
behave like normal single organism exchange reactions. When
`add_biomass_objective` is true each model's biomass becomes a pseudo-metabolite
(`bmᵢ`). These can be weighted in column `b`, called the `community_biomass`
reaction in the community model, if desired. Refer to the tutorial if this is
unclear.
# Example
```
m1 = load_model(core_model_path)
m2 = load_model(CoreModel, core_model_path)
# need to list ALL the exchanges that will form part of the entire model
exchange_rxn_ids = filter(looks_like_exchange_reaction, reactions(m1))
exchange_met_ids = [first(keys(reaction_stoichiometry(m1, ex_rxn))) for ex_rxn in exchange_rxn_ids]
biomass_ids = ["BIOMASS_Ecoli_core_w_GAM", "BIOMASS_Ecoli_core_w_GAM"]
community = join_with_exchanges(
[m1, m2],
exchange_rxn_ids,
exchange_met_ids;
add_biomass_objective = true,
biomass_ids = biomass_ids,
)
```
"""
function join_with_exchanges(
models::Vector{M},
exchange_rxn_ids::Vector{String},
exchange_met_ids::Vector{String};
add_biomass_objective = true,
biomass_ids = String[],
model_names = String[],
)::CoreModel where M <: MetabolicModel
if add_biomass_objective && isempty(biomass_ids)
throw(
DomainError(
"biomass_ids",
"Please add supply the string ids of the biomass functions when `add_biomass_objective` is true.",
),
)
end
if length(exchange_met_ids) != length(exchange_rxn_ids)
throw(
DomainError(
"Exchange identifiers are misspecified",
"The lenghts of the exchange metabolite and reactions are different.",
),
)
end
# get offsets to construct community S
reaction_lengths = [n_reactions(model) for model in models]
metabolite_lengths = [n_metabolites(model) for model in models]
reaction_offset = [0; cumsum(reaction_lengths[1:end-1])]
metabolite_offset = [0; cumsum(metabolite_lengths[1:end-1])]
# get each model's S matrix (needed for the size calculations)
stoichs = [stoichiometry(model) for model in models]
nnzs = [findnz(stoich) for stoich in stoichs] # nonzero indices per model
# size calculations
column_add = add_biomass_objective ? 1 : 0 # objective rxn
row_add = add_biomass_objective ? length(models) : 0 # biomass as metabolites
nnz_add = add_biomass_objective ? (1 + length(models)) : 0
nnz_total =
sum(length(first(nnz)) for nnz in nnzs) +
length(models) * length(exchange_rxn_ids) +
length(exchange_met_ids) + nnz_add
n_reactions_metabolic = sum(reaction_lengths)
n_reactions_total = n_reactions_metabolic + length(exchange_rxn_ids) + column_add
n_metabolites_metabolic = sum(metabolite_lengths)
n_metabolites_total = n_metabolites_metabolic + length(exchange_met_ids) + row_add
# Create empty storage vectors
lbs = spzeros(n_reactions_total)
ubs = spzeros(n_reactions_total)
rxns = Array{String,1}(undef, n_reactions_total)
mets = Array{String,1}(undef, n_metabolites_total)
I = Array{Int,1}(undef, nnz_total)
J = Array{Int,1}(undef, nnz_total)
V = Array{Float64,1}(undef, nnz_total)
# build metabolic components, block diagonals
kstart = 1
for i = 1:length(models)
kend = kstart + length(nnzs[i][3]) - 1
rng = kstart:kend
I[rng] .= nnzs[i][1] .+ metabolite_offset[i]
J[rng] .= nnzs[i][2] .+ reaction_offset[i]
V[rng] .= nnzs[i][3]
kstart += length(nnzs[i][3])
end
# build environmental - exchange links
for i = 1:length(models)
exchange_rxn_inds = indexin(exchange_rxn_ids, reactions(models[i]))
exchange_met_inds = indexin(exchange_met_ids, metabolites(models[i]))
for (n, (ex_rxn, ex_met)) in enumerate(zip(exchange_rxn_inds, exchange_met_inds)) # each exchange rxn has one exchange met
isnothing(ex_rxn) && continue
isnothing(ex_met) && continue
# connect environmental metabolite with exchange metabolite
I[kstart] = n_metabolites_metabolic + n
J[kstart] = ex_rxn + reaction_offset[i]
V[kstart] = -stoichs[i][ex_met, ex_rxn] # ex is normally negative, make positive
kstart += 1
end
end
# # build diagonal environmental exchanges
for i = 1:length(exchange_rxn_ids)
I[kstart] = n_metabolites_metabolic + i
J[kstart] = n_reactions_metabolic + i
V[kstart] = -1.0
kstart += 1
end
if add_biomass_objective
n_before_biomass_row = n_metabolites_metabolic + length(exchange_met_ids)
for i = 1:length(models)
biomass_ind = first(indexin([biomass_ids[i]], reactions(models[i])))
I[kstart] = i + n_before_biomass_row
J[kstart] = biomass_ind + reaction_offset[i]
V[kstart] = 1.0
kstart += 1
end
end
S = sparse(
I[1:kstart-1],
J[1:kstart-1],
V[1:kstart-1],
n_metabolites_total,
n_reactions_total,
) # could be that some microbes don't have all the exchanges, hence kstart-1
_reaction_offsets = cumsum(reaction_lengths)
_metabolite_offsets = cumsum(metabolite_lengths)
for i = 1:length(models)
species = isempty(model_names) ? "species_$(i)" : model_names[i]
tlbs, tubs = bounds(models[i])
lbs[reaction_offset[i]+1:_reaction_offsets[i]] .= tlbs
ubs[reaction_offset[i]+1:_reaction_offsets[i]] .= tubs
rxns[reaction_offset[i]+1:_reaction_offsets[i]] =
"$(species)_" .* reactions(models[i])
mets[metabolite_offset[i]+1:_metabolite_offsets[i]] =
"$(species)_" .* metabolites(models[i])
end
mets[_metabolite_offsets[end]+1:_metabolite_offsets[end]+length(exchange_met_ids)] .=
exchange_met_ids
rxns[_reaction_offsets[end]+1:_reaction_offsets[end]+length(exchange_rxn_ids)] .=
exchange_rxn_ids
if add_biomass_objective
rxns[end] = "community_biomass"
for i = 1:length(models)
species = isempty(model_names) ? "species_$(i)" : model_names[i]
mets[end-length(biomass_ids)+i] = "$(species)_" .* biomass_ids[i]
end
end
return CoreModel(S, spzeros(size(S, 1)), spzeros(size(S, 2)), lbs, ubs, rxns, mets)
end
......@@ -192,3 +192,21 @@ end
@test reactions(modLp) == reactions(lp)[2:3]
@test metabolites(modLp) == metabolites(lp)[1:3]
end
@testset "Remove metabolites" begin
model = load_model(CoreModel, model_paths["e_coli_core.json"])
m1 = remove_metabolites(model, ["glc__D_e", "for_c"])
m2 = remove_metabolites(model, "glc__D_e")
m3 = remove_metabolites(model, indexin(["glc__D_e", "for_c"], metabolites(model)))
m4 = remove_metabolites(model, first(indexin(["glc__D_e"], metabolites(model))))
@test size(stoichiometry(m1)) == (70, 94)
@test size(stoichiometry(m2)) == (71, 94)
@test size(stoichiometry(m3)) == (70, 94)
@test size(stoichiometry(m4)) == (71, 94)
@test any(["glc__D_e", "for_c"] . Ref(metabolites(m1)))
@test any(["glc__D_e"] . Ref(metabolites(m2)))
@test any(["glc__D_e", "for_c"] . Ref(metabolites(m3)))
@test any(["glc__D_e"] . Ref(metabolites(m4)))
end
@testset "Small model join" begin
m1 = load_model(model_paths["e_coli_core.json"])
m2 = load_model(CoreModel, model_paths["e_coli_core.json"])
exchange_rxn_ids = filter(looks_like_exchange_reaction, reactions(m2))
exchange_met_ids = [first(keys(reaction_stoichiometry(m2, ex_rxn))) for ex_rxn in exchange_rxn_ids]
biomass_ids = ["BIOMASS_Ecoli_core_w_GAM", "BIOMASS_Ecoli_core_w_GAM"]
community = join_with_exchanges(
[m1, m2],
exchange_rxn_ids,
exchange_met_ids;
add_biomass_objective = true,
biomass_ids = biomass_ids,
)
env_ex_inds = indexin(exchange_rxn_ids, reactions(community))
m2_ex_inds = indexin(exchange_rxn_ids, reactions(m2))
community.xl[env_ex_inds] .= m2.xl[m2_ex_inds]
community.xu[env_ex_inds] .= m2.xu[m2_ex_inds]
biomass_ids =
["species_1_BIOMASS_Ecoli_core_w_GAM", "species_2_BIOMASS_Ecoli_core_w_GAM"]
add_objective!(
community,
biomass_ids;
objective_column_index = first(
indexin(["community_biomass"], reactions(community)),
),
)
d = flux_balance_analysis_dict(community, Tulip.Optimizer)
@test size(stoichiometry(community)) == (166, 211)
@test isapprox(d["community_biomass"], 0.41559777495618294, atol = TEST_TOLERANCE)
end
@testset "Heterogenous model join" begin
m1 = load_model(CoreModel, model_paths["e_coli_core.json"])
m2 = load_model(CoreModel, model_paths["iJO1366.mat"])
exchange_rxn_ids = filter(looks_like_exchange_reaction, reactions(m2))
exchange_met_ids = [first(keys(reaction_stoichiometry(m2, ex_rxn))) for ex_rxn in exchange_rxn_ids]
biomass_ids = ["BIOMASS_Ecoli_core_w_GAM", "BIOMASS_Ec_iJO1366_core_53p95M"]
community = join_with_exchanges(
[m1, m2],
exchange_rxn_ids,
exchange_met_ids;
add_biomass_objective = true,
biomass_ids = biomass_ids,
)
env_ex_inds = indexin(exchange_rxn_ids, reactions(community))
m2_ex_inds = indexin(exchange_rxn_ids, reactions(m2))
m1_ex_inds = indexin(exchange_rxn_ids, reactions(m1))
for (env_ex, m2_ex, m1_ex) in zip(env_ex_inds, m2_ex_inds, m1_ex_inds)
m2lb = isnothing(m2_ex) ? 0.0 : m2.xl[m2_ex]
m2ub = isnothing(m2_ex) ? 0.0 : m2.xu[m2_ex]
m1lb = isnothing(m1_ex) ? 0.0 : m1.xl[m1_ex]
m1ub = isnothing(m1_ex) ? 0.0 : m1.xu[m1_ex]
community.xl[env_ex] = m1lb + m2lb
community.xu[env_ex] = m1ub + m2ub
end
biomass_metabolite_inds = indexin(
["species_1_BIOMASS_Ecoli_core_w_GAM", "species_2_BIOMASS_Ec_iJO1366_core_53p95M"],
metabolites(community),
)
community.S[biomass_metabolite_inds, end] .= -1.0
community.c[end] = 1.0
community.xl[end] = 0.0
community.xu[end] = 1000.0
d = flux_balance_analysis_dict(
community,
Tulip.Optimizer;
modifications = [change_optimizer_attribute("IPM_IterationsLimit", 1000)],
)
@test size(stoichiometry(community)) == (2203, 3003)
@test isapprox(d["community_biomass"], 0.8739215069675402, atol = TEST_TOLERANCE)
end
@testset "Community model modifications" begin
m1 = load_model(CoreModel, model_paths["e_coli_core.json"])
exchange_rxn_ids = filter(looks_like_exchange_reaction, reactions(m1))
exchange_met_ids = [first(keys(reaction_stoichiometry(m1, ex_rxn))) for ex_rxn in exchange_rxn_ids]
biomass_ids = ["BIOMASS_Ecoli_core_w_GAM"]
community = join_with_exchanges(
[m1],
exchange_rxn_ids,
exchange_met_ids;
add_biomass_objective = true,
biomass_ids = biomass_ids,
)
env_ex_inds = indexin(exchange_rxn_ids, reactions(community))
m1_ex_inds = indexin(exchange_rxn_ids, reactions(m1))
community.xl[env_ex_inds] .= m1.xl[m1_ex_inds]
community.xu[env_ex_inds] .= m1.xu[m1_ex_inds]
m2 = load_model(CoreModel, model_paths["e_coli_core.json"])
community = add_model_with_exchanges(
community,
m2,
exchange_rxn_ids,
exchange_met_ids;
model_name = "species_2",
biomass_id = "BIOMASS_Ecoli_core_w_GAM",
)
biomass_ids =
["species_1_BIOMASS_Ecoli_core_w_GAM", "species_2_BIOMASS_Ecoli_core_w_GAM"]
add_objective!(
community,
biomass_ids;
objective_column_index = first(
indexin(["community_biomass"], reactions(community)),
),
)