Unverified Commit 74bd5833 authored by Miroslav Kratochvil's avatar Miroslav Kratochvil Committed by GitHub
Browse files

Merge pull request #620 from LCSB-BioCore/develop

Develop→master for 1.3.1
parents 2b71b8fc 6d0e83c7
Pipeline #56612 passed with stages
in 21 minutes and 16 seconds
on:
issue_comment:
types: [created]
name: PR auto-formatting command
jobs:
format:
if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER' || github.event.issue.user.id == github.event.comment.user.id) && startsWith(github.event.comment.body, '/format') }}
name: auto-format
runs-on: ubuntu-latest
steps:
- name: Clone the repository
uses: actions/checkout@v2
- name: Checkout the pull request code
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
gh pr checkout ${{ github.event.issue.number }}
- name: Install JuliaFormatter and format
run: |
julia -e 'import Pkg; Pkg.add("JuliaFormatter")'
julia -e 'using JuliaFormatter; format(".")'
- name: Remove trailing whitespace
run: |
find -name '*.jl' -or -name '*.md' -or -name '*.toml' -or -name '*.yml' | while read filename ; do
# remove any trailing spaces
sed --in-place -e 's/\s*$//' "$filename"
# add a final newline if missing
if [[ -s "$filename" && $(tail -c 1 "$filename" |wc -l) -eq 0 ]] ; then
echo >> "$filename"
fi
# squash superfluous final newlines
sed -i -e :a -e '/^\n*$/{$d;N;};/\n$/ba' "$filename"
done
- name: Commit and push the changes
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
if [ `git status -s | wc -l` -ne 0 ] ; then
git config --local user.name "$GITHUB_ACTOR"
git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
git commit -a -m "automatic formatting" -m "triggered by @$GITHUB_ACTOR on PR #${{ github.event.issue.number }}"
if git push
then gh pr comment ${{ github.event.issue.number }} --body \
":heavy_check_mark: auto-formatting triggered by [this comment](${{ github.event.comment.html_url }}) succeeded, commited as `git rev-parse HEAD`"
else gh pr comment ${{ github.event.issue.number }} --body \
":x: auto-formatting triggered by [this comment](${{ github.event.comment.html_url }}) failed, perhaps someone pushed to the PR in the meantime?"
fi
else
echo "No changes, all good!"
fi
on:
pull_request:
name: Formatting check
jobs:
formatcheck:
name: check-code-format
runs-on: ubuntu-latest
steps:
- name: Check out the code
uses: actions/checkout@v2
- name: Install JuliaFormatter and format
run: |
julia -e 'import Pkg; Pkg.add("JuliaFormatter")'
julia -e 'using JuliaFormatter; format(".")'
- name: Remove trailing whitespace
run: |
find -name '*.jl' -or -name '*.md' -or -name '*.toml' -or -name '*.yml' | while read filename ; do
# remove any trailing spaces
sed --in-place -e 's/\s*$//' "$filename"
# add a final newline if missing
if [[ -s "$filename" && $(tail -c 1 "$filename" |wc -l) -eq 0 ]] ; then
echo >> "$filename"
fi
# squash superfluous final newlines
sed -i -e :a -e '/^\n*$/{$d;N;};/\n$/ba' "$filename"
done
- name: Report any problems
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
if [ `git status -s | wc -l` -ne 0 ]
then gh pr comment ${{ github.event.number }} --body \
":red_square:  Commit ${{ github.event.pull_request.head.sha }} requires formatting!\
"$'\n\n'"Required formatting changes summary:\
"$'\n```\n'"`git diff --stat`"$'\n```'
else gh pr comment ${{ github.event.number }} --body \
":green_circle:  Commit ${{ github.event.pull_request.head.sha }} is formatted properly."
fi
......@@ -189,44 +189,6 @@ mac:julia1.6:
<<: *global_julia16
<<: *global_env_mac
#
# CODE FORMAT CHECKER
#
format:
stage: test
script:
- |
docker run -v "$PWD":/project $CI_REGISTRY/r3/docker/julia-custom julia -e 'using JuliaFormatter; format("/project", verbose=true);'
- |
if [ `docker run -i -v "$PWD":/git alpine/git status -s | wc -l` -ne 0 ] ; then
GITHUB_COMMENT=":red_square: &nbsp;Commit ${CI_COMMIT_SHORT_SHA} requires formatting!\
"$'\n\n'"Required formatting changes summary:\
"$'\n```\n'"`docker run -i -v \"$PWD\":/git alpine/git diff --stat`"$'\n```'
else
GITHUB_COMMENT=":green_circle: &nbsp;Commit ${CI_COMMIT_SHORT_SHA} is formatted properly."
fi
- |
export GITHUB_TOKEN="${GITHUB_ACCESS_TOKEN_FORMATTER}"
export GITHUB_OWNER="lcsb-biocore"
export GITHUB_REPO="cobrexa.jl"
export GITHUB_COMMENT_TYPE=pr
export GITHUB_PR_ISSUE_NUMBER="${CI_EXTERNAL_PULL_REQUEST_IID}"
export GITHUB_COMMENT_FORMAT=""
export GITHUB_COMMENT
- |
docker run -i --rm \
-e GITHUB_TOKEN \
-e GITHUB_OWNER \
-e GITHUB_REPO \
-e GITHUB_COMMENT_TYPE \
-e GITHUB_PR_ISSUE_NUMBER \
-e GITHUB_COMMENT_FORMAT \
-e GITHUB_COMMENT \
cloudposse/github-commenter
<<: *global_dind
<<: *global_trigger_pull_request
#
# ASSETS
#
......
name = "COBREXA"
uuid = "babc4406-5200-4a30-9033-bf5ae714c842"
authors = ["The developers of COBREXA.jl"]
version = "1.3.0"
version = "1.3.1"
[deps]
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
......
......@@ -12,3 +12,9 @@ Pages = map(file -> joinpath("base", "types", "abstract", file), readdir("../src
Modules = [COBREXA]
Pages = map(file -> joinpath("base", "types", file), readdir("../src/base/types"))
```
## Model type wrappers
```@autodocs
Modules = [COBREXA]
Pages = map(file -> joinpath("base", "types", "wrappers", file), readdir("../src/base/types/wrappers"))
```
......@@ -147,24 +147,15 @@ function make_gecko_model(
push!(coupling_row_mass_group, _gecko_capacity(grp, idxs, mms, gmgb_(grp)))
end
# create model with dummy objective
gm = GeckoModel(
spzeros(length(columns) + length(coupling_row_gene_product)),
GeckoModel(
[
_gecko_reaction_column_reactions(columns, model)' * objective(model)
spzeros(length(coupling_row_gene_product))
],
columns,
coupling_row_reaction,
collect(zip(coupling_row_gene_product, gpb_.(gids[coupling_row_gene_product]))),
coupling_row_mass_group,
model,
)
#=
Set objective. This is a separate field because gene products can also be objectives.
This way they can be set as objectives by the user.
=#
gm.objective .= [
_gecko_reaction_column_reactions(gm)' * objective(gm.inner)
spzeros(length(coupling_row_gene_product))
]
return gm
end
......@@ -71,7 +71,7 @@ function warmup_from_variability(
end
)
map(fetch, save_at.(workers, :cobrexa_sampling_warmup_optmodel, Ref(save_model)))
asyncmap(fetch, save_at.(workers, :cobrexa_sampling_warmup_optmodel, Ref(save_model)))
fluxes = hcat(
dpmap(
......@@ -92,7 +92,7 @@ function warmup_from_variability(
)
# free the data on workers
map(fetch, remove_from.(workers, :cobrexa_sampling_warmup_optmodel))
asyncmap(fetch, remove_from.(workers, :cobrexa_sampling_warmup_optmodel))
return fluxes, lbs, ubs
end
......
......@@ -129,9 +129,9 @@ function _screen_impl(
workers = [myid()],
)::Array where {V<:AbstractVector,A,N}
map(fetch, save_at.(workers, :cobrexa_screen_variants_model, Ref(model)))
map(fetch, save_at.(workers, :cobrexa_screen_variants_analysis_fn, Ref(analysis)))
map(fetch, get_from.(workers, Ref(:(precache!(cobrexa_screen_variants_model)))))
asyncmap(fetch, save_at.(workers, :cobrexa_screen_variants_model, Ref(model)))
asyncmap(fetch, save_at.(workers, :cobrexa_screen_variants_analysis_fn, Ref(analysis)))
asyncmap(fetch, get_from.(workers, Ref(:(precache!(cobrexa_screen_variants_model)))))
res = pmap(
(vars, args)::Tuple -> screen_variant(
......@@ -144,8 +144,8 @@ function _screen_impl(
zip(variants, args),
)
map(fetch, remove_from.(workers, :cobrexa_screen_variants_model))
map(fetch, remove_from.(workers, :cobrexa_screen_variants_analysis_fn))
asyncmap(fetch, remove_from.(workers, :cobrexa_screen_variants_model))
asyncmap(fetch, remove_from.(workers, :cobrexa_screen_variants_analysis_fn))
return res
end
......@@ -276,7 +276,7 @@ function _screen_optmodel_modifications_impl(
workers = [myid()],
)::Array where {V<:AbstractVector,VF<:AbstractVector,A,N}
map(
asyncmap(
fetch,
save_at.(
workers,
......@@ -290,12 +290,15 @@ function _screen_optmodel_modifications_impl(
),
),
)
map(fetch, save_at.(workers, :cobrexa_screen_optmodel_modifications_fn, Ref(analysis)))
asyncmap(
fetch,
save_at.(workers, :cobrexa_screen_optmodel_modifications_fn, Ref(analysis)),
)
res = pmap(_screen_optmodel_item, CachingPool(workers), zip(modifications, args))
map(fetch, remove_from.(workers, :cobrexa_screen_optmodel_modifications_data))
map(fetch, remove_from.(workers, :cobrexa_screen_optmodel_modifications_fn))
asyncmap(fetch, remove_from.(workers, :cobrexa_screen_optmodel_modifications_data))
asyncmap(fetch, remove_from.(workers, :cobrexa_screen_optmodel_modifications_fn))
return res
end
......@@ -19,16 +19,18 @@ function make_optimization_model(model::MetabolicModel, optimizer; sense = MAX_S
xl, xu = bounds(model)
optimization_model = Model(optimizer)
@variable(optimization_model, x[i = 1:n])
@variable(optimization_model, x[1:n])
@objective(optimization_model, sense, objective(model)' * x)
@constraint(optimization_model, mb, stoichiometry(model) * x .== balance(model)) # mass balance
@constraint(optimization_model, lbs, xl .<= x) # lower bounds
@constraint(optimization_model, ubs, x .<= xu) # upper bounds
C = coupling(model) # empty if no coupling
cl, cu = coupling_bounds(model)
isempty(C) || @constraint(optimization_model, c_lbs, cl .<= C * x) # coupling lower bounds
isempty(C) || @constraint(optimization_model, c_ubs, C * x .<= cu) # coupling upper bounds
isempty(C) || begin
cl, cu = coupling_bounds(model)
@constraint(optimization_model, c_lbs, cl .<= C * x) # coupling lower bounds
@constraint(optimization_model, c_ubs, C * x .<= cu) # coupling upper bounds
end
return optimization_model
end
......
......@@ -12,7 +12,14 @@ _missing_impl_error(m, a) = throw(MethodError(m, a))
"""
reactions(a::MetabolicModel)::Vector{String}
Return a vector of reaction identifiers in a model.
Return a vector of reaction identifiers in a model. The vector precisely
corresponds to the columns in [`stoichiometry`](@ref) matrix.
For technical reasons, the "reactions" may sometimes not be true reactions but
various virtual and helper pseudo-reactions that are used in the metabolic
modeling, such as metabolite exchanges, separate forward and reverse reactions,
supplies of enzymatic and genetic material and virtual cell volume, etc. To
simplify the view of the model contents use [`reaction_flux`](@ref).
"""
function reactions(a::MetabolicModel)::Vector{String}
_missing_impl_error(reactions, (a,))
......@@ -21,7 +28,11 @@ end
"""
metabolites(a::MetabolicModel)::Vector{String}
Return a vector of metabolite identifiers in a model.
Return a vector of metabolite identifiers in a model. The vector precisely
corresponds to the rows in [`stoichiometry`](@ref) matrix.
As with [`reactions`](@ref)s, some metabolites in models may be virtual,
representing purely technical equality constraints.
"""
function metabolites(a::MetabolicModel)::Vector{String}
_missing_impl_error(metabolites, (a,))
......
......@@ -16,7 +16,7 @@ end
"""
struct _gecko_capacity
A helper struct that contains the gene product capacity terms organized by
A helper struct that contains the gene product capacity terms organized by
the grouping type, e.g. metabolic or membrane groups etc.
"""
struct _gecko_capacity
......@@ -52,19 +52,18 @@ The model wraps another "internal" model, and adds following modifications:
proteins).
The structure contains fields `columns` that describe the contents of the
coupling columns, `coupling_row_reaction`, `coupling_row_gene_product` and
`coupling_row_mass_group` that describe correspondence of the coupling rows to
original model and determine the coupling bounds, and `inner`, which is the
original wrapped model. Note, `objective` is the objective vector of the model.
Special care needs to be taken to ensure that its length is the sum of
`n_reactions(model)` and `n_genes(model)` when the user modifies it, where
`model` is the GeckoModel in question.
stoichiometry matrix columns, `coupling_row_reaction`,
`coupling_row_gene_product` and `coupling_row_mass_group` that describe
correspondence of the coupling rows to original model and determine the
coupling bounds (note: the coupling for gene product is actually added to
stoichiometry, not in [`coupling`](@ref)), and `inner`, which is the original
wrapped model. The `objective` of the model includes also the extra columns for
individual genes, as held by `coupling_row_gene_product`.
Implementation exposes the split reactions (available as `reactions(model)`),
but retains the original "simple" reactions accessible by [`fluxes`](@ref). All
constraints are implemented using [`coupling`](@ref) and
[`coupling_bounds`](@ref), i.e., all virtual metabolites described by GECKO are
purely virtual and do not occur in [`metabolites`](@ref).
but retains the original "simple" reactions accessible by [`fluxes`](@ref).
The related constraints are implemented using [`coupling`](@ref) and
[`coupling_bounds`](@ref).
"""
struct GeckoModel <: ModelWrapper
objective::SparseVec
......@@ -98,10 +97,9 @@ end
objective(model::GeckoModel)
Return the objective of the [`GeckoModel`](@ref). Note, the objective is with
respect to the internal variables, i.e. [`reactions(model)`](@ref) and
[`genes(model)`](@ref). To manually set the objective, index into
`model.objective` appropriately, and remember to set the previous coefficients
to zero.
respect to the internal variables, i.e. [`reactions(model)`](@ref), which are
the unidirectional reactions and the genes involved in enzymatic reactions that
have kinetic data.
"""
objective(model::GeckoModel) = model.objective
......@@ -112,16 +110,17 @@ Returns the internal reactions in a [`GeckoModel`](@ref) (these may be split
to forward- and reverse-only parts with different isozyme indexes; reactions
IDs are mangled accordingly with suffixes).
"""
reactions(model::GeckoModel) =
let inner_reactions = reactions(model.inner)
[
_gecko_reaction_name(
inner_reactions[col.reaction_idx],
col.direction,
col.isozyme_idx,
) for col in model.columns
]
end
function reactions(model::GeckoModel)
inner_reactions = reactions(model.inner)
mangled_reactions = [
_gecko_reaction_name(
inner_reactions[col.reaction_idx],
col.direction,
col.isozyme_idx,
) for col in model.columns
]
[mangled_reactions; genes(model)]
end
"""
n_reactions(model::GeckoModel)
......@@ -129,7 +128,7 @@ reactions(model::GeckoModel) =
Returns the number of all irreversible reactions in `model` as well as the
number of gene products that take part in enzymatic reactions.
"""
n_reactions(model::GeckoModel) = length(model.columns)
n_reactions(model::GeckoModel) = length(model.columns) + n_genes(model)
"""
bounds(model::GeckoModel)
......@@ -217,7 +216,7 @@ end
"""
balance(model::GeckoModel)
Return the balance of the reactions in the inner model, concatenated with a vector of
Return the balance of the reactions in the inner model, concatenated with a vector of
zeros representing the enzyme balance of a [`GeckoModel`](@ref).
"""
balance(model::GeckoModel) =
......@@ -233,7 +232,7 @@ n_genes(model::GeckoModel) = length(model.coupling_row_gene_product)
"""
genes(model::GeckoModel)
Return the gene ids of genes that have enzymatic constraints associated with them.
Return the gene ids of genes that have enzymatic constraints associated with them.
"""
genes(model::GeckoModel) =
genes(model.inner)[[idx for (idx, _) in model.coupling_row_gene_product]]
......@@ -243,7 +242,7 @@ genes(model::GeckoModel) =
Return the ids of all metabolites, both real and pseudo, for a [`GeckoModel`](@ref).
"""
metabolites(model::GeckoModel) = [metabolites(model.inner); genes(model) .* "#supply"]
metabolites(model::GeckoModel) = [metabolites(model.inner); genes(model) .* "#gecko"]
"""
n_metabolites(model::GeckoModel)
......
......@@ -8,7 +8,7 @@ function that returns a dictionary of solved fluxes.
"""
gene_product_dict(model::GeckoModel, opt_model) =
is_solved(opt_model) ?
Dict(genes(model) .=> value.(opt_model[:x])[(n_reactions(model)+1):end]) : nothing
Dict(genes(model) .=> value.(opt_model[:x])[(length(model.columns)+1):end]) : nothing
"""
gene_product_dict(model::GeckoModel)
......@@ -26,7 +26,7 @@ gene_product_mass_group_dict(model::GeckoModel, opt_model) =
is_solved(opt_model) ?
Dict(
grp.group_id => dot(
value.(opt_model[:x])[n_reactions(model).+grp.gene_product_idxs],
value.(opt_model[:x])[length(model.columns).+grp.gene_product_idxs],
grp.gene_product_molar_masses,
) for grp in model.coupling_row_mass_group
) : nothing
......@@ -34,9 +34,10 @@ gene_product_mass_group_dict(model::GeckoModel, opt_model) =
"""
gene_product_mass_group_dict(model::GeckoModel)
A pipe-able variant of [`mass_group_dict`](@ref).
A pipe-able variant of [`gene_product_mass_group_dict`](@ref).
"""
gene_product_mass_group_dict(model::GeckoModel) = x -> mass_group_dict(model, x)
gene_product_mass_group_dict(model::GeckoModel) =
x -> gene_product_mass_group_dict(model, x)
"""
gene_product_mass(model::SMomentModel)
......
......@@ -15,12 +15,20 @@ _gecko_reaction_name(original_name::String, direction::Int, isozyme_idx::Int) =
Retrieve a utility mapping between reactions and split reactions; rows
correspond to "original" reactions, columns correspond to "split" reactions.
"""
_gecko_reaction_column_reactions(model::GeckoModel) = sparse(
[col.reaction_idx for col in model.columns],
1:length(model.columns),
[col.direction >= 0 ? 1 : -1 for col in model.columns],
n_reactions(model.inner),
length(model.columns),
_gecko_reaction_column_reactions(model::GeckoModel) =
_gecko_reaction_column_reactions(model.columns, model.inner)
"""
_gecko_reaction_column_reactions(columns, inner)
Helper method that doesn't require the whole [`GeckoModel`](@ref).
"""
_gecko_reaction_column_reactions(columns, inner) = sparse(
[col.reaction_idx for col in columns],
1:length(columns),
[col.direction >= 0 ? 1 : -1 for col in columns],
n_reactions(inner),
length(columns),
)
"""
......
"""
function gapfill_minimum_reactions(
model::MetabolicModel,
universal_reactions::Vector{Reaction},
optimizer;
objective_bounds = (_constants.tolerance, _constants.default_reaction_bound),
maximum_new_reactions = 5,
weights = fill(1.0, length(universal_reactions)),
modifications = [],
)
Find a minimal set of reactions from `universal_reactions` that should be added
to `model` so that the model has a feasible solution with bounds on its
objective function given in `objective_bounds`. Weights of the added reactions
may be specified in `weights` to prefer adding reactions with lower weights.
Internally, this builds and solves a mixed integer program, following
the method of Reed et al. (Reed, Jennifer L., et al. "Systems approach to
refining genome annotation." *Proceedings of the National Academy of Sciences*
(2006)).
The function returns a solved JuMP optimization model, with the boolean
reaction inclusion indicators in variable vector `y`. Use
[`gapfilled_mask`](@ref) or [`gapfilled_rids`](@ref) to collect the reaction
information in Julia datatypes.
To reduce the uncertainty in the MILP solver (and likely reduce the
complexity), you may put a limit on the size of the added reaction set in
`maximum_new_reactions`.
"""
function gapfill_minimum_reactions(
model::MetabolicModel,
universal_reactions::Vector{Reaction},
optimizer;
objective_bounds = (_constants.tolerance, _constants.default_reaction_bound),
maximum_new_reactions = length(universal_reactions),
weights = fill(1.0, length(universal_reactions)),
modifications = [],
)
precache!(model)
# constraints from universal reactions that can fill gaps
univs = _universal_stoichiometry(universal_reactions, metabolites(model))
# add space for additional metabolites and glue with the universal reaction
# stoichiometry
extended_stoichiometry = [[
stoichiometry(model)
spzeros(length(univs.new_mids), n_reactions(model))
] univs.stoichiometry]
# make the model anew (we can't really use make_optimization_model because
# we need the balances and several other things completely removed. Could
# be solved either by parametrizing make_optimization_model or by making a
# tiny temporary wrapper for this.
# keep this in sync with src/base/solver.jl, except for adding balances.
opt_model = Model(optimizer)
@variable(opt_model, x[1:n_reactions(model)])
xl, xu = bounds(model)
@constraint(opt_model, lbs, xl .<= x)
@constraint(opt_model, ubs, x .<= xu)
C = coupling(model)
isempty(C) || begin
cl, cu = coupling_bounds(model)
@constraint(opt_model, c_lbs, cl .<= C * x)
@constraint(opt_model, c_ubs, C * x .<= cu)
end
# add the variables for new stuff
@variable(opt_model, ux[1:length(universal_reactions)]) # fluxes from universal reactions
@variable(opt_model, y[1:length(universal_reactions)], Bin) # indicators
# combined metabolite balances
@constraint(
opt_model,
extended_stoichiometry * [x; ux] .==
[balance(model); zeros(length(univs.new_mids))]
)
# objective bounds
@constraint(opt_model, objective_bounds[1] <= objective(model)' * x)
@constraint(opt_model, objective_bounds[2] >= objective(model)' * x)
# flux bounds of universal reactions with indicators
@constraint(opt_model, ulb, univs.lbs .* y .<= ux)
@constraint(opt_model, uub, univs.ubs .* y .>= ux)
# minimize the total number of indicated reactions
@objective(opt_model, Min, weights' * y)
# limit the number of indicated reactions
# (prevents the solver from exploring too far)
@constraint(opt_model, sum(y) <= maximum_new_reactions)
# apply all modifications
for mod in modifications
mod(opt_model, model)
end
optimize!(opt_model)
return opt_model
end
"""
gapfilled_mask(opt_model::BitVector)
Get a `BitVector` of added reactions from the model solved by
[`gapfill_minimum_reactions`](@ref). The bit indexes correspond to the indexes
of `universal_reactions` given to the gapfilling function. In case the model is
not solved, this returns `nothing`.
# Example
gapfill_minimum_reactions(myModel, myReactions, Tulip.Optimizer) |> gapfilled_mask
"""
gapfilled_mask(opt_model)::BitVector =
is_solved(opt_model) ? value.(opt_model[:y]) .> 0 : nothing
"""
gapfilled_rids(opt_model, universal_reactions::Vector{Reaction})::Vector{String}
Utility to extract a short vector of IDs of the reactions added by the