Commit a54a0560 authored by Miroslav Kratochvil's avatar Miroslav Kratochvil
Browse files

revised benchmark scripts

parent 881dd652
using Distributed, COBREXA, Serialization, ClusterManagers
using Logging
# Benchmark driver ("dist" variant): distributed screening of model variants.
# Usage: julia script.jl <tag> <seed> <n_models>; SLURM_NTASKS must be set.
global_logger(ConsoleLogger(stderr, Logging.Debug))
if length(ARGS)!=3
@error "wrong arguments"
exit(1)
end
tag=ARGS[1]
seed=parse(Int, ARGS[2])
n_models=parse(Int, ARGS[3])
# One Julia worker per SLURM task.
n_workers = parse(Int, ENV["SLURM_NTASKS"])
# Key identifying the serialized model files produced by the builder script.
name="$tag-$n_models-$seed"
# Log file encodes all run parameters; records are tab-separated (see log()).
logHandle = open("logs/log_dist-$tag-$n_models-$n_workers-$seed", "w")
# Append one tab-separated record (operation, run parameters, extra values)
# to the benchmark log and flush so records survive a crash.
# NOTE: this intentionally shadows Base.log (the math function) in Main.
function log(op, args...)
    fixed = [op, tag, string(seed), string(n_models), string(n_workers)]
    extra = [string(a) for a in args]
    println(logHandle, join(vcat(fixed, extra), "\t"))
    flush(logHandle)
end
# Provision workers via SLURM; :master_worker topology avoids all-to-all
# worker connections (workers only talk to the master).
addprocs_slurm(n_workers, topology=:master_worker)
# Local-run alternative kept for debugging outside the cluster:
#addprocs(n_workers, topology=:master_worker)
@everywhere begin
using COBREXA, SparseArrays
# Minimal ad-hoc model types for the benchmark, defined on every worker.
# `rxn` is one reaction: stoichiometry (metabolite id => coefficient),
# flux bounds, and objective coefficient.
mutable struct rxn
stoi::Dict{String,Float64}
lb::Float64
ub::Float64
oc::Float64
end
# Shallow copy: the stoi Dict is SHARED between copies. Safe here because the
# variant functions below only delete entries from the model-level rxns Dict
# (which they copy) and never mutate a rxn's stoichiometry in place.
Base.copy(x::rxn) = rxn(x.stoi, x.lb, x.ub, x.oc)
# `mdl` implements COBREXA's MetabolicModel interface: reactions keyed by id,
# internal metabolites (mets), and community/environment metabolites (cmets).
mutable struct mdl <: MetabolicModel
rxns::Dict{String, rxn}
mets::Set{String}
cmets::Set{String}
end
# Shallow copy as well — containers are shared until replaced by the caller.
Base.copy(x::mdl) = mdl(x.rxns, x.mets, x.cmets)
# COBREXA accessor interface. Ordering relies on Dict/Set iteration order,
# which is consistent across these accessors within one session because they
# all iterate the same containers.
COBREXA.reactions(m::mdl) = collect(keys(m.rxns))
COBREXA.n_reactions(m::mdl) = length(m.rxns)
COBREXA.metabolites(m::mdl) = vcat(collect(m.mets), collect(m.cmets))
COBREXA.n_metabolites(m::mdl) = length(m.mets) + length(m.cmets)
# Steady state: zero right-hand side for all metabolites.
COBREXA.balance(m::mdl) = spzeros(n_metabolites(m))
# Assemble the sparse stoichiometry matrix (metabolites × reactions) from the
# per-reaction stoi Dicts, using index maps built from the accessor orderings.
COBREXA.stoichiometry(m::mdl) = begin
rd = Dict(zip(reactions(m), 1:n_reactions(m)))
md = Dict(zip(metabolites(m), 1:n_metabolites(m)))
S = spzeros(n_metabolites(m), n_reactions(m))
for (rid,r) in m.rxns
ri = rd[rid]
for (mid, s) in r.stoi
S[md[mid],ri] = s
end
end
S
end
COBREXA.objective(m::mdl) = sparse([r.oc for (_,r) in m.rxns])
COBREXA.bounds(m::mdl) = (
sparse([r.lb for (_,r) in m.rxns]),
sparse([r.ub for (_,r) in m.rxns]),
)
end
# Report the worker distribution, then load the community model parts.
@info "Distribution:" workers=workers()
# Untimed warm-up deserialize (compiles the IO path) before the measured loads.
cmets = deserialize("data/cmets-"*name)::Set{String}
t=@elapsed mets = deserialize("data/mets-"*name)::Set{String}
log("load_mets", t)
t=@elapsed cmets = deserialize("data/cmets-"*name)::Set{String}
log("load_cmets", t)
t=@elapsed rxns = deserialize("data/rxns-"*name)::Dict{String,rxn}
log("load_rxns", t)
# define the community model structure
m = mdl(rxns, mets, cmets)
# Variant generator: returns a closure that, given a model, yields a copy
# with reaction `id` removed. Only the rxns Dict is copied; the rxn values
# themselves stay shared (they are not mutated).
function remove_rxn(id::String)
    function variant(model::mdl)
        stripped = copy(model)
        stripped.rxns = copy(model.rxns)
        delete!(stripped.rxns, id)
        return stripped
    end
    return variant
end
# Dummy single-variant run to trigger distributed precompilation, then the
# measured run: for every reaction, screen the variant with that reaction
# removed, computing the sum of all remaining stoichiometric coefficients.
r = screen_model_variants(m, [[remove_rxn(first(keys(m.rxns)))]], m -> sum([sum(values(r.stoi)) for (_,r) in m.rxns]); workers=workers())
t = @elapsed r = screen_model_variants(m, [[remove_rxn(k)] for k in keys(m.rxns)], m -> sum([sum(values(r.stoi)) for (_,r) in m.rxns]); workers=workers())
log("dist_sum_total", t)
# One record per reaction (broadcast pairs each key with its result).
log.("stoi_sum", keys(m.rxns), r)
using Distributed, COBREXA, Serialization, ClusterManagers
using Logging
# Benchmark driver ("fva" variant): distributed flux variability analysis.
# Usage: julia script.jl <tag> <seed> <n_models>; SLURM_NTASKS must be set.
global_logger(ConsoleLogger(stderr, Logging.Debug))
if length(ARGS)!=3
@error "wrong arguments"
exit(1)
end
tag=ARGS[1]
seed=parse(Int, ARGS[2])
n_models=parse(Int, ARGS[3])
n_workers = parse(Int, ENV["SLURM_NTASKS"])
name="$tag-$n_models-$seed"
logHandle = open("logs/log_fva-$tag-$n_models-$n_workers-$seed", "w")
# Append a tab-separated record to the benchmark log and flush immediately.
# NOTE: shadows Base.log in Main — intentional for this script.
function log(op, args...)
println(logHandle, join(vcat([op, tag, string(seed), string(n_models), string(n_workers)],[string(a) for a in args]),"\t"))
flush(logHandle)
end
# Workers via SLURM; master_worker topology avoids all-to-all connections.
addprocs_slurm(n_workers, topology=:master_worker)
#addprocs(n_workers, topology=:master_worker)
@everywhere begin
using COBREXA, SparseArrays
# Ad-hoc model types for the FVA benchmark (no copy methods needed here,
# since FVA does not build model variants).
mutable struct rxn
stoi::Dict{String,Float64}
lb::Float64
ub::Float64
oc::Float64
end
# `mdl` implements COBREXA's MetabolicModel interface; cmets are the shared
# community/environment metabolites.
mutable struct mdl <: MetabolicModel
rxns::Dict{String, rxn}
mets::Set{String}
cmets::Set{String}
end
# Accessors; ordering follows Dict/Set iteration order, consistent across the
# accessors within a session since they iterate the same containers.
COBREXA.reactions(m::mdl) = collect(keys(m.rxns))
COBREXA.n_reactions(m::mdl) = length(m.rxns)
COBREXA.metabolites(m::mdl) = vcat(collect(m.mets), collect(m.cmets))
COBREXA.n_metabolites(m::mdl) = length(m.mets) + length(m.cmets)
# Steady state: zero right-hand side.
COBREXA.balance(m::mdl) = spzeros(n_metabolites(m))
# Build the sparse stoichiometry matrix from the per-reaction stoi Dicts.
COBREXA.stoichiometry(m::mdl) = begin
rd = Dict(zip(reactions(m), 1:n_reactions(m)))
md = Dict(zip(metabolites(m), 1:n_metabolites(m)))
S = spzeros(n_metabolites(m), n_reactions(m))
for (rid,r) in m.rxns
ri = rd[rid]
for (mid, s) in r.stoi
S[md[mid],ri] = s
end
end
S
end
COBREXA.objective(m::mdl) = sparse([r.oc for (_,r) in m.rxns])
COBREXA.bounds(m::mdl) = (
sparse([r.lb for (_,r) in m.rxns]),
sparse([r.ub for (_,r) in m.rxns]),
)
end
# Report the worker distribution, then load the community model parts.
@info "Distribution:" workers=workers()
# Untimed warm-up deserialize before the measured loads.
cmets = deserialize("data/cmets-"*name)::Set{String}
t=@elapsed mets = deserialize("data/mets-"*name)::Set{String}
log("load_mets", t)
t=@elapsed cmets = deserialize("data/cmets-"*name)::Set{String}
log("load_cmets", t)
t=@elapsed rxns = deserialize("data/rxns-"*name)::Dict{String,rxn}
log("load_rxns", t)
# define the community model structure
m = mdl(rxns, mets, cmets)
# Indexes of the environment exchange reactions: the model builder keys them
# by the community-metabolite id, so a reaction id found in m.cmets marks one.
environment_reactions = [i for (i, rid) in enumerate(reactions(m)) if rid in m.cmets]
using Gurobi
# Dummy single-reaction FVA to precompile the distributed pipeline, then the
# measured FVA over all environment exchange reactions.
# NOTE(review): gamma=1.0 presumably fixes the objective at 100% of its
# optimum during variability scanning — confirm against COBREXA docs.
r = flux_variability_analysis(m, environment_reactions[begin:begin], Gurobi.Optimizer, workers(); gamma=1.0)
t = @elapsed r = flux_variability_analysis(m, environment_reactions, Gurobi.Optimizer, workers(); gamma=1.0)
log("fva_total", t)
# Per-reaction record: reaction id, minimum flux, maximum flux.
log.("variability", reactions(m)[environment_reactions], r[:,1], r[:,2])
using SBML, StableRNGs, Serialization
using Logging
# Community-model builder: merges n_models SBML models (picked reproducibly
# by seed) into one community model and serializes the parts to data/.
# Usage: julia script.jl <tag> <seed> <n_models>.
global_logger(ConsoleLogger(stderr, Logging.Debug))
if length(ARGS)!=3
@error "wrong arguments"
exit(1)
end
tag=ARGS[1]
seed=parse(Int, ARGS[2])
n_models=parse(Int, ARGS[3])
name="$tag-$n_models-$seed"
logHandle = open("logs/log_create-"*name, "w")
# Append a tab-separated record and flush (no n_workers field here — this
# builder runs on a single node). NOTE: shadows Base.log in Main.
function log(op, args...)
println(logHandle, join(vcat([op, tag, string(seed), string(n_models)],[string(a) for a in args]),"\t"))
flush(logHandle)
end
# Naming scheme for the merged community model:
#   intMetId:  organism-internal metabolite id, "<file>/<met>"
#   commMetId: shared community-environment metabolite id, "@<met>"
#   xchgRxnId: exchange reaction id linking the two, "@<met><=><file>"
intMetId(fn, met) = string(fn, "/", met)
commMetId(met) = string("@", met)
xchgRxnId(fn, met) = string("@", met, "<=>", fn)
# One reaction of the community model: stoichiometry (metabolite id =>
# coefficient), flux bounds, and objective coefficient.
mutable struct rxn
stoi::Dict{String,Float64}
lb::Float64
ub::Float64
oc::Float64
end
# Global accumulators that load_files() fills while merging models.
rxns = Dict{String,rxn}()
mets = Set{String}()
cmets = Set{String}()
# Deterministic RNG: the shuffled model selection is reproducible per seed.
r = StableRNG(seed)
files = readdir("models/")
# In-place Fisher–Yates (Knuth) shuffle of `files`, driven by the StableRNG
# `r` so the same seed always selects the same models. Kept hand-rolled (not
# Random.shuffle!) to preserve the exact historical permutation per seed.
for i in 1:length(files)-1
    j = rand(r, i:length(files))
    files[i], files[j] = files[j], files[i]
end
# Load each SBML file under models/ and merge it into the global community
# model (rxns/mets/cmets). Metabolite ids are namespaced per source file;
# species in the "e" (extracellular) compartment additionally get a shared
# community metabolite plus an exchange reaction connecting the two pools.
function load_files(fns)
for fn in fns
@info fn
m = readSBML("models/" * fn)
# Strip the extension — assumes a 4-character suffix like ".xml".
fn = fn[begin:end-4]
for (rid, r) in m.reactions
# NOTE(review): r.lb[1]/r.ub[1] take the first element of the SBML.jl
# bound tuples and r.oc the objective coefficient — confirm against
# the SBML.jl Reaction structure in use here.
nr = rxn(Dict{String,Float64}(), r.lb[1], r.ub[1], r.oc)
for (sid, coef) in r.stoichiometry
mid = intMetId(fn,sid)
push!(mets, mid)
nr.stoi[mid] = coef
end
rxns[fn*"/"*rid] = nr
end
for(sid, s) in m.species
if s.compartment == "e"
# Extracellular species: register the community metabolite and a
# reversible exchange reaction "@met<=>file" moving one unit from the
# organism's internal pool (-1) into the shared pool (+1).
push!(cmets,commMetId(sid))
rxns[xchgRxnId(fn,sid)] = rxn(
Dict(commMetId(sid) => 1,
intMetId(fn, sid) => -1),
-1000.0,
1000.0,
0)
end
end
end
end
# Dummy load of a single file to precompile readSBML and the merge path,
# then reset the accumulators and run the measured load of n_models files.
load_files(files[begin:begin])
empty!(rxns)
empty!(mets)
empty!(cmets)
t = @elapsed load_files(files[1:n_models])
# Give every community metabolite an environment exchange reaction keyed by
# the metabolite id itself: dumping is effectively unlimited (lb = -1000)
# while intake is restricted (ub = 100).
for cm in cmets
    rxns[cm] = rxn(Dict(cm => 1), -1000.0, 100.0, 0)
end
log("load", t)
log("community", length(rxns), length(mets), length(cmets))
# One record per merged source model.
log.("community_contains", files[1:n_models])
# Dummy serialize to precompile the path, then timed saves of each part,
# logging elapsed time and the resulting on-disk file size.
open(f -> serialize(f, cmets), "data/cmets-"*name, "w")
t = @elapsed open(f -> serialize(f, rxns), "data/rxns-"*name, "w")
log("save_rxns",t,filesize("data/rxns-"*name))
t = @elapsed open(f -> serialize(f, mets), "data/mets-"*name, "w")
log("save_mets", t, filesize("data/mets-"*name))
t = @elapsed open(f -> serialize(f, cmets), "data/cmets-"*name, "w")
log("save_cmets", t, filesize("data/cmets-"*name))
using Distributed, COBREXA, Serialization, ClusterManagers
using Logging
# Benchmark driver ("sdist" variant): distributed screening over a
# pre-serialized model reference instead of shipping the model to workers.
# Usage: julia script.jl <tag> <seed> <n_models>; SLURM_NTASKS must be set.
global_logger(ConsoleLogger(stderr, Logging.Debug))
if length(ARGS)!=3
@error "wrong arguments"
exit(1)
end
tag=ARGS[1]
seed=parse(Int, ARGS[2])
n_models=parse(Int, ARGS[3])
n_workers = parse(Int, ENV["SLURM_NTASKS"])
name="$tag-$n_models-$seed"
logHandle = open("logs/log_sdist-$tag-$n_models-$n_workers-$seed", "w")
# Append a tab-separated record to the benchmark log and flush immediately.
# NOTE: shadows Base.log in Main — intentional for this script.
function log(op, args...)
println(logHandle, join(vcat([op, tag, string(seed), string(n_models), string(n_workers)],[string(a) for a in args]),"\t"))
flush(logHandle)
end
# Workers via SLURM; master_worker topology avoids all-to-all connections.
addprocs_slurm(n_workers, topology=:master_worker)
#addprocs(n_workers, topology=:master_worker)
@everywhere begin
using COBREXA, SparseArrays
# Ad-hoc model types for the benchmark, defined on every worker.
mutable struct rxn
stoi::Dict{String,Float64}
lb::Float64
ub::Float64
oc::Float64
end
# Shallow copy: the stoi Dict is SHARED between copies; safe because variant
# functions only delete entries from a copied rxns Dict and never mutate a
# rxn's stoichiometry in place.
Base.copy(x::rxn) = rxn(x.stoi, x.lb, x.ub, x.oc)
# `mdl` implements COBREXA's MetabolicModel interface.
mutable struct mdl <: MetabolicModel
rxns::Dict{String, rxn}
mets::Set{String}
cmets::Set{String}
end
# Shallow copy — containers shared until replaced by the caller.
Base.copy(x::mdl) = mdl(x.rxns, x.mets, x.cmets)
# Accessors; ordering follows Dict/Set iteration order, consistent across
# the accessors within one session.
COBREXA.reactions(m::mdl) = collect(keys(m.rxns))
COBREXA.n_reactions(m::mdl) = length(m.rxns)
COBREXA.metabolites(m::mdl) = vcat(collect(m.mets), collect(m.cmets))
COBREXA.n_metabolites(m::mdl) = length(m.mets) + length(m.cmets)
# Steady state: zero right-hand side.
COBREXA.balance(m::mdl) = spzeros(n_metabolites(m))
# Build the sparse stoichiometry matrix from the per-reaction stoi Dicts.
COBREXA.stoichiometry(m::mdl) = begin
rd = Dict(zip(reactions(m), 1:n_reactions(m)))
md = Dict(zip(metabolites(m), 1:n_metabolites(m)))
S = spzeros(n_metabolites(m), n_reactions(m))
for (rid,r) in m.rxns
ri = rd[rid]
for (mid, s) in r.stoi
S[md[mid],ri] = s
end
end
S
end
COBREXA.objective(m::mdl) = sparse([r.oc for (_,r) in m.rxns])
COBREXA.bounds(m::mdl) = (
sparse([r.lb for (_,r) in m.rxns]),
sparse([r.ub for (_,r) in m.rxns]),
)
end
# Report the worker distribution, then load the community model parts.
@info "Distribution:" workers=workers()
# Untimed warm-up deserialize before the measured loads.
cmets = deserialize("data/cmets-"*name)::Set{String}
t=@elapsed mets = deserialize("data/mets-"*name)::Set{String}
log("load_mets", t)
t=@elapsed cmets = deserialize("data/cmets-"*name)::Set{String}
log("load_cmets", t)
t=@elapsed rxns = deserialize("data/rxns-"*name)::Dict{String,rxn}
log("load_rxns", t)
# define the community model structure
m = mdl(rxns, mets, cmets)
# Dummy + timed serialize_model: presumably wraps the model as an on-disk
# Serialized reference that workers load themselves instead of receiving it
# over the wire — confirm against COBREXA's Serialized model docs.
sm = serialize_model(m, "data/sm-"*name)
t = @elapsed sm = serialize_model(m, "data/sm-"*name)
log("serialize",t)
# Variant generator for serialized models: unwraps the Serialized wrapper via
# its .m field (assumes the model is materialized on the worker at this point
# — TODO confirm against COBREXA's Serialized semantics) and returns a copy
# with reaction `id` deleted. Only the rxns Dict is copied.
function remove_rxn(id::String)
    return function (model::Serialized{mdl})
        trimmed = copy(model.m)
        trimmed.rxns = copy(model.m.rxns)
        delete!(trimmed.rxns, id)
        return trimmed
    end
end
# Dummy single-variant run to trigger distributed precompilation, then the
# measured run over the serialized model: for every reaction, screen the
# variant with that reaction removed and sum all remaining stoichiometric
# coefficients.
r = screen_model_variants(sm, [[remove_rxn(first(keys(m.rxns)))]], m -> sum([sum(values(r.stoi)) for (_,r) in m.rxns]); workers=workers())
t = @elapsed r = screen_model_variants(sm, [[remove_rxn(k)] for k in keys(m.rxns)], m -> sum([sum(values(r.stoi)) for (_,r) in m.rxns]); workers=workers())
log("sdist_sum_total", t)
# One record per reaction (broadcast pairs each key with its result).
log.("stoi_sum", keys(m.rxns), r)
using Distributed, COBREXA, Serialization, ClusterManagers, Gurobi
using Logging
# Benchmark driver ("envelope" variant): distributed objective envelope over
# selected exchange reactions of a JSON model.
# Usage: julia script.jl <tag> <seed> <n_models> <n_breaks>; SLURM_NTASKS set.
global_logger(ConsoleLogger(stderr, Logging.Debug))
if length(ARGS) != 4
@error "bad arguments"
exit(1)
end
tag = ARGS[1]
seed = parse(Int, ARGS[2])
n_models = parse(Int, ARGS[3])
# Number of lattice points per scanned dimension of the envelope.
n_breaks = parse(Int, ARGS[4])
n_workers = parse(Int, ENV["SLURM_NTASKS"])
name = "$tag-$n_models-$n_breaks-$seed"
logHandle = open("logs/log_envelope-$tag-$n_models-$n_workers-$n_breaks-$seed", "w")
# Append one tab-separated record (operation, run parameters, extra values)
# to the benchmark log and flush so records survive a crash.
# NOTE: this intentionally shadows Base.log (the math function) in Main.
function log(op, args...)
    prefix = [
        op,
        tag,
        string(seed),
        string(n_models),
        string(n_workers),
        string(n_breaks),
    ]
    println(logHandle, join(vcat(prefix, [string(a) for a in args]), "\t"))
    flush(logHandle)
end
# Workers via SLURM; master_worker topology avoids all-to-all connections.
addprocs_slurm(n_workers, topology=:master_worker)
@info "Distribution:" workers = workers()
model_file = "data/model-$tag-$n_models-$seed.json"
#speed of COBRApy-like model loading+building at once
test_load(fn) = make_optimization_model(load_model(fn), Gurobi.Optimizer)
t=@elapsed test_load(model_file) #precompile
t=@elapsed test_load(model_file)
log("envelope_json_load_make", t)
# Measured plain model load (no optimizer build), into a StandardModel.
t = @elapsed m = load_model(StandardModel, model_file)
log("envelope_json_load", t)
# Pick n_exchanges substrate exchange reactions that exist in the model, open
# their bounds for the envelope scan, and serialize the model for the workers.
interesting_substrates = ["sucr", "gln_L", "gal", "glu_L", "glc_D", "xyl_D", "arab_L"]
interesting_exchanges = "@M_" .* interesting_substrates .* "(e)"
n_exchanges = 3
exchanges = filter(in(reactions(m)), interesting_exchanges)
# BUGFIX: check the number of exchanges actually FOUND in the model, not the
# size of the candidate list (which is a constant 7, so the old test could
# never fire and the slice below would throw a BoundsError instead of the
# intended graceful exit). This matches the `len(exchanges) < n_exchanges`
# check in the COBRApy counterpart script.
if length(exchanges) < n_exchanges
@error "not enough exchange reactions" exchanges
exit(2)
end
exchanges = exchanges[begin:n_exchanges]
log("envelope_exchanges", join(exchanges, ","))
# Allow intake of each scanned substrate: 0 <= flux <= 100.
for e in exchanges
m.reactions[e].lb = 0.0
m.reactions[e].ub = 100.0
end
t = @elapsed sm = serialize_model(m, "data/sm-$tag-$n_models-$n_workers-$seed")
log("serialize", t)
@everywhere using COBREXA, Gurobi
# Warm-up pass over a single exchange reaction so the measured run below is
# not dominated by compilation; note the tiny lattice (samples = n_workers).
@info "precompiling"
t = @elapsed begin
ridxs = Int.(indexin(exchanges[begin:begin], reactions(m)))
variability = flux_variability_analysis(
sm,
ridxs,
Gurobi.Optimizer;
workers = workers(),
# silence solver output; Method=0/Quad=0 pin Gurobi to primal simplex
# settings so timings are comparable with the COBRApy run.
modifications = [silence, change_optimizer_attribute("Method", 0), change_optimizer_attribute("Quad", 0)],
optimal_objective_value = 0, # simulate COBRApy behavior
bounds = _ -> (0, Inf),
)
objective_envelope(
sm,
exchanges[begin:begin],
Gurobi.Optimizer;
workers = workers(),
modifications = [silence, change_optimizer_attribute("Method", 0), change_optimizer_attribute("Quad", 0)],
lattice_args = (
samples = n_workers,
ranges = collect(zip(variability[:, 1], variability[:, 2])),
),
)
end
log("envelope_precompile_time", t)
@info "running"
# Measured run: FVA over all selected exchanges to get per-dimension flux
# ranges, then the objective envelope on an n_breaks lattice over them.
t = @elapsed begin
ridxs = Int.(indexin(exchanges, reactions(m)))
variability = flux_variability_analysis(
sm,
ridxs,
Gurobi.Optimizer;
workers = workers(),
modifications = [silence, change_optimizer_attribute("Method", 0), change_optimizer_attribute("Quad", 0)],
optimal_objective_value = 0, # simulate COBRApy behavior
bounds = _ -> (0, Inf),
)
# `global` needed: `r` is assigned inside the @elapsed begin/end block but
# read at top level afterwards.
global r = objective_envelope(
sm,
exchanges,
Gurobi.Optimizer;
workers = workers(),
modifications = [silence, change_optimizer_attribute("Method", 0), change_optimizer_attribute("Quad", 0)],
lattice_args = (
samples = n_breaks,
ranges = collect(zip(variability[:, 1], variability[:, 2])),
),
)
end
log("envelope_time", n_exchanges, t)
log("envelope", n_exchanges, join(r.values, ","))
close(logHandle)
#!/usr/bin/env python
# COBRApy counterpart of the Julia envelope benchmark, for timing comparison.
# Usage: script.py <tag> <seed> <n_models> <n_breaks>; SLURM_CPUS_PER_TASK set.
# NOTE(review): indentation appears stripped in this paste — restore before running.
import cobra, sys, os
from time import time
if len(sys.argv) != 5:
sys.exit(1)
tag=sys.argv[1]
seed=int(sys.argv[2])
n_models=int(sys.argv[3])
n_breaks=int(sys.argv[4])
# COBRApy parallelizes with processes on one node, hence CPUS_PER_TASK here
# (the Julia scripts use SLURM_NTASKS instead).
n_workers=int(os.environ['SLURM_CPUS_PER_TASK'])
modelfile = "data/model-%s-%d-%d.json" % (tag, n_models, seed)
logHandle = open("logs/log_envelope.py-%s-%d-%d-%d-%d" % (tag, n_models, n_workers, n_breaks, seed), "w")
def log(op, *args):
    # Append a tab-separated record (op, run parameters, extra values) to the
    # benchmark log and flush immediately so records survive a crash.
    prefix = [op, tag, str(n_models), str(n_workers), str(n_breaks), str(seed)]
    prefix.extend(str(a) for a in args)
    logHandle.write("\t".join(prefix) + "\n")
    logHandle.flush()
# Timed JSON model load (counterpart of the Julia envelope_json_load record).
t0 = time()
m = cobra.io.load_json_model(modelfile)
td = time() - t0
log("envelope_json_load.py", td)
avail_reactions = [r.id for r in m.reactions]
# Same candidate substrate exchanges as the Julia script; keep only those
# present in the model and take the first n_exchanges of them.
interesting_substrates = ["sucr", "gln_L", "gal", "glu_L", "glc_D", "xyl_D", "arab_L"]
interesting_exchanges = ["@M_" + s + "(e)" for s in interesting_substrates]
n_exchanges=3
exchanges=[i for i in interesting_exchanges if i in avail_reactions]
if len(exchanges)<n_exchanges:
print("not enough exchange reactions found")
sys.exit(2)
exchanges=exchanges[0:n_exchanges]
log("envelope_exchanges.py", ",".join(exchanges))
# Positions of the selected exchanges within the model's reaction list.
exchange_idxs = [i for i in range(len(avail_reactions)) if (avail_reactions[i] in exchanges)]
from cobra.flux_analysis import production_envelope