From bfce173bc2ba41686697102f96966b1d1add8959 Mon Sep 17 00:00:00 2001 From: JuliusMartensen Date: Tue, 23 Nov 2021 12:38:07 +0100 Subject: [PATCH 1/6] Fix solution creation --- src/solution.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/solution.jl b/src/solution.jl index 0752c1acf..b68a853fe 100644 --- a/src/solution.jl +++ b/src/solution.jl @@ -332,7 +332,7 @@ function DataDrivenSolution(prob::AbstractDataDrivenProblem, Ξ::AbstractMatrix, if all(iszero.(Ξ)) @warn "Sparse regression failed! All coefficients are zero." return DataDrivenSolution( - b, [], :failed, opt, Ξ, prob) + true, b, [], :failed, opt, Ξ, prob) end # Assert continuity From 51d67e5d3f298a4236c9f619862665d399e9d329 Mon Sep 17 00:00:00 2001 From: JuliusMartensen Date: Tue, 23 Nov 2021 14:49:44 +0100 Subject: [PATCH 2/6] Works --- Project.toml | 1 + src/solve/surrogates.jl | 205 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 206 insertions(+) create mode 100644 src/solve/surrogates.jl diff --git a/Project.toml b/Project.toml index 58bf872e7..4958ac242 100644 --- a/Project.toml +++ b/Project.toml @@ -9,6 +9,7 @@ DataInterpolations = "82cc6244-b520-54b8-b5a6-8a565e85f1d0" DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" +ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" ModelingToolkit = "961ee093-0014-501f-94e3-6117800e7a78" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" diff --git a/src/solve/surrogates.jl b/src/solve/surrogates.jl new file mode 100644 index 000000000..9d3e10ad0 --- /dev/null +++ b/src/solve/surrogates.jl @@ -0,0 +1,205 @@ +using ForwardDiff +using LinearAlgebra +using Statistics +using Symbolics +using DataDrivenDiffEq +using ModelingToolkit +using DataDrivenDiffEq.Optimize +using DataDrivenDiffEq: get_oop_args, get_target +using DiffEqBase + +function entangle(f::Function, idx::Int) + return f_(x) = getindex(f(x), idx) +end + +function dependencies(∇, x::AbstractMatrix{T}, abstol = eps(), reltol= eps(); kwargs...) where T <: Number + mean(map(xi->normalize(abs.(∇(xi)), Inf), eachcol(x))) .>= reltol +end + +function linearities(∇, x::AbstractMatrix{T}, abstol = eps(), reltol= eps(); kwargs...) where T <: Number + var(map(∇, eachcol(x))) .< reltol +end + +abstract type AbstractSurrogate end + +mutable struct Surrogate <: AbstractSurrogate + # Original function + f::Function + # Gradient + ∇::Function + + # Dependent variables + deps::BitVector + # Linear variables + linears::BitVector + + # Split + op::Function + children::AbstractVector{Surrogate} + + function Surrogate(f, x::AbstractMatrix; abstol = eps(), reltol = eps(), kwargs...) + y_ = f(x[:,1]) + if isa(y_, AbstractVector) + return map(1:size(y_, 1)) do i + f_ = entangle(f, i) + Surrogate(f_, x, abstol = abstol, reltol = reltol; kwargs...) + end + end + + ∇(x) = ForwardDiff.gradient(f, x) + + deps = dependencies(∇, x, abstol, reltol; kwargs...) + linears = linearities(∇, x, abstol, reltol; kwargs...) + return new( + f, ∇, deps, linears .* deps + ) + end +end + +has_children(s::Surrogate) = isdefined(s, :children) + +struct SurrrogateSolvers{G, A, K} + generator::G + alg::A + kwargs::K + + function SurrrogateSolvers(generator::Function, args...; kwargs...) + return new{typeof(generator), typeof(args), typeof(kwargs)}( + generator, args, kwargs + ) + end +end + +function linear_split!(s::Surrogate, x::AbstractMatrix, abstol = eps(), reltol = eps(); kwargs...) + sum(s.linears) < 1 && return + s.linears == s.deps && return + has_children(s) && return + + + # Get coefficients + w = mean(map(s.∇, eachcol(x)))[s.linears] + + g = let w = w, idx = s.linears + (x) -> dot(w, x[s.linears]) + end + + h = let f = s.f, g = g + (x) -> f(x) - g(x) + end + + setfield!(s, :op, +) + setfield!(s, :children, + [ + Surrogate(g, x, abstol = abstol, reltol = reltol; kwargs...); + Surrogate(h, x, abstol = abstol, reltol = reltol; kwargs...) + ] + ) + return +end + +function surrogate_solve(s::Surrogate, x::AbstractMatrix, sol::SurrrogateSolvers; abstol = 1e-5, reltol = eps()) + has_children(s) && return composite_solve(s, x, sol, abstol = abstol, reltol = reltol) + # First see if its a linear fit + prob = DirectDataDrivenProblem(x, reduce(hcat, map(s.f, eachcol(x)))) + if s.linears == s.deps + u = [Symbolics.variable("u", i) for i in 1:size(x,1)] + ũ = u[s.deps] + b = Basis([ũ; 1], u) + return solve(prob, b, STLSQ(abstol); sol.kwargs...) + end + return pareto_solve(s, x, sol, abstol = abstol, reltol = reltol) +end + +function pareto_solve(s::Surrogate, x::AbstractMatrix, sol::SurrrogateSolvers; kwargs...) + res = map(sol.alg) do ai + surrogate_solve(s::Surrogate, x::AbstractMatrix, sol.generator, ai; kwargs..., sol.kwargs...) + end + valids = map(x->x.retcode == :solved, res) + res = [res[i] for i in 1:length(valids) if valids[i]] + idx = argmin(map(x->metrics(x)[:L₂], res)) + return res[idx] +end + +function surrogate_solve(s::Surrogate, x::AbstractMatrix, g::Function, args...; kwargs...) + prob = DirectDataDrivenProblem(x, reduce(hcat, map(s.f, eachcol(x)))) + return solve(prob, args...; kwargs...) +end + +function surrogate_solve(s::Surrogate, x::AbstractMatrix, g::Function, opt::Optimize.AbstractOptimizer; kwargs...) + prob = DirectDataDrivenProblem(x, reduce(hcat, map(s.f, eachcol(x)))) + u = [Symbolics.variable("u", i) for i in 1:size(x,1)] + ũ = u[s.deps] + b = Basis(g(ũ), u) + return solve(prob, b, opt; kwargs...) +end + +function surrogate_solve(s::Surrogate, x::AbstractMatrix, g::Function, opt::Optimize.AbstractSubspaceOptimizer; kwargs...) + prob = DirectDataDrivenProblem(x, reduce(hcat, map(s.f, eachcol(x)))) + u = [Symbolics.variable("u", i) for i in 1:size(x,1)] + y = Symbolics.variable("y") + ũ = u[s.deps] + b = Basis(g(ũ, y), [u;y]) + return solve(prob, b, opt, [y]; kwargs...) +end + +function composite_solve(s::Surrogate, x::AbstractMatrix, sol::SurrrogateSolvers; kwargs...) + res = map(s.children) do c + surrogate_solve(c, x, sol; kwargs...) + end + + # Get the results + basis = map(x->result(x), res) + equations = map(x->first(x).rhs, ModelingToolkit.equations.(basis)) + pls = sum(length, parameters.(basis)) + p = [Symbolics.variable("p", i) for i in 1:pls] + ps = reduce(vcat, map(parameters, res)) + eq = Num(0) + cnt = 1 + for (i,ei) in enumerate(equations) + subs = Dict() + for p_ in parameters(basis[i]) + push!(subs, p_ => p[cnt]) + cnt += 1 + end + eq = s.op(Num(eq), substitute(Num(ei), subs)) + end + b_new = Basis([eq], Num.(states(first(basis))), parameters = p) + prob = DirectDataDrivenProblem(x, reduce(hcat, map(s.f, eachcol(x)))) + return DataDrivenSolution( + b_new, ps, :solved, map(x->x.alg, res), s.children, prob, true, eval_expression = true + ) +end + + +function DiffEqBase.solve(prob::DataDrivenProblem, f::Function, sol::SurrrogateSolvers; abstol = eps(), reltol = eps(), kwargs...) + x, _ = get_oop_args(prob) + s = Surrogate(f, x, abstol = abstol, reltol = reltol) + res = map(s) do si + linear_split!(si, x, abstol = abstol, reltol = reltol) + surrogate_solve(si, x, sol, abstol = abstol, reltol = reltol) + end +end + +using Plots + +f(x) = [-2.0sin(first(x))+5.0last(x); -0.3*last(x); 3.0*x[1]+x[2]+(1.0-3.0*x[6])/(1+10*x[5]^2)] +x = randn(100, 100) +y = reduce(hcat, map(f, eachcol(x))) +ȳ = y .+ 1e-2*randn(size(y)) +scatter(y') +prob = DirectDataDrivenProblem(x, ȳ) + +generator(u) = vcat(polynomial_basis(u), sin.(u)) +generator(u,v) = begin + explicits = polynomial_basis(u, 3) + implicits = explicits .* v + return vcat(explicits, implicits) +end + + +sol = SurrrogateSolvers(generator, STLSQ(1e-3:1e-3:1.0), ImplicitOptimizer(1e-1:1e-1:1.0), progress = false) + +res = solve(prob, f, sol, abstol = 1e-1, reltol = 1e-1) +for r in res + println(result(r)) +end \ No newline at end of file From 91e7689b990b59affd3debd454e14b6337181ca8 Mon Sep 17 00:00:00 2001 From: JuliusMartensen Date: Wed, 24 Nov 2021 07:31:55 +0100 Subject: [PATCH 3/6] Implement into DDD --- src/DataDrivenDiffEq.jl | 7 +++- src/solve/surrogates.jl | 71 ++++++++++++++++------------------------- 2 files changed, 34 insertions(+), 44 deletions(-) diff --git a/src/DataDrivenDiffEq.jl b/src/DataDrivenDiffEq.jl index 2c20e2271..6ea99b479 100644 --- a/src/DataDrivenDiffEq.jl +++ b/src/DataDrivenDiffEq.jl @@ -12,7 +12,7 @@ using Distributions using QuadGK using Statistics using DataInterpolations - +using ForwardDiff using Requires using ProgressMeter @@ -41,6 +41,9 @@ abstract type AbstractKoopmanAlgorithm end # Abstract symbolic_regression abstract type AbstractSymbolicRegression end +# Abstract Surrogate +abstract type AbstractSurrogate end + # Problem and solution abstract type AbstractDataDrivenProblem{dType, cType, probType} end abstract type AbstractDataDrivenSolution end @@ -125,7 +128,9 @@ export output, metrics, error, aic, determination include("./solve/sindy.jl") include("./solve/koopman.jl") +include("./solve/surrogates.jl") export solve +export SurrogateSolvers # Optional function __init__() diff --git a/src/solve/surrogates.jl b/src/solve/surrogates.jl index 9d3e10ad0..89754534f 100644 --- a/src/solve/surrogates.jl +++ b/src/solve/surrogates.jl @@ -1,13 +1,3 @@ -using ForwardDiff -using LinearAlgebra -using Statistics -using Symbolics -using DataDrivenDiffEq -using ModelingToolkit -using DataDrivenDiffEq.Optimize -using DataDrivenDiffEq: get_oop_args, get_target -using DiffEqBase - function entangle(f::Function, idx::Int) return f_(x) = getindex(f(x), idx) end @@ -20,8 +10,6 @@ function linearities(∇, x::AbstractMatrix{T}, abstol = eps(), reltol= eps(); k var(map(∇, eachcol(x))) .< reltol end -abstract type AbstractSurrogate end - mutable struct Surrogate <: AbstractSurrogate # Original function f::Function @@ -58,12 +46,33 @@ end has_children(s::Surrogate) = isdefined(s, :children) -struct SurrrogateSolvers{G, A, K} + +""" +$(TYPEDEF) + +Defines a composition of solvers to be applied on the specific surrogate function. +'generator' should be defined in such a way that is generates basis elements for the (filtered) +dependent variables for explicit and - if used - implicit sparse regression, e.g. + +```julia +generator(u) -> polynomial_basis(u,3) +generator(u,y)->vcat(generator(u), generator(u) .*y) + +solvers = SurrogateSolvers( + generator, STLSQ(), ImplicitOptimizer(); kwargs... +) +``` + +# Fields + +$(FIELDS) +""" +struct SurrogateSolvers{G, A, K} generator::G alg::A kwargs::K - function SurrrogateSolvers(generator::Function, args...; kwargs...) + function SurrogateSolvers(generator::Function, args...; kwargs...) return new{typeof(generator), typeof(args), typeof(kwargs)}( generator, args, kwargs ) @@ -97,7 +106,7 @@ function linear_split!(s::Surrogate, x::AbstractMatrix, abstol = eps(), reltol = return end -function surrogate_solve(s::Surrogate, x::AbstractMatrix, sol::SurrrogateSolvers; abstol = 1e-5, reltol = eps()) +function surrogate_solve(s::Surrogate, x::AbstractMatrix, sol::SurrogateSolvers; abstol = 1e-5, reltol = eps()) has_children(s) && return composite_solve(s, x, sol, abstol = abstol, reltol = reltol) # First see if its a linear fit prob = DirectDataDrivenProblem(x, reduce(hcat, map(s.f, eachcol(x)))) @@ -110,13 +119,13 @@ function surrogate_solve(s::Surrogate, x::AbstractMatrix, sol::SurrrogateSolvers return pareto_solve(s, x, sol, abstol = abstol, reltol = reltol) end -function pareto_solve(s::Surrogate, x::AbstractMatrix, sol::SurrrogateSolvers; kwargs...) +function pareto_solve(s::Surrogate, x::AbstractMatrix, sol::SurrogateSolvers; kwargs...) res = map(sol.alg) do ai surrogate_solve(s::Surrogate, x::AbstractMatrix, sol.generator, ai; kwargs..., sol.kwargs...) end valids = map(x->x.retcode == :solved, res) res = [res[i] for i in 1:length(valids) if valids[i]] - idx = argmin(map(x->metrics(x)[:L₂], res)) + idx = argmax(map(x->metrics(x)[:AIC], res)) return res[idx] end @@ -142,7 +151,7 @@ function surrogate_solve(s::Surrogate, x::AbstractMatrix, g::Function, opt::Opti return solve(prob, b, opt, [y]; kwargs...) end -function composite_solve(s::Surrogate, x::AbstractMatrix, sol::SurrrogateSolvers; kwargs...) +function composite_solve(s::Surrogate, x::AbstractMatrix, sol::SurrogateSolvers; kwargs...) res = map(s.children) do c surrogate_solve(c, x, sol; kwargs...) end @@ -171,7 +180,7 @@ function composite_solve(s::Surrogate, x::AbstractMatrix, sol::SurrrogateSolvers end -function DiffEqBase.solve(prob::DataDrivenProblem, f::Function, sol::SurrrogateSolvers; abstol = eps(), reltol = eps(), kwargs...) +function DiffEqBase.solve(prob::DataDrivenProblem, f::Function, sol::SurrogateSolvers; abstol = eps(), reltol = eps(), kwargs...) x, _ = get_oop_args(prob) s = Surrogate(f, x, abstol = abstol, reltol = reltol) res = map(s) do si @@ -179,27 +188,3 @@ function DiffEqBase.solve(prob::DataDrivenProblem, f::Function, sol::SurrrogateS surrogate_solve(si, x, sol, abstol = abstol, reltol = reltol) end end - -using Plots - -f(x) = [-2.0sin(first(x))+5.0last(x); -0.3*last(x); 3.0*x[1]+x[2]+(1.0-3.0*x[6])/(1+10*x[5]^2)] -x = randn(100, 100) -y = reduce(hcat, map(f, eachcol(x))) -ȳ = y .+ 1e-2*randn(size(y)) -scatter(y') -prob = DirectDataDrivenProblem(x, ȳ) - -generator(u) = vcat(polynomial_basis(u), sin.(u)) -generator(u,v) = begin - explicits = polynomial_basis(u, 3) - implicits = explicits .* v - return vcat(explicits, implicits) -end - - -sol = SurrrogateSolvers(generator, STLSQ(1e-3:1e-3:1.0), ImplicitOptimizer(1e-1:1e-1:1.0), progress = false) - -res = solve(prob, f, sol, abstol = 1e-1, reltol = 1e-1) -for r in res - println(result(r)) -end \ No newline at end of file From 3cdceb470b70cb9fc9ba0ff2e7479600135a6ca1 Mon Sep 17 00:00:00 2001 From: JuliusMartensen Date: Wed, 24 Nov 2021 07:48:42 +0100 Subject: [PATCH 4/6] Init test file --- src/solve/surrogates.jl | 3 +++ test/surrogates/surrogate.jl | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 test/surrogates/surrogate.jl diff --git a/src/solve/surrogates.jl b/src/solve/surrogates.jl index 89754534f..e4edc57d2 100644 --- a/src/solve/surrogates.jl +++ b/src/solve/surrogates.jl @@ -66,6 +66,9 @@ solvers = SurrogateSolvers( # Fields $(FIELDS) + +# Note +This is still an experimental prototype and highly sensitive towards the tolerances. """ struct SurrogateSolvers{G, A, K} generator::G diff --git a/test/surrogates/surrogate.jl b/test/surrogates/surrogate.jl new file mode 100644 index 000000000..38be1b997 --- /dev/null +++ b/test/surrogates/surrogate.jl @@ -0,0 +1,34 @@ +#using Revise +# +#using DataDrivenDiffEq +#using ModelingToolkit +#using Random + +# Init the problem +Random.seed!(1212) +ws = rand(4) .+ 0.2 +f(x) = [-2.0sin(first(x))+5.0last(x); sum(ws[1:3] .* x[1:3]) + ws[end]; 3.0-2.0*x[6]/(1.0+3.0*x[5]^2) + x[3]] +x = randn(20, 200) +y = reduce(hcat, map(f, eachcol(x))) +ȳ = y .+ 1e-3*randn(size(y)) + +# Add a generator +generator(u) = vcat(polynomial_basis(u, 2), sin.(u)) + +generator(u,v) = begin + explicits = polynomial_basis(u, 2) + implicits = explicits .* v + return vcat(explicits, implicits) +end + +# Add the solver and the problem +sol = SurrogateSolvers(generator, STLSQ(1e-2:1e-2:1.0), ImplicitOptimizer(0.1:0.1:1.0), maxiters = 1000, normalize_coefficients = false, progress = true) +prob = DirectDataDrivenProblem(x, ȳ) +res = solve(prob, f, sol, abstol = 3e-2, reltol = 3e-2) + +# Tests here +for r in res + println(result(r)) + println(metrics(r)) + println(parameters(r)) +end \ No newline at end of file From 89905c16eb18b090eb9d1cef321def7cc5fb3c32 Mon Sep 17 00:00:00 2001 From: JuliusMartensen Date: Sun, 28 Nov 2021 11:42:06 +0100 Subject: [PATCH 5/6] Adapt SymReg solve --- src/symbolic_regression/symbolic_regression.jl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/symbolic_regression/symbolic_regression.jl b/src/symbolic_regression/symbolic_regression.jl index 0046d64e6..7d8bda055 100644 --- a/src/symbolic_regression/symbolic_regression.jl +++ b/src/symbolic_regression/symbolic_regression.jl @@ -43,7 +43,7 @@ function DiffEqBase.solve(prob::AbstractDataDrivenProblem, alg::EQSearch; numprocs = nothing, procs = nothing, multithreading = false, runtests::Bool = true, - eval_expression = false + eval_expression = false, kwargs... ) opt = to_options(alg) @@ -61,8 +61,12 @@ function DiffEqBase.solve(prob::AbstractDataDrivenProblem, alg::EQSearch; numprocs = numprocs, procs = procs, multithreading = multithreading, runtests = runtests) # Sort the paretofront - doms = map(1:size(Y, 1)) do i - calculateParetoFrontier(X, Y[i, :], hof[i], opt) + if isa(hof, AbstractVector) + doms = map(1:size(Y, 1)) do i + calculateParetoFrontier(X, Y[i, :], hof[i], opt) + end + else + doms = [calculateParetoFrontier(X, Y[1,:], hof, opt)] end build_solution(prob, alg, doms; eval_expression = eval_expression) From d913d41535d7b85595105474e2c13ce58625b87c Mon Sep 17 00:00:00 2001 From: JuliusMartensen Date: Fri, 3 Dec 2021 10:00:23 +0100 Subject: [PATCH 6/6] Add merge result --- src/solve/surrogates.jl | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/src/solve/surrogates.jl b/src/solve/surrogates.jl index e4edc57d2..29749da90 100644 --- a/src/solve/surrogates.jl +++ b/src/solve/surrogates.jl @@ -175,14 +175,45 @@ function composite_solve(s::Surrogate, x::AbstractMatrix, sol::SurrogateSolvers; end eq = s.op(Num(eq), substitute(Num(ei), subs)) end + b_new = Basis([eq], Num.(states(first(basis))), parameters = p) + prob = DirectDataDrivenProblem(x, reduce(hcat, map(s.f, eachcol(x)))) + return DataDrivenSolution( b_new, ps, :solved, map(x->x.alg, res), s.children, prob, true, eval_expression = true ) end + +function merge_results(r, prob::DataDrivenDiffEq.AbstractDataDrivenProblem, args...; eval_expression = true, kwargs...) + # Collect all equations + # Create new parameters + res = result.(r) + l_ps = sum(map(length∘parameters, res)) + ps = [Symbolics.variable("p", i) for i in 1:l_ps] + pvals = reduce(vcat, map(parameters, r)) + # Substitue dict + p_cnt = 1 + eqs = map(r) do ri + # Collect the parameters for substitution + psub = Dict() + for p_ in parameters(result(ri)) + push!(psub, p_ => ps[p_cnt]) + p_cnt += 1 + end + _r = map(equations(result(ri))) do eqi + substitute(Num(eqi.rhs), psub) + end + end + + b_new = Basis(reduce(vcat, eqs), Num.(states(first(res))), parameters = ps) + return DataDrivenSolution( + b_new, pvals, :solved, map(x->x.alg, r), r, prob, true, eval_expression = eval_expression + ) +end + function DiffEqBase.solve(prob::DataDrivenProblem, f::Function, sol::SurrogateSolvers; abstol = eps(), reltol = eps(), kwargs...) x, _ = get_oop_args(prob) s = Surrogate(f, x, abstol = abstol, reltol = reltol) @@ -190,4 +221,7 @@ function DiffEqBase.solve(prob::DataDrivenProblem, f::Function, sol::SurrogateSo linear_split!(si, x, abstol = abstol, reltol = reltol) surrogate_solve(si, x, sol, abstol = abstol, reltol = reltol) end -end + + ## Merge the results + merge_results(res, prob; kwargs...) +end \ No newline at end of file