diff --git a/Project.toml b/Project.toml index 58bf872e7..4958ac242 100644 --- a/Project.toml +++ b/Project.toml @@ -9,6 +9,7 @@ DataInterpolations = "82cc6244-b520-54b8-b5a6-8a565e85f1d0" DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" +ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" ModelingToolkit = "961ee093-0014-501f-94e3-6117800e7a78" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" diff --git a/src/DataDrivenDiffEq.jl b/src/DataDrivenDiffEq.jl index 2c20e2271..6ea99b479 100644 --- a/src/DataDrivenDiffEq.jl +++ b/src/DataDrivenDiffEq.jl @@ -12,7 +12,7 @@ using Distributions using QuadGK using Statistics using DataInterpolations - +using ForwardDiff using Requires using ProgressMeter @@ -41,6 +41,9 @@ abstract type AbstractKoopmanAlgorithm end # Abstract symbolic_regression abstract type AbstractSymbolicRegression end +# Abstract Surrogate +abstract type AbstractSurrogate end + # Problem and solution abstract type AbstractDataDrivenProblem{dType, cType, probType} end abstract type AbstractDataDrivenSolution end @@ -125,7 +128,9 @@ export output, metrics, error, aic, determination include("./solve/sindy.jl") include("./solve/koopman.jl") +include("./solve/surrogates.jl") export solve +export SurrogateSolvers # Optional function __init__() diff --git a/src/solution.jl b/src/solution.jl index 0752c1acf..b68a853fe 100644 --- a/src/solution.jl +++ b/src/solution.jl @@ -332,7 +332,7 @@ function DataDrivenSolution(prob::AbstractDataDrivenProblem, Ξ::AbstractMatrix, if all(iszero.(Ξ)) @warn "Sparse regression failed! All coefficients are zero." 
"""
    entangle(f, idx)

Reduce the vector-valued function `f` to a scalar function returning its
`idx`-th output component.
"""
entangle(f::Function, idx::Int) = x -> getindex(f(x), idx)

"""
    dependencies(∇, x, abstol, reltol; kwargs...)

Return a `BitVector` flagging which input components the function appears to
depend on: the columnwise mean of the Inf-normalized absolute gradient must
reach `reltol`. `abstol` is accepted for interface symmetry but is currently
unused — TODO(review): confirm whether an absolute threshold was intended.
"""
function dependencies(∇, x::AbstractMatrix{T}, abstol = eps(), reltol = eps(); kwargs...) where T <: Number
    scaled = map(c -> normalize(abs.(∇(c)), Inf), eachcol(x))
    return mean(scaled) .>= reltol
end

"""
    linearities(∇, x, abstol, reltol; kwargs...)

Return a `BitVector` flagging components whose gradient entry is (numerically)
constant over the samples — i.e. the function is linear in them. `abstol` is
accepted for interface symmetry but is currently unused.
"""
function linearities(∇, x::AbstractMatrix{T}, abstol = eps(), reltol = eps(); kwargs...) where T <: Number
    return var(map(∇, eachcol(x))) .< reltol
end

"""
Surrogate of a scalar-valued function, tracking its gradient, the input
components it depends on, and those it is linear in. `op` and `children` are
left undefined until `linear_split!` decomposes the surrogate.

Note: for a vector-valued `f` the constructor returns a `Vector{Surrogate}`,
one surrogate per output component.
"""
mutable struct Surrogate <: AbstractSurrogate
    # Original function
    f::Function
    # Gradient (via ForwardDiff)
    ∇::Function

    # Components f depends on
    deps::BitVector
    # Components entering linearly (masked by deps)
    linears::BitVector

    # Combination operator and sub-surrogates, set by linear_split!
    op::Function
    children::AbstractVector{Surrogate}

    function Surrogate(f, x::AbstractMatrix; abstol = eps(), reltol = eps(), kwargs...)
        sample = f(x[:, 1])
        # Vector-valued function: build one scalar surrogate per output.
        if isa(sample, AbstractVector)
            return map(1:size(sample, 1)) do i
                Surrogate(entangle(f, i), x, abstol = abstol, reltol = reltol; kwargs...)
            end
        end

        grad(z) = ForwardDiff.gradient(f, z)

        deps = dependencies(grad, x, abstol, reltol; kwargs...)
        lins = linearities(grad, x, abstol, reltol; kwargs...)
        # Only variables the function actually depends on count as linear.
        return new(f, grad, deps, lins .* deps)
    end
end

# True once linear_split! has decomposed this surrogate.
has_children(s::Surrogate) = isdefined(s, :children)
"""
$(TYPEDEF)

Defines a composition of solvers to be applied on the specific surrogate function.
'generator' should be defined in such a way that is generates basis elements for the (filtered)
dependent variables for explicit and - if used - implicit sparse regression, e.g.

```julia
generator(u) -> polynomial_basis(u,3)
generator(u,y)->vcat(generator(u), generator(u) .*y)

solvers = SurrogateSolvers(
    generator, STLSQ(), ImplicitOptimizer(); kwargs...
)
```

# Fields

$(FIELDS)

# Note
This is still an experimental prototype and highly sensitive towards the tolerances.
"""
struct SurrogateSolvers{G, A, K}
    generator::G
    alg::A
    kwargs::K

    function SurrogateSolvers(generator::Function, args...; kwargs...)
        return new{typeof(generator), typeof(args), typeof(kwargs)}(
            generator, args, kwargs
        )
    end
end

"""
    linear_split!(s, x, abstol, reltol; kwargs...)

Split the surrogate `s` into a purely linear part and a remainder whenever it
has linear-entering variables that do not cover all its dependencies. The two
child surrogates are combined with `+`. No-op if there is nothing linear, the
function is entirely linear, or `s` was already split.
"""
function linear_split!(s::Surrogate, x::AbstractMatrix, abstol = eps(), reltol = eps(); kwargs...)
    sum(s.linears) < 1 && return
    s.linears == s.deps && return
    has_children(s) && return

    # Linear coefficients: mean gradient over the samples, restricted to the
    # linear components.
    w = mean(map(s.∇, eachcol(x)))[s.linears]

    # Use the captured copy `idx` rather than re-reading the mutable struct on
    # every call (the original closure ignored its own `let` binding).
    g = let w = w, idx = copy(s.linears)
        x -> dot(w, x[idx])
    end

    h = let f = s.f, g = g
        x -> f(x) - g(x)
    end

    setfield!(s, :op, +)
    setfield!(s, :children,
        [
            Surrogate(g, x, abstol = abstol, reltol = reltol; kwargs...);
            Surrogate(h, x, abstol = abstol, reltol = reltol; kwargs...)
        ]
    )
    return
end

"""
    surrogate_solve(s, x, sol::SurrogateSolvers; abstol, reltol)

Solve a single surrogate: delegate to `composite_solve` if it was split,
use a plain linear `STLSQ` fit if the surrogate is entirely linear, and
otherwise run the full pareto search over the composed solvers.
"""
function surrogate_solve(s::Surrogate, x::AbstractMatrix, sol::SurrogateSolvers; abstol = 1e-5, reltol = eps())
    has_children(s) && return composite_solve(s, x, sol, abstol = abstol, reltol = reltol)
    prob = DirectDataDrivenProblem(x, reduce(hcat, map(s.f, eachcol(x))))
    # Entirely linear: a sparse regression on the raw (dependent) states with a
    # constant offset suffices.
    if s.linears == s.deps
        u = [Symbolics.variable("u", i) for i in 1:size(x, 1)]
        ũ = u[s.deps]
        b = Basis([ũ; 1], u)
        return solve(prob, b, STLSQ(abstol); sol.kwargs...)
    end
    return pareto_solve(s, x, sol, abstol = abstol, reltol = reltol)
end

"""
    pareto_solve(s, x, sol; kwargs...)

Run every solver of the composition and return the successful result with the
best AIC. Raises an error when no solver converged (the original indexed into
an empty collection in that case).
"""
function pareto_solve(s::Surrogate, x::AbstractMatrix, sol::SurrogateSolvers; kwargs...)
    res = map(sol.alg) do ai
        surrogate_solve(s, x, sol.generator, ai; kwargs..., sol.kwargs...)
    end
    valids = map(ri -> ri.retcode == :solved, res)
    candidates = [res[i] for i in 1:length(valids) if valids[i]]
    isempty(candidates) &&
        error("SurrogateSolvers: no solver of the composition returned :solved.")
    idx = argmax(map(ri -> metrics(ri)[:AIC], candidates))
    return candidates[idx]
end

# Fallback: hand the raw problem to an arbitrary algorithm without a basis.
function surrogate_solve(s::Surrogate, x::AbstractMatrix, g::Function, args...; kwargs...)
    prob = DirectDataDrivenProblem(x, reduce(hcat, map(s.f, eachcol(x))))
    return solve(prob, args...; kwargs...)
end

# Explicit sparse regression on a generated basis of the dependent states.
function surrogate_solve(s::Surrogate, x::AbstractMatrix, g::Function, opt::Optimize.AbstractOptimizer; kwargs...)
    prob = DirectDataDrivenProblem(x, reduce(hcat, map(s.f, eachcol(x))))
    u = [Symbolics.variable("u", i) for i in 1:size(x, 1)]
    ũ = u[s.deps]
    b = Basis(g(ũ), u)
    return solve(prob, b, opt; kwargs...)
end

# Implicit sparse regression: the generator additionally receives the implicit
# variable `y`.
function surrogate_solve(s::Surrogate, x::AbstractMatrix, g::Function, opt::Optimize.AbstractSubspaceOptimizer; kwargs...)
    prob = DirectDataDrivenProblem(x, reduce(hcat, map(s.f, eachcol(x))))
    u = [Symbolics.variable("u", i) for i in 1:size(x, 1)]
    y = Symbolics.variable("y")
    ũ = u[s.deps]
    b = Basis(g(ũ, y), [u; y])
    return solve(prob, b, opt, [y]; kwargs...)
end

"""
    composite_solve(s, x, sol; kwargs...)

Solve each child of a split surrogate, then recombine the resulting equations
with `s.op` under a fresh, consistently numbered parameter set.
"""
function composite_solve(s::Surrogate, x::AbstractMatrix, sol::SurrogateSolvers; kwargs...)
    res = map(c -> surrogate_solve(c, x, sol; kwargs...), s.children)

    # Collect the per-child results and rebuild a single equation.
    basis = map(result, res)
    eqns = map(b -> first(b).rhs, ModelingToolkit.equations.(basis))
    pls = sum(length, parameters.(basis))
    p = [Symbolics.variable("p", i) for i in 1:pls]
    ps = reduce(vcat, map(parameters, res))
    eq = Num(0)
    cnt = 1
    for (i, ei) in enumerate(eqns)
        # Renumber each child's parameters into the merged parameter vector.
        subs = Dict()
        for p_ in parameters(basis[i])
            push!(subs, p_ => p[cnt])
            cnt += 1
        end
        eq = s.op(Num(eq), substitute(Num(ei), subs))
    end

    b_new = Basis([eq], Num.(states(first(basis))), parameters = p)

    prob = DirectDataDrivenProblem(x, reduce(hcat, map(s.f, eachcol(x))))

    return DataDrivenSolution(
        b_new, ps, :solved, map(x -> x.alg, res), s.children, prob, true, eval_expression = true
    )
end

"""
    merge_results(r, prob; eval_expression = true, kwargs...)

Merge the per-output solutions `r` into a single `DataDrivenSolution` over a
combined basis with freshly numbered parameters.
"""
function merge_results(r, prob::DataDrivenDiffEq.AbstractDataDrivenProblem, args...; eval_expression = true, kwargs...)
    # Collect all equations and create new parameters.
    res = result.(r)
    l_ps = sum(map(length ∘ parameters, res))
    ps = [Symbolics.variable("p", i) for i in 1:l_ps]
    pvals = reduce(vcat, map(parameters, r))
    # Running counter for the merged parameter numbering.
    p_cnt = 1
    eqs = map(r) do ri
        psub = Dict()
        for p_ in parameters(result(ri))
            push!(psub, p_ => ps[p_cnt])
            p_cnt += 1
        end
        map(eqi -> substitute(Num(eqi.rhs), psub), equations(result(ri)))
    end

    b_new = Basis(reduce(vcat, eqs), Num.(states(first(res))), parameters = ps)
    return DataDrivenSolution(
        b_new, pvals, :solved, map(x -> x.alg, r), r, prob, true, eval_expression = eval_expression
    )
end

"""
    solve(prob, f, sol::SurrogateSolvers; abstol, reltol, kwargs...)

Build surrogates of `f` over the problem data, split linear parts, solve each
surrogate with the composed solvers and merge the results.
"""
function DiffEqBase.solve(prob::DataDrivenProblem, f::Function, sol::SurrogateSolvers; abstol = eps(), reltol = eps(), kwargs...)
    x, _ = get_oop_args(prob)
    s = Surrogate(f, x, abstol = abstol, reltol = reltol)
    # A scalar-valued f yields a single Surrogate, not a Vector — normalize so
    # the map below works in both cases.
    surrogates = isa(s, AbstractVector) ? s : [s]
    res = map(surrogates) do si
        # Pass the tolerances positionally: linear_split! declares them as
        # positional arguments, so keyword passing silently dropped them into
        # kwargs... in the original.
        linear_split!(si, x, abstol, reltol)
        surrogate_solve(si, x, sol, abstol = abstol, reltol = reltol)
    end

    ## Merge the results
    merge_results(res, prob; kwargs...)
end
#using Revise
#
#using DataDrivenDiffEq
#using ModelingToolkit
#using Random

# Init the problem: three outputs mixing a nonlinear, a linear and a rational
# term over a 20-dimensional input.
Random.seed!(1212)
ws = rand(4) .+ 0.2
f(x) = [-2.0sin(first(x)) + 5.0last(x);
        sum(ws[1:3] .* x[1:3]) + ws[end];
        3.0 - 2.0 * x[6] / (1.0 + 3.0 * x[5]^2) + x[3]]
x = randn(20, 200)
y = reduce(hcat, map(f, eachcol(x)))
ȳ = y .+ 1e-3 * randn(size(y))

# Candidate generators: explicit polynomial/trigonometric basis, and an
# implicit variant augmenting the explicit terms with the implicit variable.
generator(u) = vcat(polynomial_basis(u, 2), sin.(u))

function generator(u, v)
    explicits = polynomial_basis(u, 2)
    implicits = explicits .* v
    return vcat(explicits, implicits)
end

# Compose the solvers and define the problem.
sol = SurrogateSolvers(generator, STLSQ(1e-2:1e-2:1.0), ImplicitOptimizer(0.1:0.1:1.0),
                       maxiters = 1000, normalize_coefficients = false,
                       progress = true)
prob = DirectDataDrivenProblem(x, ȳ)
res = solve(prob, f, sol, abstol = 3e-2, reltol = 3e-2)

# Tests here
for r in res
    println(result(r))
    println(metrics(r))
    println(parameters(r))
end