# Patch summary: the Sophia optimizer is moved out of the Optimization package
# into a new sub-package under lib/OptimizationSophia.
#
# Per-file changes, as shown by the hunks below:
#   .github/workflows/CI.yml
#       Adds an `OptimizationSophia` item to the list that already names the
#       other Optimization* entries (between OptimizationSciPy and
#       OptimizationSpeedMapping).
#   Project.toml
#       Removes the `Random = "9a3f8284-..."` UUID entry.
#   lib/OptimizationSophia/Project.toml (new file)
#       Declares package `OptimizationSophia`, version 0.1.0, with [deps]
#       Optimization, OptimizationBase and Random, and a `test` target made of
#       Test, ComponentArrays, Lux, MLUtils, OrdinaryDiffEqTsit5,
#       SciMLSensitivity and Zygote.
#   src/sophia.jl -> lib/OptimizationSophia/src/OptimizationSophia.jl
#       Rename (similarity index 97%). The file is wrapped in
#       `module OptimizationSophia ... end` and gains four `using` lines
#       (OptimizationBase.SciMLBase, OptimizationBase: OptimizationCache,
#       Optimization, Random).
#   lib/OptimizationSophia/test/runtests.jl (new file)
#       Two tests of `OptimizationSophia.Sophia`: a minibatched fit
#       (MLUtils.DataLoader, AutoZygote) of a Lux-parameterised ODE to data
#       generated from `newtons_cooling`, and a ComponentVector Rosenbrock
#       problem using AutoEnzyme.
#   src/Optimization.jl
#       Drops `Random` from the `using` list and removes `include("sophia.jl")`.
#   test/minibatch.jl, test/native.jl
#       Remove the `Optimization.Sophia()` tests; equivalent tests appear in
#       the new runtests.jl.
#
# NOTE(review): the line breaks of this patch have been collapsed, so the hunk
#   bodies below are not in an applicable layout as stored.
# NOTE(review): runtests.jl calls `AutoEnzyme()` but has no `using Enzyme`, and
#   Enzyme is not listed under [extras]/[targets] of the new Project.toml.
#   Presumably Enzyme is loaded transitively by another test dependency;
#   confirm, or declare it explicitly.
# NOTE(review): runtests.jl contains `using ComponentArrays` twice (once in the
#   header `using` line, once before the Rosenbrock test).
# NOTE(review): in `newtons_cooling`, the local `dT` is assigned but never read
#   anywhere in the visible test file.
# NOTE(review): the new Project.toml [compat] section has no `julia` entry;
#   confirm whether one is required for registration.
# NOTE(review): after this patch the tests refer to `OptimizationSophia.Sophia`
#   instead of `Optimization.Sophia`; no deprecation shim is visible in this
#   patch. Confirm that the removal from Optimization is intended as-is.
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 480d4d96d..6b6610ab4 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -39,6 +39,7 @@ jobs: - OptimizationPyCMA - OptimizationQuadDIRECT - OptimizationSciPy + - OptimizationSophia - OptimizationSpeedMapping - OptimizationPolyalgorithms - OptimizationNLPModels diff --git a/Project.toml b/Project.toml index 03727fbe1..efb5ec11c 100644 --- a/Project.toml +++ b/Project.toml @@ -14,7 +14,6 @@ LoggingExtras = "e6f89c97-d47a-5376-807f-9c37f3926c36" OptimizationBase = "bca83a33-5cc9-4baa-983d-23429ab6bcbb" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" ProgressLogging = "33c8b6b6-d38a-422a-b730-caa89a2f386c" -Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" diff --git a/lib/OptimizationSophia/Project.toml b/lib/OptimizationSophia/Project.toml new file mode 100644 index 000000000..41946ca19 --- /dev/null +++ b/lib/OptimizationSophia/Project.toml @@ -0,0 +1,33 @@ +name = "OptimizationSophia" +uuid = "892fee11-dca1-40d6-b698-84ba0d87399a" +authors = ["paramthakkar123 "] +version = "0.1.0" + +[deps] +Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba" +OptimizationBase = "bca83a33-5cc9-4baa-983d-23429ab6bcbb" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" + +[extras] +ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66" +Lux = "b2108857-7c20-44ae-9111-449ecde12c47" +MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" +OrdinaryDiffEqTsit5 = "b1df2697-797e-41e3-8120-5422d3b24e4a" +SciMLSensitivity = "1ed8b502-d754-442c-8d5d-10ac956f44a1" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" + +[compat] +ComponentArrays = "0.15.29" +Lux = "1.16.0" +MLUtils = "0.4.8" +Optimization = "4.5.0" +OptimizationBase = "2.10.0" +OrdinaryDiffEqTsit5 = "1.2.0" +Random = "1.10.0" +SciMLSensitivity = 
"7.88.0" +Test = "1.10.0" +Zygote = "0.7.10" + +[targets] +test = ["Test", "ComponentArrays", "Lux", "MLUtils", "OrdinaryDiffEqTsit5", "SciMLSensitivity", "Zygote"] diff --git a/src/sophia.jl b/lib/OptimizationSophia/src/OptimizationSophia.jl similarity index 97% rename from src/sophia.jl rename to lib/OptimizationSophia/src/OptimizationSophia.jl index 8516913a2..c5ce693b6 100644 --- a/src/sophia.jl +++ b/lib/OptimizationSophia/src/OptimizationSophia.jl @@ -1,3 +1,10 @@ +module OptimizationSophia + +using OptimizationBase.SciMLBase +using OptimizationBase: OptimizationCache +using Optimization +using Random + """ Sophia(; η = 1e-3, βs = (0.9, 0.999), ϵ = 1e-8, λ = 1e-1, k = 10, ρ = 0.04) + @@ -171,3 +178,5 @@ function SciMLBase.__solve(cache::OptimizationCache{ θ, x, retcode = ReturnCode.Success) end + +end diff --git a/lib/OptimizationSophia/test/runtests.jl b/lib/OptimizationSophia/test/runtests.jl new file mode 100644 index 000000000..63f17408d --- /dev/null +++ b/lib/OptimizationSophia/test/runtests.jl @@ -0,0 +1,78 @@ +using OptimizationBase, Optimization +using OptimizationBase.SciMLBase: solve, OptimizationFunction, OptimizationProblem +using OptimizationSophia +using Lux, MLUtils, Random, ComponentArrays +using SciMLSensitivity +using Test +using Zygote +using OrdinaryDiffEqTsit5 + +function dudt_(u, p, t) + ann(u, p, st)[1] .* u +end + +function newtons_cooling(du, u, p, t) + temp = u[1] + k, temp_m = p + du[1] = dT = -k * (temp - temp_m) +end + +function true_sol(du, u, p, t) + true_p = [log(2) / 8.0, 100.0] + newtons_cooling(du, u, true_p, t) +end + +function callback(state, l) #callback function to observe training + display(l) + return l < 1e-2 +end + +function predict_adjoint(fullp, time_batch) + Array(solve(prob, Tsit5(), p = fullp, saveat = time_batch)) +end + +function loss_adjoint(fullp, p) + (batch, time_batch) = p + pred = predict_adjoint(fullp, time_batch) + sum(abs2, batch .- pred) +end + +u0 = Float32[200.0] +datasize = 30 +tspan = (0.0f0, 
1.5f0) +rng = Random.default_rng() + +ann = Lux.Chain(Lux.Dense(1, 8, tanh), Lux.Dense(8, 1, tanh)) +pp, st = Lux.setup(rng, ann) +pp = ComponentArray(pp) + +prob = ODEProblem{false}(dudt_, u0, tspan, pp) + +t = range(tspan[1], tspan[2], length = datasize) +true_prob = ODEProblem(true_sol, u0, tspan) +ode_data = Array(solve(true_prob, Tsit5(), saveat = t)) + +k = 10 +train_loader = MLUtils.DataLoader((ode_data, t), batchsize = k) + +l1 = loss_adjoint(pp, (train_loader.data[1], train_loader.data[2]))[1] + +optfun = OptimizationFunction(loss_adjoint, + OptimizationBase.AutoZygote()) +optprob = OptimizationProblem(optfun, pp, train_loader) + +res1 = solve(optprob, + OptimizationSophia.Sophia(), callback = callback, + maxiters = 2000) +@test 10res1.objective < l1 + +# Test Sophia with ComponentArrays + Enzyme (shadow generation fix) +using ComponentArrays +x0_comp = ComponentVector(a = 0.0, b = 0.0) +rosenbrock_comp(x, p = nothing) = (1 - x.a)^2 + 100 * (x.b - x.a^2)^2 + +optf_sophia = OptimizationFunction(rosenbrock_comp, AutoEnzyme()) +prob_sophia = OptimizationProblem(optf_sophia, x0_comp) +res_sophia = solve(prob_sophia, OptimizationSophia.Sophia(η=0.01, k=5), maxiters = 50) +@test res_sophia.objective < rosenbrock_comp(x0_comp) # Test optimization progress +@test res_sophia.retcode == Optimization.SciMLBase.ReturnCode.Success diff --git a/src/Optimization.jl b/src/Optimization.jl index 46581afa9..47797ee04 100644 --- a/src/Optimization.jl +++ b/src/Optimization.jl @@ -12,7 +12,7 @@ if !isdefined(Base, :get_extension) end using Logging, ProgressLogging, ConsoleProgressMonitor, TerminalLoggers, LoggingExtras -using ArrayInterface, Base.Iterators, SparseArrays, LinearAlgebra, Random +using ArrayInterface, Base.Iterators, SparseArrays, LinearAlgebra import OptimizationBase: instantiate_function, OptimizationCache, ReInitCache import SciMLBase: OptimizationProblem, @@ -22,7 +22,6 @@ export ObjSense, MaxSense, MinSense include("utils.jl") include("state.jl") 
-include("sophia.jl") export solve diff --git a/test/minibatch.jl b/test/minibatch.jl index 8185ec4bc..abd5a2610 100644 --- a/test/minibatch.jl +++ b/test/minibatch.jl @@ -58,11 +58,6 @@ optfun = OptimizationFunction(loss_adjoint, Optimization.AutoZygote()) optprob = OptimizationProblem(optfun, pp, train_loader) -res1 = Optimization.solve(optprob, - Optimization.Sophia(), callback = callback, - maxiters = 2000) -@test 10res1.objective < l1 - optfun = OptimizationFunction(loss_adjoint, Optimization.AutoForwardDiff()) optprob = OptimizationProblem(optfun, pp, train_loader) diff --git a/test/native.jl b/test/native.jl index f9fc61aaa..45bc73e1c 100644 --- a/test/native.jl +++ b/test/native.jl @@ -26,14 +26,3 @@ optf1 = OptimizationFunction(loss, AutoSparseForwardDiff()) prob1 = OptimizationProblem(optf1, rand(5), data) sol1 = solve(prob1, OptimizationOptimisers.Adam(), maxiters = 1000, callback = callback) @test sol1.objective < l0 - -# Test Sophia with ComponentArrays + Enzyme (shadow generation fix) -using ComponentArrays -x0_comp = ComponentVector(a = 0.0, b = 0.0) -rosenbrock_comp(x, p = nothing) = (1 - x.a)^2 + 100 * (x.b - x.a^2)^2 - -optf_sophia = OptimizationFunction(rosenbrock_comp, AutoEnzyme()) -prob_sophia = OptimizationProblem(optf_sophia, x0_comp) -res_sophia = solve(prob_sophia, Optimization.Sophia(η=0.01, k=5), maxiters = 50) -@test res_sophia.objective < rosenbrock_comp(x0_comp) # Test optimization progress -@test res_sophia.retcode == Optimization.SciMLBase.ReturnCode.Success