From 0ce84baed1cb2416305a085d95c06f46af259599 Mon Sep 17 00:00:00 2001
From: Qingyu Qu <2283984853@qq.com>
Date: Tue, 16 Sep 2025 00:11:38 +0800
Subject: [PATCH 01/11] Optimal control interface

---
 docs/src/tutorials/optimal_control.md         |  88 ++++++++++++
 lib/BoundaryValueDiffEqCore/src/utils.jl      |  22 ++-
 lib/BoundaryValueDiffEqFIRK/src/algorithms.jl |   5 +-
 lib/BoundaryValueDiffEqMIRK/src/mirk.jl       | 132 +++++++++++++++---
 4 files changed, 219 insertions(+), 28 deletions(-)
 create mode 100644 docs/src/tutorials/optimal_control.md

diff --git a/docs/src/tutorials/optimal_control.md b/docs/src/tutorials/optimal_control.md
new file mode 100644
index 000000000..b45c599e3
--- /dev/null
+++ b/docs/src/tutorials/optimal_control.md
@@ -0,0 +1,88 @@
+# Solve Optimal Control problem
+
+A classical optimal control problem is the rocket launching problem(aka [Goddard Rocket problem](https://en.wikipedia.org/wiki/Goddard_problem)). Say we have a rocket with limited fuel and is launched vertically. And we want to control the final altitude of this rocket so that we can make the best of the limited fuel in rocket to get to the highest altitude. In this optimal control problem, the state variables are:
+
+  - Velocity of the rocket: $x_v(t)$
+  - Altitude of the rocket: $x_h(t)$
+  - Mass of the rocket and the fuel: $x_m(t)$
+
+The control variable is
+
+  - Thrust of the rocket: $u_t(t)$
+
+The dynamics of the launching can be formulated with three differential equations:
+
+$$
+\left\{\begin{aligned}
+&\frac{dx_v}{dt}=\frac{u_t-drag(x_h,x_v)}{x_m}-g(x_h)\\
+&\frac{dx_h}{dt}=x_v\\
+&\frac{dx_m}{dt}=-\frac{u_t}{c}
+\end{aligned}\right.
+$$
+
+where the drag $D(x_h,x_v)$ is a function of altitude and velocity:
+
+$$
+D(x_h,x_v)=D_c\cdot x_v^2\cdot\exp^{h_c(\frac{x_h-x_h(0)}{x_h(0)})}
+$$
+
+gravity $g(x_h)$ is a function of altitude:
+
+$$
+g(x_h)=g_0\cdot (\frac{x_h(0)}{x_h})^2
+$$
+
+$c$ is a constant. Suppose the final time is $T$, we here want to maximize the final altitude $x_h(T)$:
+
+$$
+\max x_h(T)
+$$
+
+Similar solving for such optimal control problem can be found on JuMP.jl and InfiniteOpt.jl. The detailed parameters are taken from [COPS](https://www.mcs.anl.gov/%7Emore/cops/cops3.pdf).
+
+```julia
+using BoundaryValueDiffEqMIRK, OptimizationMOI, Ipopt
+h_0 = 1                      # Initial height
+v_0 = 0                      # Initial velocity
+m_0 = 1.0                    # Initial mass
+m_T = 0.6                    # Final mass
+g_0 = 1                      # Gravity at the surface
+h_c = 500                    # Used for drag
+c = 0.5 * sqrt(g_0 * h_0)    # Thrust-to-fuel mass
+D_c = 0.5 * 620 * m_0 / g_0  # Drag scaling
+u_t_max = 3.5 * g_0 * m_0    # Maximum thrust
+T_max = 0.2                  # Number of seconds
+T = 1_000                    # Number of time steps
+Δt = 0.2 / T;                # Time per discretized step
+
+tspan = (0.0, 0.2)
+drag(x_h, x_v) = D_c * x_v^2 * exp(-h_c * (x_h - h_0) / h_0)
+g(x_h) = g_0 * (h_0 / x_h)^2
+function rocket_launch!(du, u, p, t)
+    # u_t is the control variable (thrust)
+    x_v, x_h, x_m, u_t = u[1], u[2], u[3], u[4]
+    du[1] = (u_t-drag(x_h, x_v))/x_m - g(x_h)
+    du[2] = x_v
+    du[3] = -u_t/c
+end
+function rocket_launch_bc!(res, u, p, t)
+    res[1] = u(0.0)[1] - v_0
+    res[2] = u(0.0)[2] - h_0
+    res[3] = u(0.0)[3] - m_0
+    res[4] = u(0.2)[4] - 0.0
+end
+function constraints!(res, u, p)
+    res[1] = u[1]
+    res[2] = u[2]
+    res[3] = u[3]
+    res[4] = u[4]
+end
+cost_fun(u, p) = -u[end - 2] # To minimize, only temporary, need to use temporary solution interpolation here similar to what we do in boundary condition evaluations.
+#cost_fun(sol, p) = -sol(0.2)[2]
+u0 = [v_0, h_0, m_0, 0.0]
+rocket_launch_fun = BVPFunction(rocket_launch!, rocket_launch_bc!; cost = cost_fun,
+    inequality = constraints!, bcresid_prototype = zeros(4))
+rocket_launch_prob = BVProblem(rocket_launch_fun, u0, tspan; lcons = [0.0, 0.0, m_T, 0.0],
+    ucons = [Inf, Inf, m_0, u_t_max])
+sol = solve(rocket_launch_prob, MIRK4(; optimize = Ipopt.Optimizer()); dt = 0.002)
+```
diff --git a/lib/BoundaryValueDiffEqCore/src/utils.jl b/lib/BoundaryValueDiffEqCore/src/utils.jl
index a3096f8c0..cad61bd94 100644
--- a/lib/BoundaryValueDiffEqCore/src/utils.jl
+++ b/lib/BoundaryValueDiffEqCore/src/utils.jl
@@ -629,16 +629,26 @@ end
 
 @inline function __extract_lcons_ucons(prob::AbstractBVProblem, ::Type{T}, M, N) where {T}
     lcons = if isnothing(prob.lcons)
-        zeros(T, N*M)
+        zeros(T, N*M) #TODO: handle carefully when NLLS
     else
-        lcons_length = length(prob.lcons)
-        vcat(prob.lcons, zeros(T, N*M - lcons_length))
+        if !(isnothing(prob.f.equality) && isnothing(prob.f.inequality))
+            # When there are additional equality or inequality constraints
+            vcat(zeros(T, N*M), repeat(prob.lcons, N))
+        else
+            lcons_length = length(prob.lcons)
+            vcat(prob.lcons, zeros(T, N*M - lcons_length))
+        end
     end
     ucons = if isnothing(prob.ucons)
-        zeros(T, N*M)
+        zeros(T, N*M) #TODO: handle carefully when NLLS
     else
-        ucons_length = length(prob.ucons)
-        vcat(prob.ucons, zeros(T, N*M - ucons_length))
+        if !(isnothing(prob.f.equality) && isnothing(prob.f.inequality))
+            # When there are additional equality or inequality constraints
+            vcat(zeros(T, N*M), repeat(prob.ucons, N))
+        else
+            ucons_length = length(prob.ucons)
+            vcat(prob.ucons, zeros(T, N*M - ucons_length))
+        end
     end
     return lcons, ucons
 end
diff --git a/lib/BoundaryValueDiffEqFIRK/src/algorithms.jl b/lib/BoundaryValueDiffEqFIRK/src/algorithms.jl
index 00f8e5e23..25e43aa19 100644
--- a/lib/BoundaryValueDiffEqFIRK/src/algorithms.jl
+++ b/lib/BoundaryValueDiffEqFIRK/src/algorithms.jl
@@ -121,7 +121,6 @@ for stage in (2, 3, 4, 5)
           - `optimize`: Internal Optimization solver. Any solver which conforms to the SciML
             `OptimizationProblem` interface can be used. Note that any autodiff argument for
             the solver will be ignored and a custom jacobian algorithm will be used.
-
           - `jac_alg`: Jacobian Algorithm used for the nonlinear solver. Defaults to
             `BVPJacobianAlgorithm()`, which automatically decides the best algorithm to
             use based on the input types and problem type.
@@ -132,7 +131,6 @@ for stage in (2, 3, 4, 5)
                 `nonbc_diffmode` defaults to `AutoSparse(AutoForwardDiff)` if possible else
                 `AutoSparse(AutoFiniteDiff)`. For `bc_diffmode`, defaults to `AutoForwardDiff` if
                 possible else `AutoFiniteDiff`.
-
           - `nested_nlsolve`: Whether or not to use a nested nonlinear solve for the
             implicit FIRK step. Defaults to `false`. If set to `false`, the FIRK stages are
             solved as a part of the global residual. The general recommendation is to choose
@@ -150,6 +148,7 @@ for stage in (2, 3, 4, 5)
         ## References
 
         Reference for Lobatto and Radau methods:
+
         ```bibtex
         @Inbook{Jay2015,
             author="Jay, Laurent O.",
@@ -168,7 +167,9 @@ for stage in (2, 3, 4, 5)
             year = {2015},
         }
         ```
+
         References for implementation of defect control, based on the `bvp5c` solver in MATLAB:
+
         ```bibtex
         @article{shampine_solving_nodate,
             title = {Solving {Boundary} {Value} {Problems} for {Ordinary} {Diﬀerential} {Equations} in {Matlab} with bvp4c
diff --git a/lib/BoundaryValueDiffEqMIRK/src/mirk.jl b/lib/BoundaryValueDiffEqMIRK/src/mirk.jl
index 98d89646c..a92c580bc 100644
--- a/lib/BoundaryValueDiffEqMIRK/src/mirk.jl
+++ b/lib/BoundaryValueDiffEqMIRK/src/mirk.jl
@@ -43,6 +43,7 @@ function SciMLBase.__init(
     diffcache = __cache_trait(alg.jac_alg)
     @assert (iip || isnothing(alg.optimize)) "Out-of-place constraints don't allow optimization solvers "
     fit_parameters = haskey(prob.kwargs, :fit_parameters)
+    constraint = (!isnothing(prob.f.inequality)) || (!isnothing(prob.f.equality))
 
     t₀, t₁ = prob.tspan
     ig, T,
@@ -72,10 +73,20 @@ function SciMLBase.__init(
     bcresid_prototype, resid₁_size = __get_bcresid_prototype(prob.problem_type, prob, X)
 
     residual = if iip
-        if prob.problem_type isa TwoPointBVProblem
-            vcat([__alloc(__vec(bcresid_prototype))], __alloc.(copy.(@view(y₀.u[2:end]))))
+        if !constraint
+            if prob.problem_type isa TwoPointBVProblem
+                vcat([__alloc(__vec(bcresid_prototype))], __alloc.(copy.(@view(y₀.u[2:end]))))
+            else
+                vcat([__alloc(bcresid_prototype)], __alloc.(copy.(@view(y₀.u[2:end]))))
+            end
         else
-            vcat([__alloc(bcresid_prototype)], __alloc.(copy.(@view(y₀.u[2:end]))))
+            if prob.problem_type isa TwoPointBVProblem
+                vcat([__alloc(__vec(bcresid_prototype))],
+                    __alloc.(copy.(@view(y₀.u[2:end]))), __alloc.(copy.(y₀.u)))
+            else
+                vcat([__alloc(bcresid_prototype)],
+                    __alloc.(copy.(@view(y₀.u[2:end]))), __alloc.(copy.(y₀.u)))
+            end
         end
     else
         nothing
@@ -246,6 +257,9 @@ function __construct_problem(cache::MIRKCache{iip}, y::AbstractVector, y₀::Abs
 
     trait = __cache_trait(jac_alg)
 
+    constraint = (!isnothing(cache.prob.f.inequality)) ||
+                 (!isnothing(cache.prob.f.equality))
+
     loss_bc = if iip
         @closure (du,
             u,
@@ -270,7 +284,7 @@ function __construct_problem(cache::MIRKCache{iip}, y::AbstractVector, y₀::Abs
         @closure (du,
             u,
             p) -> __mirk_loss!(du, u, p, cache.y, pt, cache.bc, cache.residual,
-            cache.mesh, cache, eval_sol, trait)
+            cache.mesh, cache, eval_sol, trait, Val(constraint))
     else
         @closure (u,
             p) -> __mirk_loss(
@@ -280,8 +294,16 @@ function __construct_problem(cache::MIRKCache{iip}, y::AbstractVector, y₀::Abs
     return __construct_problem(cache, y, loss_bc, loss_collocation, loss, pt)
 end
 
-@views function __mirk_loss!(resid, u, p, y, pt::StandardBVProblem, bc!::BC, residual,
-        mesh, cache, EvalSol, trait::DiffCacheNeeded) where {BC}
+# Let's only do inplace version now since Optimization.jl only supports that.
+
+# J = [J_equality;
+#      J_inequality]
+# where J_equality is the Jacobian we are computed from bc and collocation
+# and J_inequality is the Jacobian from inequality constraints(whole-time inequality)
+
+@views function __mirk_loss!(
+        resid, u, p, y, pt::StandardBVProblem, bc!::BC, residual, mesh, cache,
+        EvalSol, trait::DiffCacheNeeded, constraint::Val{false}) where {BC}
     y_ = recursive_unflatten!(y, u)
     resids = [get_tmp(r, u) for r in residual]
     Φ!(resids[2:end], cache, y_, u, trait)
@@ -292,8 +314,9 @@ end
     return nothing
 end
 
-@views function __mirk_loss!(resid, u, p, y, pt::StandardBVProblem, bc!::BC, residual,
-        mesh, cache, EvalSol, trait::NoDiffCacheNeeded) where {BC}
+@views function __mirk_loss!(
+        resid, u, p, y, pt::StandardBVProblem, bc!::BC, residual, mesh, cache,
+        EvalSol, trait::NoDiffCacheNeeded, constraint::Val{false}) where {BC}
     y_ = recursive_unflatten!(y, u)
     Φ!(residual[2:end], cache, y_, u, trait)
     EvalSol.u[1:end] .= __restructure_sol(y_, cache.in_size)
@@ -303,8 +326,9 @@ end
     return nothing
 end
 
-@views function __mirk_loss!(resid, u, p, y, pt::TwoPointBVProblem, bc!::Tuple{BC1, BC2},
-        residual, mesh, cache, _, trait::DiffCacheNeeded) where {BC1, BC2}
+@views function __mirk_loss!(
+        resid, u, p, y, pt::TwoPointBVProblem, bc!::Tuple{BC1, BC2}, residual, mesh,
+        cache, _, trait::DiffCacheNeeded, constraint::Val{false}) where {BC1, BC2}
     y_ = recursive_unflatten!(y, u)
     resids = [get_tmp(r, u) for r in residual]
     Φ!(resids[2:end], cache, y_, u, trait)
@@ -315,8 +339,9 @@ end
     return nothing
 end
 
-@views function __mirk_loss!(resid, u, p, y, pt::TwoPointBVProblem, bc!::Tuple{BC1, BC2},
-        residual, mesh, cache, _, trait::NoDiffCacheNeeded) where {BC1, BC2}
+@views function __mirk_loss!(
+        resid, u, p, y, pt::TwoPointBVProblem, bc!::Tuple{BC1, BC2}, residual, mesh,
+        cache, _, trait::NoDiffCacheNeeded, constraint::Val{false}) where {BC1, BC2}
     y_ = recursive_unflatten!(y, u)
     Φ!(residual[2:end], cache, y_, u, trait)
     resida = residual[1][1:prod(cache.resid_size[1])]
@@ -326,6 +351,23 @@ end
     return nothing
 end
 
+@views function __mirk_loss!(
+        resid, u, p, y, pt::StandardBVProblem, bc!::BC, residual, mesh, cache,
+        EvalSol, trait::DiffCacheNeeded, constraint::Val{true}) where {BC}
+    y_ = recursive_unflatten!(y, u)
+    L = length(y_)
+    resids = [get_tmp(r, u) for r in residual]
+    Φ!(resids[2:L], cache, y_, u, trait)
+    EvalSol.u[1:end] .= __restructure_sol(y_, cache.in_size)
+    EvalSol.cache.k_discrete[1:end] .= cache.k_discrete
+    eval_bc_residual!(resids[1], pt, bc!, EvalSol, p, mesh)
+
+    # whole-time inequality constraints
+    cache.prob.f.inequality.(resids[(L + 1):end], y_, nothing)
+    recursive_flatten!(resid, resids)
+    return nothing
+end
+
 @views function __mirk_loss(
         u, p, y, pt::StandardBVProblem, bc::BC, mesh, cache, EvalSol, trait) where {BC}
     y_ = recursive_unflatten!(y, u)
@@ -362,7 +404,8 @@ end
 @views function __mirk_loss_collocation!(
         resid, u, p, y, mesh, residual, cache, trait::DiffCacheNeeded)
     y_ = recursive_unflatten!(y, u)
-    resids = [get_tmp(r, u) for r in residual[2:end]]
+    N = length(y)
+    resids = [get_tmp(r, u) for r in residual[2:N]]
     Φ!(resids, cache, y_, u, trait)
     recursive_flatten!(resid, resids)
     return nothing
@@ -371,7 +414,8 @@ end
 @views function __mirk_loss_collocation!(
         resid, u, p, y, mesh, residual, cache, trait::NoDiffCacheNeeded)
     y_ = recursive_unflatten!(y, u)
-    resids = [r for r in residual[2:end]]
+    N = length(y)
+    resids = [r for r in residual[2:N]]
     Φ!(resids, cache, y_, u, trait)
     recursive_flatten!(resid, resids)
     return nothing
@@ -388,10 +432,13 @@ function __construct_problem(cache::MIRKCache{iip}, y, loss_bc::BC, loss_colloca
     (; jac_alg) = cache.alg
     (; bc_diffmode) = jac_alg
     N = length(cache.mesh)
+    constraint = (!isnothing(cache.prob.f.inequality)) ||
+                 (!isnothing(cache.prob.f.equality))
 
     resid_bc = cache.bcresid_prototype
     L = length(resid_bc)
     resid_collocation = safe_similar(y, cache.M * (N - 1))
+    resid_prototype = vcat(resid_bc, resid_collocation)
 
     cache_bc = if iip
         DI.prepare_jacobian(loss_bc, resid_bc, bc_diffmode, y, Constant(cache.p))
@@ -445,12 +492,29 @@ function __construct_problem(cache::MIRKCache{iip}, y, loss_bc::BC, loss_colloca
         jac_prototype = AlmostBandedMatrix{eltype(cache)}(J_full_band, J_bc)
     end
 
+    if constraint
+        cache_inequality = DI.prepare_jacobian(
+            cache.prob.f.inequality, resid_prototype, bc_diffmode, y, Constant(cache.p))
+        J_inequality = DI.jacobian(cache.prob.f.inequality, resid_prototype,
+            cache_inequality, bc_diffmode, y, Constant(cache.p))
+        jac_prototype = vcat(jac_prototype, J_inequality)
+    end
+
     jac = if iip
-        @closure (J,
-            u,
-            p) -> __mirk_mpoint_jacobian!(
-            J, J_c, u, bc_diffmode, nonbc_diffmode, cache_bc, cache_collocation,
-            loss_bc, loss_collocation, resid_bc, resid_collocation, L, cache.p)
+        if constraint
+            @closure (J,
+                u,
+                p) -> __mirk_mpoint_jacobian!(
+                J, J_c, u, bc_diffmode, nonbc_diffmode, cache_bc, cache_collocation,
+                cache_inequality, loss_bc, loss_collocation, cache.prob.f.inequality,
+                resid_bc, resid_collocation, resid_prototype, L, cache.p)
+        else
+            @closure (J,
+                u,
+                p) -> __mirk_mpoint_jacobian!(
+                J, J_c, u, bc_diffmode, nonbc_diffmode, cache_bc, cache_collocation,
+                loss_bc, loss_collocation, resid_bc, resid_collocation, L, cache.p)
+        end
     else
         @closure (u,
             p) -> __mirk_mpoint_jacobian(
@@ -458,7 +522,6 @@ function __construct_problem(cache::MIRKCache{iip}, y, loss_bc::BC, loss_colloca
             cache_collocation, loss_bc, loss_collocation, L, cache.p)
     end
 
-    resid_prototype = vcat(resid_bc, resid_collocation)
     return __construct_internal_problem(cache.prob, cache.alg, loss, jac, jac_prototype,
         resid_prototype, y, cache.p, cache.M, N)
 end
@@ -473,6 +536,35 @@ function __mirk_mpoint_jacobian!(
     return nothing
 end
 
+function __mirk_mpoint_jacobian!(J::AlmostBandedMatrix, J_c, x, bc_diffmode, nonbc_diffmode,
+        bc_diffcache, nonbc_diffcache, inequality_diffcache,
+        loss_bc::BC, loss_collocation::C, loss_inequality, resid_bc,
+        resid_collocation, resid_prototype, L::Int, p) where {BC, C}
+    J_bc = fillpart(J)
+    DI.jacobian!(loss_collocation, resid_collocation, J_c,
+        nonbc_diffcache, nonbc_diffmode, x, Constant(p))
+    DI.jacobian!(loss_bc, resid_bc, J_bc, bc_diffcache, bc_diffmode, x, Constant(p))
+    exclusive_bandpart(J) .= J_c
+    finish_part_setindex!(J)
+    return nothing
+end
+
+function __mirk_mpoint_jacobian!(
+        J, _, x, bc_diffmode, nonbc_diffmode, bc_diffcache, nonbc_diffcache,
+        inequality_diffcache, loss_bc::BC, loss_collocation::C, loss_inequality,
+        resid_bc, resid_collocation, resid_prototype, L::Int, p) where {BC, C}
+    N = length(x)
+    DI.jacobian!(
+        loss_bc, resid_bc, @view(J[1:L, :]), bc_diffcache, bc_diffmode, x, Constant(p))
+    DI.jacobian!(loss_collocation, resid_collocation, @view(J[(L + 1):N, :]),
+        nonbc_diffcache, nonbc_diffmode, x, Constant(p))
+
+    # The Jacobian of constraints
+    DI.jacobian!(loss_inequality, resid_prototype, @view(J[(N + 1):end, :]),
+        inequality_diffcache, bc_diffmode, x, Constant(p))
+    return nothing
+end
+
 function __mirk_mpoint_jacobian!(J::AlmostBandedMatrix, J_c, x, bc_diffmode, nonbc_diffmode,
         bc_diffcache, nonbc_diffcache, loss_bc::BC, loss_collocation::C,
         resid_bc, resid_collocation, L::Int, p) where {BC, C}

From b7e7042bbf75920978789c1e2261467880f59530 Mon Sep 17 00:00:00 2001
From: Qingyu Qu <2283984853@qq.com>
Date: Tue, 23 Sep 2025 22:51:48 +0800
Subject: [PATCH 02/11] Tweak for docs

---
 Project.toml                             | 2 +-
 docs/src/tutorials/optimal_control.md    | 4 +++-
 lib/BoundaryValueDiffEqCore/src/utils.jl | 4 ++++
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/Project.toml b/Project.toml
index 9ecff6938..2f0aa07d4 100644
--- a/Project.toml
+++ b/Project.toml
@@ -59,7 +59,7 @@ Random = "1.10"
 RecursiveArrayTools = "3.31.2"
 ReTestItems = "1.29"
 Reexport = "1.2"
-SciMLBase = "2.108.0"
+SciMLBase = "2.120.0"
 Sparspak = "0.3.11"
 StaticArrays = "1.9.8"
 Test = "1.10"
diff --git a/docs/src/tutorials/optimal_control.md b/docs/src/tutorials/optimal_control.md
index b45c599e3..30566ab5c 100644
--- a/docs/src/tutorials/optimal_control.md
+++ b/docs/src/tutorials/optimal_control.md
@@ -81,8 +81,10 @@ cost_fun(u, p) = -u[end - 2] # To minimize, only temporary, need to use temporar
 #cost_fun(sol, p) = -sol(0.2)[2]
 u0 = [v_0, h_0, m_0, 0.0]
 rocket_launch_fun = BVPFunction(rocket_launch!, rocket_launch_bc!; cost = cost_fun,
-    inequality = constraints!, bcresid_prototype = zeros(4))
+    inequality = constraints!, f_prototype = zeros(3))
 rocket_launch_prob = BVProblem(rocket_launch_fun, u0, tspan; lcons = [0.0, 0.0, m_T, 0.0],
     ucons = [Inf, Inf, m_0, u_t_max])
 sol = solve(rocket_launch_prob, MIRK4(; optimize = Ipopt.Optimizer()); dt = 0.002)
 ```
+
+Similar optimal control problem solving can also be deployed in JuMP.jl and InfiniteOpt.jl.
diff --git a/lib/BoundaryValueDiffEqCore/src/utils.jl b/lib/BoundaryValueDiffEqCore/src/utils.jl
index cad61bd94..6047123b3 100644
--- a/lib/BoundaryValueDiffEqCore/src/utils.jl
+++ b/lib/BoundaryValueDiffEqCore/src/utils.jl
@@ -272,6 +272,10 @@ function __extract_problem_details(prob, u0::AbstractArray; dt = 0.0,
         new_u = vcat(u0, prob.p)
         return Val(false), eltype(new_u), length(new_u), Int(cld(t₁ - t₀, dt)), new_u
     end
+    if !isnothing(prob.f.f_prototype)
+        length_f_prototype = length(prob.f.f_prototype)
+        return Val(false), eltype(u0), length_f_prototype, Int(cld(t₁ - t₀, dt)), prob.u0
+    end
     return Val(false), eltype(u0), length(u0), Int(cld(t₁ - t₀, dt)), prob.u0
 end
 function __extract_problem_details(prob, f::F; dt = 0.0, check_positive_dt::Bool = false,

From 0f0cb4ec625431011316bce16cd82b32df25e334 Mon Sep 17 00:00:00 2001
From: Qingyu Qu <2283984853@qq.com>
Date: Thu, 9 Oct 2025 22:03:46 +0800
Subject: [PATCH 03/11] Change the position of constraints

---
 docs/src/tutorials/optimal_control.md    |  4 +--
 lib/BoundaryValueDiffEqCore/src/utils.jl |  4 ---
 lib/BoundaryValueDiffEqMIRK/src/mirk.jl  | 37 ++++++++++++------------
 3 files changed, 20 insertions(+), 25 deletions(-)

diff --git a/docs/src/tutorials/optimal_control.md b/docs/src/tutorials/optimal_control.md
index 30566ab5c..7ed1e4b54 100644
--- a/docs/src/tutorials/optimal_control.md
+++ b/docs/src/tutorials/optimal_control.md
@@ -41,7 +41,7 @@ $$
 Similar solving for such optimal control problem can be found on JuMP.jl and InfiniteOpt.jl. The detailed parameters are taken from [COPS](https://www.mcs.anl.gov/%7Emore/cops/cops3.pdf).
 
 ```julia
-using BoundaryValueDiffEqMIRK, OptimizationMOI, Ipopt
+using BoundaryValueDiffEqMIRK, OptimizationIpopt
 h_0 = 1                      # Initial height
 v_0 = 0                      # Initial velocity
 m_0 = 1.0                    # Initial mass
@@ -84,7 +84,7 @@ rocket_launch_fun = BVPFunction(rocket_launch!, rocket_launch_bc!; cost = cost_f
     inequality = constraints!, f_prototype = zeros(3))
 rocket_launch_prob = BVProblem(rocket_launch_fun, u0, tspan; lcons = [0.0, 0.0, m_T, 0.0],
     ucons = [Inf, Inf, m_0, u_t_max])
-sol = solve(rocket_launch_prob, MIRK4(; optimize = Ipopt.Optimizer()); dt = 0.002)
+sol = solve(rocket_launch_prob, MIRK4(; optimize = IpoptOptimizer()); dt = 0.002)
 ```
 
 Similar optimal control problem solving can also be deployed in JuMP.jl and InfiniteOpt.jl.
diff --git a/lib/BoundaryValueDiffEqCore/src/utils.jl b/lib/BoundaryValueDiffEqCore/src/utils.jl
index 6047123b3..cad61bd94 100644
--- a/lib/BoundaryValueDiffEqCore/src/utils.jl
+++ b/lib/BoundaryValueDiffEqCore/src/utils.jl
@@ -272,10 +272,6 @@ function __extract_problem_details(prob, u0::AbstractArray; dt = 0.0,
         new_u = vcat(u0, prob.p)
         return Val(false), eltype(new_u), length(new_u), Int(cld(t₁ - t₀, dt)), new_u
     end
-    if !isnothing(prob.f.f_prototype)
-        length_f_prototype = length(prob.f.f_prototype)
-        return Val(false), eltype(u0), length_f_prototype, Int(cld(t₁ - t₀, dt)), prob.u0
-    end
     return Val(false), eltype(u0), length(u0), Int(cld(t₁ - t₀, dt)), prob.u0
 end
 function __extract_problem_details(prob, f::F; dt = 0.0, check_positive_dt::Bool = false,
diff --git a/lib/BoundaryValueDiffEqMIRK/src/mirk.jl b/lib/BoundaryValueDiffEqMIRK/src/mirk.jl
index a92c580bc..5bea5f870 100644
--- a/lib/BoundaryValueDiffEqMIRK/src/mirk.jl
+++ b/lib/BoundaryValueDiffEqMIRK/src/mirk.jl
@@ -81,11 +81,11 @@ function SciMLBase.__init(
             end
         else
             if prob.problem_type isa TwoPointBVProblem
-                vcat([__alloc(__vec(bcresid_prototype))],
-                    __alloc.(copy.(@view(y₀.u[2:end]))), __alloc.(copy.(y₀.u)))
+                vcat(__alloc.(copy.(y₀.u)), [__alloc(__vec(bcresid_prototype))],
+                    __alloc.(copy.(@view(y₀.u[2:end]))))
             else
-                vcat([__alloc(bcresid_prototype)],
-                    __alloc.(copy.(@view(y₀.u[2:end]))), __alloc.(copy.(y₀.u)))
+                vcat(__alloc.(copy.(y₀.u)), [__alloc(bcresid_prototype)],
+                    __alloc.(copy.(@view(y₀.u[2:end]))))
             end
         end
     else
@@ -296,10 +296,10 @@ end
 
 # Let's only do inplace version now since Optimization.jl only supports that.
 
-# J = [J_equality;
-#      J_inequality]
-# where J_equality is the Jacobian we are computed from bc and collocation
-# and J_inequality is the Jacobian from inequality constraints(whole-time inequality)
+# J = [J_constraints;
+#      J_bvp]
+# where J_constraints is the Jacobian from equality/inequality constraints(whole-time equality/inequality)
+# and J_bvp is the Jacobian computed from bc and collocation
 
 @views function __mirk_loss!(
         resid, u, p, y, pt::StandardBVProblem, bc!::BC, residual, mesh, cache,
@@ -357,13 +357,13 @@ end
     y_ = recursive_unflatten!(y, u)
     L = length(y_)
     resids = [get_tmp(r, u) for r in residual]
-    Φ!(resids[2:L], cache, y_, u, trait)
+    # whole-time inequality constraints
+    cache.prob.f.inequality.(resids[1:L], y_, nothing)
+    Φ!(resids[(L + 2):2L], cache, y_, u, trait)
     EvalSol.u[1:end] .= __restructure_sol(y_, cache.in_size)
     EvalSol.cache.k_discrete[1:end] .= cache.k_discrete
-    eval_bc_residual!(resids[1], pt, bc!, EvalSol, p, mesh)
+    eval_bc_residual!(resids[L + 1], pt, bc!, EvalSol, p, mesh)
 
-    # whole-time inequality constraints
-    cache.prob.f.inequality.(resids[(L + 1):end], y_, nothing)
     recursive_flatten!(resid, resids)
     return nothing
 end
@@ -497,7 +497,7 @@ function __construct_problem(cache::MIRKCache{iip}, y, loss_bc::BC, loss_colloca
             cache.prob.f.inequality, resid_prototype, bc_diffmode, y, Constant(cache.p))
         J_inequality = DI.jacobian(cache.prob.f.inequality, resid_prototype,
             cache_inequality, bc_diffmode, y, Constant(cache.p))
-        jac_prototype = vcat(jac_prototype, J_inequality)
+        jac_prototype = vcat(J_inequality, jac_prototype)
     end
 
     jac = if iip
@@ -554,14 +554,13 @@ function __mirk_mpoint_jacobian!(
         inequality_diffcache, loss_bc::BC, loss_collocation::C, loss_inequality,
         resid_bc, resid_collocation, resid_prototype, L::Int, p) where {BC, C}
     N = length(x)
-    DI.jacobian!(
-        loss_bc, resid_bc, @view(J[1:L, :]), bc_diffcache, bc_diffmode, x, Constant(p))
-    DI.jacobian!(loss_collocation, resid_collocation, @view(J[(L + 1):N, :]),
-        nonbc_diffcache, nonbc_diffmode, x, Constant(p))
-
     # The Jacobian of constraints
-    DI.jacobian!(loss_inequality, resid_prototype, @view(J[(N + 1):end, :]),
+    DI.jacobian!(loss_inequality, resid_prototype, @view(J[1:N, :]),
         inequality_diffcache, bc_diffmode, x, Constant(p))
+    DI.jacobian!(loss_bc, resid_bc, @view(J[(N + 1):(N + L), :]),
+        bc_diffcache, bc_diffmode, x, Constant(p))
+    DI.jacobian!(loss_collocation, resid_collocation, @view(J[(N + L + 1):2N, :]),
+        nonbc_diffcache, nonbc_diffmode, x, Constant(p))
     return nothing
 end
 

From 67e581333beccc04e20d8f3319952843c7683a27 Mon Sep 17 00:00:00 2001
From: Qingyu Qu <2283984853@qq.com>
Date: Tue, 14 Oct 2025 02:15:49 +0800
Subject: [PATCH 04/11] Collocation lhs should be f_prototype

---
 docs/src/tutorials/optimal_control.md         |  10 +-
 lib/BoundaryValueDiffEqCore/src/utils.jl      |   4 +-
 .../src/collocation.jl                        |  47 ++-
 lib/BoundaryValueDiffEqMIRK/src/mirk.jl       | 302 ++++++++++++++----
 4 files changed, 289 insertions(+), 74 deletions(-)

diff --git a/docs/src/tutorials/optimal_control.md b/docs/src/tutorials/optimal_control.md
index 7ed1e4b54..bd3faeec1 100644
--- a/docs/src/tutorials/optimal_control.md
+++ b/docs/src/tutorials/optimal_control.md
@@ -41,7 +41,7 @@ $$
 Similar solving for such optimal control problem can be found on JuMP.jl and InfiniteOpt.jl. The detailed parameters are taken from [COPS](https://www.mcs.anl.gov/%7Emore/cops/cops3.pdf).
 
 ```julia
-using BoundaryValueDiffEqMIRK, OptimizationIpopt
+using BoundaryValueDiffEqMIRK, OptimizationMOI, Ipopt
 h_0 = 1                      # Initial height
 v_0 = 0                      # Initial velocity
 m_0 = 1.0                    # Initial mass
@@ -68,7 +68,7 @@ end
 function rocket_launch_bc!(res, u, p, t)
     res[1] = u(0.0)[1] - v_0
     res[2] = u(0.0)[2] - h_0
-    res[3] = u(0.0)[3] - m_0
+    res[3] = u(0.2)[3] - m_T
     res[4] = u(0.2)[4] - 0.0
 end
 function constraints!(res, u, p)
@@ -77,14 +77,14 @@ function constraints!(res, u, p)
     res[3] = u[3]
     res[4] = u[4]
 end
-cost_fun(u, p) = -u[end - 2] # To minimize, only temporary, need to use temporary solution interpolation here similar to what we do in boundary condition evaluations.
+cost_fun(u, p) = -u[end - 2] #Altitute x_h. To minimize, only temporary, need to use temporary solution interpolation here similar to what we do in boundary condition evaluations.
 #cost_fun(sol, p) = -sol(0.2)[2]
 u0 = [v_0, h_0, m_0, 0.0]
 rocket_launch_fun = BVPFunction(rocket_launch!, rocket_launch_bc!; cost = cost_fun,
     inequality = constraints!, f_prototype = zeros(3))
-rocket_launch_prob = BVProblem(rocket_launch_fun, u0, tspan; lcons = [0.0, 0.0, m_T, 0.0],
+rocket_launch_prob = BVProblem(rocket_launch_fun, u0, tspan; lcons = [0.0, h_0, m_T, 0.0],
     ucons = [Inf, Inf, m_0, u_t_max])
-sol = solve(rocket_launch_prob, MIRK4(; optimize = IpoptOptimizer()); dt = 0.002)
+sol = solve(rocket_launch_prob, MIRK4(; optimize = Ipopt.Optimizer()); dt = 0.002)
 ```
 
 Similar optimal control problem solving can also be deployed in JuMP.jl and InfiniteOpt.jl.
diff --git a/lib/BoundaryValueDiffEqCore/src/utils.jl b/lib/BoundaryValueDiffEqCore/src/utils.jl
index cad61bd94..0e426fecd 100644
--- a/lib/BoundaryValueDiffEqCore/src/utils.jl
+++ b/lib/BoundaryValueDiffEqCore/src/utils.jl
@@ -633,7 +633,7 @@ end
     else
         if !(isnothing(prob.f.equality) && isnothing(prob.f.inequality))
             # When there are additional equality or inequality constraints
-            vcat(zeros(T, N*M), repeat(prob.lcons, N))
+            vcat(repeat(prob.lcons, N), zeros(T, M + (N - 1)*3))
         else
             lcons_length = length(prob.lcons)
             vcat(prob.lcons, zeros(T, N*M - lcons_length))
@@ -644,7 +644,7 @@ end
     else
         if !(isnothing(prob.f.equality) && isnothing(prob.f.inequality))
             # When there are additional equality or inequality constraints
-            vcat(zeros(T, N*M), repeat(prob.ucons, N))
+            vcat(repeat(prob.ucons, N), zeros(T, M + (N - 1)*3))
         else
             ucons_length = length(prob.ucons)
             vcat(prob.ucons, zeros(T, N*M - ucons_length))
diff --git a/lib/BoundaryValueDiffEqMIRK/src/collocation.jl b/lib/BoundaryValueDiffEqMIRK/src/collocation.jl
index cb971737b..88d79bd02 100644
--- a/lib/BoundaryValueDiffEqMIRK/src/collocation.jl
+++ b/lib/BoundaryValueDiffEqMIRK/src/collocation.jl
@@ -1,10 +1,43 @@
-function Φ!(residual, cache::MIRKCache, y, u, trait)
-    return Φ!(residual, cache.fᵢ_cache, cache.k_discrete, cache.f, cache.TU,
-        y, u, cache.p, cache.mesh, cache.mesh_dt, cache.stage, trait)
+function Φ!(residual, cache::MIRKCache, y, u, trait, constraint)
+    return Φ!(residual, cache.fᵢ_cache, cache.k_discrete, cache.f, cache.TU, y, u,
+        cache.p, cache.mesh, cache.mesh_dt, cache.stage, trait, constraint)
 end
 
-@views function Φ!(residual, fᵢ_cache, k_discrete, f!, TU::MIRKTableau,
-        y, u, p, mesh, mesh_dt, stage::Int, ::DiffCacheNeeded)
+@views function Φ!(residual, fᵢ_cache, k_discrete, f!, TU::MIRKTableau, y, u, p, mesh,
+        mesh_dt, stage::Int, ::DiffCacheNeeded, constraint::Val{true})
+    (; c, v, x, b) = TU
+
+    ttt = get_tmp(fᵢ_cache, u)
+    tmp = copy(ttt[1:3])
+
+    length_control = length(last(residual))
+
+    T = eltype(u)
+    for i in eachindex(k_discrete)
+        K = get_tmp(k_discrete[i], u)
+        residᵢ = residual[i]
+        h = mesh_dt[i]
+
+        yᵢ = get_tmp(y[i], u)
+        yᵢ₊₁ = get_tmp(y[i + 1], u)
+
+        yᵢ, uᵢ = yᵢ[1:(end - length_control)], yᵢ[(end - length_control + 1):end]
+        yᵢ₊₁, uᵢ₊₁ = yᵢ₊₁[1:(end - length_control)], yᵢ₊₁[(end - length_control + 1):end]
+
+        for r in 1:stage
+            @. tmp = (1 - v[r]) * yᵢ + v[r] * yᵢ₊₁
+            __maybe_matmul!(tmp, K[:, 1:(r - 1)], x[r, 1:(r - 1)], h, T(1))
+            f!(K[:, r], vcat(tmp, uᵢ), p, mesh[i] + c[r] * h)
+        end
+
+        # Update residual
+        @. residᵢ = yᵢ₊₁ - yᵢ
+        __maybe_matmul!(residᵢ, K[:, 1:stage], b[1:stage], -h, T(1))
+    end
+end
+
+@views function Φ!(residual, fᵢ_cache, k_discrete, f!, TU::MIRKTableau, y, u, p, mesh,
+        mesh_dt, stage::Int, ::DiffCacheNeeded, constraint::Val{false})
     (; c, v, x, b) = TU
 
     tmp = get_tmp(fᵢ_cache, u)
@@ -29,8 +62,8 @@ end
     end
 end
 
-@views function Φ!(residual, fᵢ_cache, k_discrete, f!, TU::MIRKTableau, y,
-        u, p, mesh, mesh_dt, stage::Int, ::NoDiffCacheNeeded)
+@views function Φ!(residual, fᵢ_cache, k_discrete, f!, TU::MIRKTableau, y, u, p, mesh,
+        mesh_dt, stage::Int, ::NoDiffCacheNeeded, constraint::Val{false})
     (; c, v, x, b) = TU
 
     tmp = similar(fᵢ_cache)
diff --git a/lib/BoundaryValueDiffEqMIRK/src/mirk.jl b/lib/BoundaryValueDiffEqMIRK/src/mirk.jl
index 5bea5f870..60dda4897 100644
--- a/lib/BoundaryValueDiffEqMIRK/src/mirk.jl
+++ b/lib/BoundaryValueDiffEqMIRK/src/mirk.jl
@@ -12,6 +12,7 @@
     alg                        # MIRK methods
     TU                         # MIRK Tableau
     ITU                        # MIRK Interpolation Tableau
+    f_prototype
     bcresid_prototype
     # Everything below gets resized in adaptive methods
     mesh                       # Discrete mesh
@@ -65,13 +66,25 @@ function SciMLBase.__init(
     y = __alloc.(copy.(y₀.u))
     TU, ITU = constructMIRK(alg, T)
     stage = alg_stage(alg)
+    f_prototype = isnothing(prob.f.f_prototype) ? nothing : __vec(prob.f.f_prototype)
 
-    k_discrete = [__maybe_allocate_diffcache(safe_similar(X, N, stage), chunksize, alg.jac_alg)
-                  for _ in 1:Nig]
-    k_interp = VectorOfArray([similar(X, N, ITU.s_star - stage) for _ in 1:Nig])
+    k_discrete = if !constraint
+        [__maybe_allocate_diffcache(safe_similar(X, N, stage), chunksize, alg.jac_alg)
+         for _ in 1:Nig]
+    else
+        [__maybe_allocate_diffcache(safe_similar(X, N - length(f_prototype), stage), chunksize, alg.jac_alg)
+         for _ in 1:Nig]
+    end
+    k_interp = if !constraint
+        VectorOfArray([similar(X, N, ITU.s_star - stage) for _ in 1:Nig])
+    else
+        VectorOfArray([similar(X, N - length(f_prototype), ITU.s_star - stage)
+                       for _ in 1:Nig])
+    end
 
     bcresid_prototype, resid₁_size = __get_bcresid_prototype(prob.problem_type, prob, X)
 
+    # residual should be [bc_resid_prototype; f_prototype; f_prototype; f_prototype; f_prototype...]
     residual = if iip
         if !constraint
             if prob.problem_type isa TwoPointBVProblem
@@ -81,11 +94,13 @@ function SciMLBase.__init(
             end
         else
             if prob.problem_type isa TwoPointBVProblem
-                vcat(__alloc.(copy.(y₀.u)), [__alloc(__vec(bcresid_prototype))],
-                    __alloc.(copy.(@view(y₀.u[2:end]))))
+                #vcat(__alloc.(copy.(y₀.u)), [__alloc(__vec(bcresid_prototype))],
+                #    __alloc.(copy.(@view(y₀.u[2:end]))))
             else
+                #vcat(__alloc.(copy.(y₀.u)), [__alloc(bcresid_prototype)],
+                #    __alloc.(copy.(@view(y₀.u[2:end]))))
                 vcat(__alloc.(copy.(y₀.u)), [__alloc(bcresid_prototype)],
-                    __alloc.(copy.(@view(y₀.u[2:end]))))
+                    __alloc.(copy.([f_prototype for _ in 1:Nig])))
             end
         end
     else
@@ -139,9 +154,9 @@ function SciMLBase.__init(
     prob_ = !(prob.u0 isa AbstractArray) ? remake(prob; u0 = X) : prob
 
     return MIRKCache{iip, T, use_both, typeof(diffcache), fit_parameters}(
-        alg_order(alg), stage, N, size(X), f, bc, prob_, prob.problem_type, prob.p,
-        alg, TU, ITU, bcresid_prototype, mesh, mesh_dt, k_discrete, k_interp, y, y₀,
-        residual, fᵢ_cache, fᵢ₂_cache, errors, new_stages, resid₁_size, nlsolve_kwargs,
+        alg_order(alg), stage, N, size(X), f, bc, prob_, prob.problem_type, prob.p, alg,
+        TU, ITU, f_prototype, bcresid_prototype, mesh, mesh_dt, k_discrete, k_interp, y,
+        y₀, residual, fᵢ_cache, fᵢ₂_cache, errors, new_stages, resid₁_size, nlsolve_kwargs,
         optimize_kwargs, (; abstol, dt, adaptive, controller, fit_parameters, kwargs...))
 end
 
@@ -250,6 +265,13 @@ end
 
 # Constructing the Nonlinear Problem
 function __construct_problem(cache::MIRKCache{iip}, y::AbstractVector, y₀::AbstractVectorOfArray) where {iip}
+    constraint = (!isnothing(cache.prob.f.inequality)) ||
+                 (!isnothing(cache.prob.f.equality))
+    return __construct_problem(cache, y, y₀, Val(constraint))
+end
+
+function __construct_problem(cache::MIRKCache{iip}, y::AbstractVector,
+        y₀::AbstractVectorOfArray, constraint::Val{false}) where {iip}
     pt = cache.problem_type
     (; jac_alg) = cache.alg
 
@@ -257,8 +279,48 @@ function __construct_problem(cache::MIRKCache{iip}, y::AbstractVector, y₀::Abs
 
     trait = __cache_trait(jac_alg)
 
-    constraint = (!isnothing(cache.prob.f.inequality)) ||
-                 (!isnothing(cache.prob.f.equality))
+    loss_bc = if iip
+        @closure (du,
+            u,
+            p) -> __mirk_loss_bc!(du, u, p, pt, cache.bc, cache.y, cache.mesh, cache, trait)
+    else
+        @closure (
+            u, p) -> __mirk_loss_bc(u, p, pt, cache.bc, cache.y, cache.mesh, cache, trait)
+    end
+
+    loss_collocation = if iip
+        @closure (du,
+            u,
+            p) -> __mirk_loss_collocation!(
+            du, u, p, cache.y, cache.mesh, cache.residual, cache, trait, constraint)
+    else
+        @closure (u,
+            p) -> __mirk_loss_collocation(
+            u, p, cache.y, cache.mesh, cache.residual, cache, trait)
+    end
+
+    loss = if iip
+        @closure (du,
+            u,
+            p) -> __mirk_loss!(du, u, p, cache.y, pt, cache.bc, cache.residual,
+            cache.mesh, cache, eval_sol, trait, constraint)
+    else
+        @closure (u,
+            p) -> __mirk_loss(
+            u, p, cache.y, pt, cache.bc, cache.mesh, cache, eval_sol, trait)
+    end
+
+    return __construct_problem(cache, y, loss_bc, loss_collocation, loss, pt, constraint)
+end
+
+function __construct_problem(cache::MIRKCache{iip}, y::AbstractVector,
+        y₀::AbstractVectorOfArray, constraint::Val{true}) where {iip}
+    pt = cache.problem_type
+    (; jac_alg) = cache.alg
+
+    eval_sol = EvalSol(__restructure_sol(y₀.u, cache.in_size), cache.mesh, cache)
+
+    trait = __cache_trait(jac_alg)
 
     loss_bc = if iip
         @closure (du,
@@ -273,25 +335,31 @@ function __construct_problem(cache::MIRKCache{iip}, y::AbstractVector, y₀::Abs
         @closure (du,
             u,
             p) -> __mirk_loss_collocation!(
-            du, u, p, cache.y, cache.mesh, cache.residual, cache, trait)
+            du, u, p, cache.y, cache.mesh, cache.residual, cache, trait, constraint)
     else
         @closure (u,
             p) -> __mirk_loss_collocation(
             u, p, cache.y, cache.mesh, cache.residual, cache, trait)
     end
 
+    loss_constraint = @closure (du,
+        u,
+        p) -> __mirk_loss_constraint!(
+        du, u, p, cache.y, cache.mesh, cache.residual, cache, trait)
+
     loss = if iip
         @closure (du,
             u,
             p) -> __mirk_loss!(du, u, p, cache.y, pt, cache.bc, cache.residual,
-            cache.mesh, cache, eval_sol, trait, Val(constraint))
+            cache.mesh, cache, eval_sol, trait, constraint)
     else
         @closure (u,
             p) -> __mirk_loss(
             u, p, cache.y, pt, cache.bc, cache.mesh, cache, eval_sol, trait)
     end
 
-    return __construct_problem(cache, y, loss_bc, loss_collocation, loss, pt)
+    return __construct_problem(
+        cache, y, loss_bc, loss_collocation, loss_constraint, loss, pt, constraint)
 end
 
 # Let's only do inplace version now since Optimization.jl only supports that.
@@ -306,7 +374,7 @@ end
         EvalSol, trait::DiffCacheNeeded, constraint::Val{false}) where {BC}
     y_ = recursive_unflatten!(y, u)
     resids = [get_tmp(r, u) for r in residual]
-    Φ!(resids[2:end], cache, y_, u, trait)
+    Φ!(resids[2:end], cache, y_, u, trait, constraint)
     EvalSol.u[1:end] .= __restructure_sol(y_, cache.in_size)
     EvalSol.cache.k_discrete[1:end] .= cache.k_discrete
     eval_bc_residual!(resids[1], pt, bc!, EvalSol, p, mesh)
@@ -318,7 +386,7 @@ end
         resid, u, p, y, pt::StandardBVProblem, bc!::BC, residual, mesh, cache,
         EvalSol, trait::NoDiffCacheNeeded, constraint::Val{false}) where {BC}
     y_ = recursive_unflatten!(y, u)
-    Φ!(residual[2:end], cache, y_, u, trait)
+    Φ!(residual[2:end], cache, y_, u, trait, constraint)
     EvalSol.u[1:end] .= __restructure_sol(y_, cache.in_size)
     EvalSol.cache.k_discrete[1:end] .= cache.k_discrete
     eval_bc_residual!(residual[1], pt, bc!, EvalSol, p, mesh)
@@ -331,7 +399,7 @@ end
         cache, _, trait::DiffCacheNeeded, constraint::Val{false}) where {BC1, BC2}
     y_ = recursive_unflatten!(y, u)
     resids = [get_tmp(r, u) for r in residual]
-    Φ!(resids[2:end], cache, y_, u, trait)
+    Φ!(resids[2:end], cache, y_, u, trait, constraint)
     resida = resids[1][1:prod(cache.resid_size[1])]
     residb = resids[1][(prod(cache.resid_size[1]) + 1):end]
     eval_bc_residual!((resida, residb), pt, bc!, y_, p, mesh)
@@ -343,7 +411,7 @@ end
         resid, u, p, y, pt::TwoPointBVProblem, bc!::Tuple{BC1, BC2}, residual, mesh,
         cache, _, trait::NoDiffCacheNeeded, constraint::Val{false}) where {BC1, BC2}
     y_ = recursive_unflatten!(y, u)
-    Φ!(residual[2:end], cache, y_, u, trait)
+    Φ!(residual[2:end], cache, y_, u, trait, constraint)
     resida = residual[1][1:prod(cache.resid_size[1])]
     residb = residual[1][(prod(cache.resid_size[1]) + 1):end]
     eval_bc_residual!((resida, residb), pt, bc!, y_, p, mesh)
@@ -359,7 +427,7 @@ end
     resids = [get_tmp(r, u) for r in residual]
     # whole-time inequality constraints
     cache.prob.f.inequality.(resids[1:L], y_, nothing)
-    Φ!(resids[(L + 2):2L], cache, y_, u, trait)
+    Φ!(resids[(L + 2):2L], cache, y_, u, trait, constraint)
     EvalSol.u[1:end] .= __restructure_sol(y_, cache.in_size)
     EvalSol.cache.k_discrete[1:end] .= cache.k_discrete
     eval_bc_residual!(resids[L + 1], pt, bc!, EvalSol, p, mesh)
@@ -386,6 +454,10 @@ end
     return vcat(resid_bca, mapreduce(vec, vcat, resid_co), resid_bcb)
 end
 
+################################################################################
+# Dispatch for boundary condition part
+################################################################################
+
 @views function __mirk_loss_bc!(
         resid, u, p, pt, bc!::BC, y, mesh, cache::MIRKCache, trait) where {BC}
     y_ = recursive_unflatten!(y, u)
@@ -401,22 +473,46 @@ end
     return eval_bc_residual(pt, bc!, soly_, p, mesh)
 end
 
-@views function __mirk_loss_collocation!(
-        resid, u, p, y, mesh, residual, cache, trait::DiffCacheNeeded)
+################################################################################
+# Dispatch for collocation part
+################################################################################
+
+@views function __mirk_loss_collocation!(resid, u, p, y, mesh, residual, cache,
+        trait::DiffCacheNeeded, constraint::Val{false})
     y_ = recursive_unflatten!(y, u)
     N = length(y)
     resids = [get_tmp(r, u) for r in residual[2:N]]
-    Φ!(resids, cache, y_, u, trait)
+    Φ!(resids, cache, y_, u, trait, constraint)
     recursive_flatten!(resid, resids)
     return nothing
 end
 
-@views function __mirk_loss_collocation!(
-        resid, u, p, y, mesh, residual, cache, trait::NoDiffCacheNeeded)
+@views function __mirk_loss_collocation!(resid, u, p, y, mesh, residual, cache,
+        trait::DiffCacheNeeded, constraint::Val{true})
+    y_ = recursive_unflatten!(y, u)
+    N = length(y)
+    resids = [get_tmp(r, u) for r in residual[(N + 2):end]]
+    Φ!(resids, cache, y_, u, trait, constraint)
+    recursive_flatten!(resid, resids)
+    return nothing
+end
+
+@views function __mirk_loss_collocation!(resid, u, p, y, mesh, residual, cache,
+        trait::NoDiffCacheNeeded, constraint::Val{false})
     y_ = recursive_unflatten!(y, u)
     N = length(y)
     resids = [r for r in residual[2:N]]
-    Φ!(resids, cache, y_, u, trait)
+    Φ!(resids, cache, y_, u, trait, constraint)
+    recursive_flatten!(resid, resids)
+    return nothing
+end
+
+@views function __mirk_loss_collocation!(resid, u, p, y, mesh, residual, cache,
+        trait::NoDiffCacheNeeded, constraint::Val{true})
+    y_ = recursive_unflatten!(y, u)
+    N = length(y)
+    resids = [r for r in residual[(N + 2):end]]
+    Φ!(resids, cache, y_, u, trait, constraint)
     recursive_flatten!(resid, resids)
     return nothing
 end
@@ -427,18 +523,30 @@ end
     return mapreduce(vec, vcat, resids)
 end
 
-function __construct_problem(cache::MIRKCache{iip}, y, loss_bc::BC, loss_collocation::C,
-        loss::LF, ::StandardBVProblem) where {iip, BC, C, LF}
+################################################################################
+# Dispatch for problems with constraints
+################################################################################
+
+@views function __mirk_loss_constraint!(
+        resid, u, p, y, mesh, residual, cache, trait::DiffCacheNeeded)
+    y_ = recursive_unflatten!(y, u)
+    N = length(y)
+    resids = [get_tmp(r, u) for r in residual[1:N]]
+    cache.prob.f.inequality.(resids, y_, nothing)
+    recursive_flatten!(resid, resids)
+    return nothing
+end
+
+function __construct_problem(
+        cache::MIRKCache{iip}, y, loss_bc::BC, loss_collocation::C, loss::LF,
+        ::StandardBVProblem, constraint::Val{false}) where {iip, BC, C, LF}
     (; jac_alg) = cache.alg
     (; bc_diffmode) = jac_alg
     N = length(cache.mesh)
-    constraint = (!isnothing(cache.prob.f.inequality)) ||
-                 (!isnothing(cache.prob.f.equality))
 
     resid_bc = cache.bcresid_prototype
     L = length(resid_bc)
     resid_collocation = safe_similar(y, cache.M * (N - 1))
-    resid_prototype = vcat(resid_bc, resid_collocation)
 
     cache_bc = if iip
         DI.prepare_jacobian(loss_bc, resid_bc, bc_diffmode, y, Constant(cache.p))
@@ -492,29 +600,12 @@ function __construct_problem(cache::MIRKCache{iip}, y, loss_bc::BC, loss_colloca
         jac_prototype = AlmostBandedMatrix{eltype(cache)}(J_full_band, J_bc)
     end
 
-    if constraint
-        cache_inequality = DI.prepare_jacobian(
-            cache.prob.f.inequality, resid_prototype, bc_diffmode, y, Constant(cache.p))
-        J_inequality = DI.jacobian(cache.prob.f.inequality, resid_prototype,
-            cache_inequality, bc_diffmode, y, Constant(cache.p))
-        jac_prototype = vcat(J_inequality, jac_prototype)
-    end
-
     jac = if iip
-        if constraint
-            @closure (J,
-                u,
-                p) -> __mirk_mpoint_jacobian!(
-                J, J_c, u, bc_diffmode, nonbc_diffmode, cache_bc, cache_collocation,
-                cache_inequality, loss_bc, loss_collocation, cache.prob.f.inequality,
-                resid_bc, resid_collocation, resid_prototype, L, cache.p)
-        else
-            @closure (J,
-                u,
-                p) -> __mirk_mpoint_jacobian!(
-                J, J_c, u, bc_diffmode, nonbc_diffmode, cache_bc, cache_collocation,
-                loss_bc, loss_collocation, resid_bc, resid_collocation, L, cache.p)
-        end
+        @closure (J,
+            u,
+            p) -> __mirk_mpoint_jacobian!(
+            J, J_c, u, bc_diffmode, nonbc_diffmode, cache_bc, cache_collocation,
+            loss_bc, loss_collocation, resid_bc, resid_collocation, L, cache.p)
     else
         @closure (u,
             p) -> __mirk_mpoint_jacobian(
@@ -522,6 +613,96 @@ function __construct_problem(cache::MIRKCache{iip}, y, loss_bc::BC, loss_colloca
             cache_collocation, loss_bc, loss_collocation, L, cache.p)
     end
 
+    resid_prototype = vcat(resid_bc, resid_collocation)
+    return __construct_internal_problem(cache.prob, cache.alg, loss, jac, jac_prototype,
+        resid_prototype, y, cache.p, cache.M, N)
+end
+
+# Dispatch for problems with constraints
+function __construct_problem(
+        cache::MIRKCache{iip}, y, loss_bc::BC, loss_collocation::C, loss_constraint::LC,
+        loss::LF, ::StandardBVProblem, constraint::Val{true}) where {iip, BC, C, LC, LF}
+    (; jac_alg) = cache.alg
+    (; bc_diffmode) = jac_alg
+    (; f_prototype) = cache
+    N = length(cache.mesh)
+
+    resid_bc = cache.bcresid_prototype
+    L = length(resid_bc)
+    resid_collocation = safe_similar(y, length(f_prototype) * (N - 1))
+    resid_prototype = safe_similar(y, cache.M * N)
+
+    cache_bc = if iip
+        DI.prepare_jacobian(loss_bc, resid_bc, bc_diffmode, y, Constant(cache.p))
+    else
+        DI.prepare_jacobian(loss_bc, bc_diffmode, y, Constant(cache.p))
+    end
+
+    nonbc_diffmode = if jac_alg.nonbc_diffmode isa AutoSparse
+        if L < cache.M
+            # For underdetermined problems we use sparse since we don't have banded qr
+            J_full_band = nothing
+            sparse_jacobian_prototype = __generate_sparse_jacobian_prototype(
+                cache, cache.problem_type, y, y, cache.M, N)
+        else
+            J_full_band = BandedMatrix(
+                Ones{eltype(y)}(L + length(f_prototype) * (N - 1), cache.M * N),
+                (L + 1, cache.M + max(cache.M - L, 0)))
+            sparse_jacobian_prototype = BandedMatrix(
+                Ones{eltype(y)}(length(f_prototype) * (N - 1), cache.M * N), (
+                    1, 2cache.M - 1))
+        end
+        AutoSparse(get_dense_ad(jac_alg.nonbc_diffmode);
+            sparsity_detector = ADTypes.KnownJacobianSparsityDetector(sparse_jacobian_prototype),
+            coloring_algorithm = __default_coloring_algorithm(jac_alg.nonbc_diffmode))
+    else
+        J_full_band = nothing
+        jac_alg.nonbc_diffmode
+    end
+
+    cache_collocation = if iip
+        DI.prepare_jacobian(
+            loss_collocation, resid_collocation, nonbc_diffmode, y, Constant(cache.p))
+    else
+        DI.prepare_jacobian(loss_collocation, nonbc_diffmode, y, Constant(cache.p))
+    end
+
+    J_bc = if iip
+        DI.jacobian(loss_bc, resid_bc, cache_bc, bc_diffmode, y, Constant(cache.p))
+    else
+        DI.jacobian(loss_bc, cache_bc, bc_diffmode, y, Constant(cache.p))
+    end
+    J_c = if iip
+        DI.jacobian(loss_collocation, resid_collocation, cache_collocation,
+            nonbc_diffmode, y, Constant(cache.p))
+    else
+        DI.jacobian(
+            loss_collocation, cache_collocation, nonbc_diffmode, y, Constant(cache.p))
+    end
+
+    if J_full_band === nothing
+        jac_prototype = vcat(J_bc, J_c)
+    else
+        jac_prototype = AlmostBandedMatrix{eltype(cache)}(J_full_band, J_bc)
+    end
+
+    constraint_diffmode = AutoSparse(AutoForwardDiff();
+        sparsity_detector = BoundaryValueDiffEqCore.TracerLocalSparsityDetector(),
+        coloring_algorithm = __default_coloring_algorithm(jac_alg.bc_diffmode))
+
+    cache_constraint = DI.prepare_jacobian(
+        loss_constraint, resid_prototype, constraint_diffmode, y, Constant(cache.p))
+    J_constraint = DI.jacobian(loss_constraint, resid_prototype, cache_constraint,
+        constraint_diffmode, y, Constant(cache.p))
+    jac_prototype = vcat(J_constraint, jac_prototype)
+
+    jac = @closure (J,
+        u,
+        p) -> __mirk_mpoint_jacobian!(
+        J, J_c, u, bc_diffmode, nonbc_diffmode, constraint_diffmode, cache_bc,
+        cache_collocation, cache_constraint, loss_bc, loss_collocation,
+        loss_constraint, resid_bc, resid_collocation, resid_prototype, L, cache.p)
+
     return __construct_internal_problem(cache.prob, cache.alg, loss, jac, jac_prototype,
         resid_prototype, y, cache.p, cache.M, N)
 end
@@ -549,17 +730,17 @@ function __mirk_mpoint_jacobian!(J::AlmostBandedMatrix, J_c, x, bc_diffmode, non
     return nothing
 end
 
-function __mirk_mpoint_jacobian!(
-        J, _, x, bc_diffmode, nonbc_diffmode, bc_diffcache, nonbc_diffcache,
-        inequality_diffcache, loss_bc::BC, loss_collocation::C, loss_inequality,
-        resid_bc, resid_collocation, resid_prototype, L::Int, p) where {BC, C}
+function __mirk_mpoint_jacobian!(J, _, x, bc_diffmode, nonbc_diffmode, constraint_diffmode,
+        bc_diffcache, nonbc_diffcache, constraint_diffcache,
+        loss_bc::BC, loss_collocation::C, loss_inequality, resid_bc,
+        resid_collocation, resid_prototype, L::Int, p) where {BC, C}
     N = length(x)
     # The Jacobian of constraints
     DI.jacobian!(loss_inequality, resid_prototype, @view(J[1:N, :]),
-        inequality_diffcache, bc_diffmode, x, Constant(p))
+        constraint_diffcache, constraint_diffmode, x, Constant(p))
     DI.jacobian!(loss_bc, resid_bc, @view(J[(N + 1):(N + L), :]),
         bc_diffcache, bc_diffmode, x, Constant(p))
-    DI.jacobian!(loss_collocation, resid_collocation, @view(J[(N + L + 1):2N, :]),
+    DI.jacobian!(loss_collocation, resid_collocation, @view(J[(N + L + 1):end, :]),
         nonbc_diffcache, nonbc_diffmode, x, Constant(p))
     return nothing
 end
@@ -596,8 +777,9 @@ function __mirk_mpoint_jacobian(
     return J
 end
 
-function __construct_problem(cache::MIRKCache{iip}, y, loss_bc::BC, loss_collocation::C,
-        loss::LF, ::TwoPointBVProblem) where {iip, BC, C, LF}
+function __construct_problem(
+        cache::MIRKCache{iip}, y, loss_bc::BC, loss_collocation::C, loss::LF,
+        ::TwoPointBVProblem, constraint::Val{false}) where {iip, BC, C, LF}
     (; jac_alg) = cache.alg
     N = length(cache.mesh)
 

From 6297392af9154b1a49bdbdda306cdc34b088fbbc Mon Sep 17 00:00:00 2001
From: Qingyu Qu <2283984853@qq.com>
Date: Tue, 14 Oct 2025 02:25:28 +0800
Subject: [PATCH 05/11] Update inequality constraints in docs

---
 docs/src/tutorials/optimal_control.md   | 13 ++++++++++++-
 lib/BoundaryValueDiffEqMIRK/src/mirk.jl | 23 ++++++++++++++---------
 2 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/docs/src/tutorials/optimal_control.md b/docs/src/tutorials/optimal_control.md
index bd3faeec1..f196fec52 100644
--- a/docs/src/tutorials/optimal_control.md
+++ b/docs/src/tutorials/optimal_control.md
@@ -38,6 +38,17 @@ $$
 \max x_h(T)
 $$
 
+The inequality constraints for the state variables and control variables are:
+
+$$
+\left\{\begin{aligned}
+&x_v>0\\
+&x_h>0\\
+&m_T<x_m<m_0\\
+&0<u_t<u_{t\text{max}}
+\end{aligned}\right.
+$$
+
 Similar solving for such optimal control problem can be found on JuMP.jl and InfiniteOpt.jl. The detailed parameters are taken from [COPS](https://www.mcs.anl.gov/%7Emore/cops/cops3.pdf).
 
 ```julia
@@ -77,7 +88,7 @@ function constraints!(res, u, p)
     res[3] = u[3]
     res[4] = u[4]
 end
-cost_fun(u, p) = -u[end - 2] #Altitute x_h. To minimize, only temporary, need to use temporary solution interpolation here similar to what we do in boundary condition evaluations.
+cost_fun(u, p) = -u[end - 2] #Final altitude x_h. To minimize, only temporary, need to use temporary solution interpolation here similar to what we do in boundary condition evaluations.
 #cost_fun(sol, p) = -sol(0.2)[2]
 u0 = [v_0, h_0, m_0, 0.0]
 rocket_launch_fun = BVPFunction(rocket_launch!, rocket_launch_bc!; cost = cost_fun,
diff --git a/lib/BoundaryValueDiffEqMIRK/src/mirk.jl b/lib/BoundaryValueDiffEqMIRK/src/mirk.jl
index 60dda4897..935f8e7e8 100644
--- a/lib/BoundaryValueDiffEqMIRK/src/mirk.jl
+++ b/lib/BoundaryValueDiffEqMIRK/src/mirk.jl
@@ -116,18 +116,23 @@ function SciMLBase.__init(
     bcresid_prototype = __vec(bcresid_prototype)
     f,
     bc = if X isa AbstractVector
-        #TODO: Simplify the logic by wrapping the functions
-        if fit_parameters == true
+        f_wrapped = prob.f
+        bc_wrapped = prob.f.bc
+        if fit_parameters
             l_parameters = length(prob.p)
-            vecf! = function (du, u, p, t)
-                prob.f(du, u, @view(u[(end - l_parameters + 1):end]), t)
-                du[(end - l_parameters + 1):end] .= 0
+            base_f = f_wrapped
+            f_wrapped = @closure (du,
+                u,
+                p,
+                t) -> begin
+                @inbounds @views begin
+                    base_f(du, u, u[(end - l_parameters + 1):end], t)
+                    fill!(du[(end - l_parameters + 1):end], zero(eltype(du)))
+                end
+                return nothing
             end
-            vecbc! = prob.f.bc
-            vecf!, vecbc!
-        else
-            prob.f, prob.f.bc
         end
+        f_wrapped, bc_wrapped
     elseif iip
         vecf! = @closure (du, u, p, t) -> __vec_f!(du, u, p, t, prob.f, size(X))
         vecbc! = if !(prob.problem_type isa TwoPointBVProblem)

From 300667d1ac2e6fcd1a0504bc050d455d17d02941 Mon Sep 17 00:00:00 2001
From: Qingyu Qu <2283984853@qq.com>
Date: Tue, 14 Oct 2025 03:03:49 +0800
Subject: [PATCH 06/11] Fix wrong f_prototype length

---
 lib/BoundaryValueDiffEqCore/src/utils.jl | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lib/BoundaryValueDiffEqCore/src/utils.jl b/lib/BoundaryValueDiffEqCore/src/utils.jl
index 0e426fecd..93fa82df1 100644
--- a/lib/BoundaryValueDiffEqCore/src/utils.jl
+++ b/lib/BoundaryValueDiffEqCore/src/utils.jl
@@ -631,9 +631,10 @@ end
     lcons = if isnothing(prob.lcons)
         zeros(T, N*M) #TODO: handle carefully when NLLS
     else
+        length_f_prototype = length(prob.f.f_prototype)
         if !(isnothing(prob.f.equality) && isnothing(prob.f.inequality))
             # When there are additional equality or inequality constraints
-            vcat(repeat(prob.lcons, N), zeros(T, M + (N - 1)*3))
+            vcat(repeat(prob.lcons, N), zeros(T, M + (N - 1)*length_f_prototype))
         else
             lcons_length = length(prob.lcons)
             vcat(prob.lcons, zeros(T, N*M - lcons_length))
@@ -642,9 +643,10 @@ end
     ucons = if isnothing(prob.ucons)
         zeros(T, N*M) #TODO: handle carefully when NLLS
     else
+        length_f_prototype = length(prob.f.f_prototype)
         if !(isnothing(prob.f.equality) && isnothing(prob.f.inequality))
             # When there are additional equality or inequality constraints
-            vcat(repeat(prob.ucons, N), zeros(T, M + (N - 1)*3))
+            vcat(repeat(prob.ucons, N), zeros(T, M + (N - 1)*length_f_prototype))
         else
             ucons_length = length(prob.ucons)
             vcat(prob.ucons, zeros(T, N*M - ucons_length))

From 18e6dd9f7c484aa5a9e241bf333df79e7ce5c26f Mon Sep 17 00:00:00 2001
From: Qingyu Qu <2283984853@qq.com>
Date: Mon, 17 Nov 2025 14:44:11 +0800
Subject: [PATCH 07/11] MIRK for dynamic optimization done

---
 docs/src/tutorials/optimal_control.md         |  33 +-
 lib/BoundaryValueDiffEqCore/src/utils.jl      | 151 +++----
 .../src/BoundaryValueDiffEqFIRK.jl            |   2 +-
 .../src/collocation.jl                        | 102 ++++-
 lib/BoundaryValueDiffEqFIRK/src/firk.jl       | 390 ++++++++++++++---
 .../src/BoundaryValueDiffEqMIRK.jl            |   3 +-
 .../src/collocation.jl                        |  31 +-
 lib/BoundaryValueDiffEqMIRK/src/mirk.jl       | 391 +++++++-----------
 .../test/dynamic_optimization_tests.jl        |  57 +++
 9 files changed, 715 insertions(+), 445 deletions(-)
 create mode 100644 lib/BoundaryValueDiffEqMIRK/test/dynamic_optimization_tests.jl

diff --git a/docs/src/tutorials/optimal_control.md b/docs/src/tutorials/optimal_control.md
index f196fec52..decfcb6e6 100644
--- a/docs/src/tutorials/optimal_control.md
+++ b/docs/src/tutorials/optimal_control.md
@@ -52,7 +52,7 @@ $$
 Similar solving for such optimal control problem can be found on JuMP.jl and InfiniteOpt.jl. The detailed parameters are taken from [COPS](https://www.mcs.anl.gov/%7Emore/cops/cops3.pdf).
 
 ```julia
-using BoundaryValueDiffEqMIRK, OptimizationMOI, Ipopt
+using BoundaryValueDiffEqMIRK, OptimizationIpopt, Plots
 h_0 = 1                      # Initial height
 v_0 = 0                      # Initial velocity
 m_0 = 1.0                    # Initial mass
@@ -79,23 +79,26 @@ end
 function rocket_launch_bc!(res, u, p, t)
     res[1] = u(0.0)[1] - v_0
     res[2] = u(0.0)[2] - h_0
-    res[3] = u(0.2)[3] - m_T
+    res[3] = u(0.0)[3] - m_0
     res[4] = u(0.2)[4] - 0.0
 end
-function constraints!(res, u, p)
-    res[1] = u[1]
-    res[2] = u[2]
-    res[3] = u[3]
-    res[4] = u[4]
-end
 cost_fun(u, p) = -u[end - 2] #Final altitude x_h. To minimize, only temporary, need to use temporary solution interpolation here similar to what we do in boundary condition evaluations.
-#cost_fun(sol, p) = -sol(0.2)[2]
-u0 = [v_0, h_0, m_0, 0.0]
-rocket_launch_fun = BVPFunction(rocket_launch!, rocket_launch_bc!; cost = cost_fun,
-    inequality = constraints!, f_prototype = zeros(3))
-rocket_launch_prob = BVProblem(rocket_launch_fun, u0, tspan; lcons = [0.0, h_0, m_T, 0.0],
-    ucons = [Inf, Inf, m_0, u_t_max])
-sol = solve(rocket_launch_prob, MIRK4(; optimize = Ipopt.Optimizer()); dt = 0.002)
+u0 = [v_0, h_0, m_T, 3.0]
+rocket_launch_fun = BVPFunction(rocket_launch!, rocket_launch_bc!; cost = cost_fun, f_prototype = zeros(3))
+rocket_launch_prob = BVProblem(
+    rocket_launch_fun, u0, tspan; lb = [0.0, h_0, m_T, 0.0], ub = [Inf, Inf, m_0, u_t_max])
+sol = solve(rocket_launch_prob, MIRK4(; optimize = Ipopt.Optimizer()); dt = Δt, adaptive = false)
+
+u = reduce(hcat, sol.u)
+v, h, m, c = u[1, :], u[2, :], u[3, :], u[4, :]
+
+# Plot the solution
+p1 = plot(sol.t, v, xlabel = "Time", ylabel = "Velocity", legend = false)
+p2 = plot(sol.t, h, xlabel = "Time", ylabel = "Altitude", legend = false)
+p3 = plot(sol.t, m, xlabel = "Time", ylabel = "Mass", legend = false)
+p4 = plot(sol.t, c, xlabel = "Time", ylabel = "Thrust", legend = false)
+
+plot(p1, p2, p3, p4, layout = (2, 2))
 ```
 
 Similar optimal control problem solving can also be deployed in JuMP.jl and InfiniteOpt.jl.
diff --git a/lib/BoundaryValueDiffEqCore/src/utils.jl b/lib/BoundaryValueDiffEqCore/src/utils.jl
index a059ca958..bbe3cd044 100644
--- a/lib/BoundaryValueDiffEqCore/src/utils.jl
+++ b/lib/BoundaryValueDiffEqCore/src/utils.jl
@@ -627,42 +627,48 @@ end
 @inline __default_cost(f) = f
 @inline __default_cost(fun::BVPFunction) = __default_cost(fun.cost)
 
-@inline function __extract_lcons_ucons(prob::AbstractBVProblem, ::Type{T}, M, N) where {T}
+@inline function __extract_lcons_ucons(
+        prob::AbstractBVProblem, ::Type{T}, M, N, bcresid_prototype, f_prototype) where {T}
+    L_f_prototype = length(f_prototype)
+    L_bcresid_prototype = length(bcresid_prototype)
     lcons = if isnothing(prob.lcons)
-        zeros(T, N*M) #TODO: handle carefully when NLLS
+        zeros(T, L_bcresid_prototype + (N - 1)*L_f_prototype)
     else
-        length_f_prototype = length(prob.f.f_prototype)
-        if !(isnothing(prob.f.equality) && isnothing(prob.f.inequality))
-            # When there are additional equality or inequality constraints
-            vcat(repeat(prob.lcons, N), zeros(T, M + (N - 1)*length_f_prototype))
-        else
-            lcons_length = length(prob.lcons)
-            vcat(prob.lcons, zeros(T, N*M - lcons_length))
-        end
+        lcons_length = length(prob.lcons)
+        vcat(prob.lcons, zeros(T, N*M - lcons_length))
     end
     ucons = if isnothing(prob.ucons)
-        zeros(T, N*M) #TODO: handle carefully when NLLS
+        zeros(T, L_bcresid_prototype + (N - 1)*L_f_prototype)
     else
-        length_f_prototype = length(prob.f.f_prototype)
-        if !(isnothing(prob.f.equality) && isnothing(prob.f.inequality))
-            # When there are additional equality or inequality constraints
-            vcat(repeat(prob.ucons, N), zeros(T, M + (N - 1)*length_f_prototype))
-        else
-            ucons_length = length(prob.ucons)
-            vcat(prob.ucons, zeros(T, N*M - ucons_length))
-        end
+        ucons_length = length(prob.ucons)
+        vcat(prob.ucons, zeros(T, N*M - ucons_length))
     end
     return lcons, ucons
 end
 
+@inline function __extract_lb_ub(prob::AbstractBVProblem, ::Type{T}, M, N) where {T}
+    lb = if isnothing(prob.lb)
+        nothing
+    else
+        repeat(prob.lb, N)
+    end
+    ub = if isnothing(prob.ub)
+        nothing
+    else
+        repeat(prob.ub, N)
+    end
+    return lb, ub
+end
+
 """
     __construct_internal_problem
 
 Constructs the internal problem based on the type of the boundary value problem and the
 algorithm used. It returns either a `NonlinearProblem` or an `OptimizationProblem`.
 """
-function __construct_internal_problem(prob, pt::StandardBVProblem, alg, loss, jac,
-        jac_prototype, resid_prototype, y, p, M::Int, N::Int)
+function __construct_internal_problem(
+        prob, pt::StandardBVProblem, alg, loss, jac, jac_prototype,
+        resid_prototype, bcresid_prototype, f_prototype, y, p, M::Int, N::Int)
     T = eltype(y)
     iip = SciMLBase.isinplace(prob)
     if !isnothing(alg.nlsolve) || (isnothing(alg.nlsolve) && isnothing(alg.optimize))
@@ -675,15 +681,18 @@ function __construct_internal_problem(prob, pt::StandardBVProblem, alg, loss, ja
                 sparsity_detector = __default_sparsity_detector(alg.jac_alg.diffmode)),
             cons = loss,
             cons_j = jac,
-            cons_jac_prototype = jac_prototype)
-        lcons, ucons = __extract_lcons_ucons(prob, T, M, N)
+            cons_jac_prototype = sparse(jac_prototype))
+        lcons, ucons = __extract_lcons_ucons(prob, T, M, N, bcresid_prototype, f_prototype)
+        lb, ub = __extract_lb_ub(prob, T, M, N)
+
         return __internal_optimization_problem(
-            prob, optf, y, p; lcons = lcons, ucons = ucons)
+            prob, optf, y, p; lcons = lcons, ucons = ucons, lb = lb, ub = ub)
     end
 end
 
-function __construct_internal_problem(prob, pt::TwoPointBVProblem, alg, loss, jac,
-        jac_prototype, resid_prototype, y, p, M::Int, N::Int)
+function __construct_internal_problem(
+        prob, pt::TwoPointBVProblem, alg, loss, jac, jac_prototype,
+        resid_prototype, bcresid_prototype, f_prototype, y, p, M::Int, N::Int)
     T = eltype(y)
     iip = SciMLBase.isinplace(prob)
     if !isnothing(alg.nlsolve) || (isnothing(alg.nlsolve) && isnothing(alg.optimize))
@@ -696,17 +705,18 @@ function __construct_internal_problem(prob, pt::TwoPointBVProblem, alg, loss, ja
                 sparsity_detector = __default_sparsity_detector(alg.jac_alg.diffmode)),
             cons = loss,
             cons_j = jac,
-            cons_jac_prototype = jac_prototype)
-        lcons, ucons = __extract_lcons_ucons(prob, T, M, N)
+            cons_jac_prototype = sparse(jac_prototype))
+        lcons, ucons = __extract_lcons_ucons(prob, T, M, N, bcresid_prototype, f_prototype)
+        lb, ub = __extract_lb_ub(prob, T, M, N)
 
         return __internal_optimization_problem(
-            prob, optf, y, p; lcons = lcons, ucons = ucons)
+            prob, optf, y, p; lcons = lcons, ucons = ucons, lb = lb, ub = ub)
     end
 end
 
 # Single shooting use diffmode for StandardBVProblem and TwoPointBVProblem
-function __construct_internal_problem(prob, alg, loss, jac, jac_prototype,
-        resid_prototype, y, p, M::Int, N::Int, ::Nothing)
+function __construct_internal_problem(prob, alg, loss, jac, jac_prototype, resid_prototype,
+        bcresid_prototype, f_prototype, y, p, M::Int, N::Int, ::Nothing)
     T = eltype(y)
     iip = SciMLBase.isinplace(prob)
     if !isnothing(alg.nlsolve) || (isnothing(alg.nlsolve) && isnothing(alg.optimize))
@@ -719,18 +729,19 @@ function __construct_internal_problem(prob, alg, loss, jac, jac_prototype,
                 sparsity_detector = __default_sparsity_detector(alg.jac_alg.diffmode)),
             cons = loss,
             cons_j = jac,
-            cons_jac_prototype = jac_prototype)
-        lcons, ucons = __extract_lcons_ucons(prob, T, M, N)
+            cons_jac_prototype = sparse(jac_prototype))
+        lcons, ucons = __extract_lcons_ucons(prob, T, M, N, bcresid_prototype, f_prototype)
+        lb, ub = __extract_lb_ub(prob, T, M, N)
 
         return __internal_optimization_problem(
-            prob, optf, y, p; lcons = lcons, ucons = ucons)
+            prob, optf, y, p; lcons = lcons, ucons = ucons, lb = lb, ub = ub)
     end
 end
 
 # Multiple shooting always use inplace version internal problem constructor
 function __construct_internal_problem(
-        prob, pt::StandardBVProblem, alg, loss, jac, jac_prototype,
-        resid_prototype, y, p, M::Int, N::Int, ::Nothing)
+        prob, pt::StandardBVProblem, alg, loss, jac, jac_prototype, resid_prototype,
+        bcresid_prototype, f_prototype, y, p, M::Int, N::Int, ::Nothing)
     T = eltype(y)
     if !isnothing(alg.nlsolve) || (isnothing(alg.nlsolve) && isnothing(alg.optimize))
         nlf = NonlinearFunction{true}(loss; jac = jac, resid_prototype = resid_prototype,
@@ -742,39 +753,17 @@ function __construct_internal_problem(
                 sparsity_detector = __default_sparsity_detector(alg.jac_alg.nonbc_diffmode)),
             cons = loss,
             cons_j = jac,
-            cons_jac_prototype = jac_prototype)
-        lcons, ucons = __extract_lcons_ucons(prob, T, M, N)
+            cons_jac_prototype = sparse(jac_prototype))
+        lcons, ucons = __extract_lcons_ucons(prob, T, M, N, bcresid_prototype, f_prototype)
+        lb, ub = __extract_lb_ub(prob, T, M, N)
 
         return __internal_optimization_problem(
-            prob, optf, y, p; lcons = lcons, ucons = ucons)
+            prob, optf, y, p; lcons = lcons, ucons = ucons, lb = lb, ub = ub)
     end
 end
 function __construct_internal_problem(
-        prob, pt::TwoPointBVProblem, alg, loss, jac, jac_prototype,
-        resid_prototype, y, p, M::Int, N::Int, ::Nothing)
-    T = eltype(y)
-    if !isnothing(alg.nlsolve) || (isnothing(alg.nlsolve) && isnothing(alg.optimize))
-        nlf = NonlinearFunction{true}(loss; jac = jac, resid_prototype = resid_prototype,
-            jac_prototype = jac_prototype)
-        return __internal_nlsolve_problem(prob, resid_prototype, y, nlf, y, p)
-    else
-        optf = OptimizationFunction{true}(__default_cost(prob.f),
-            AutoSparse(get_dense_ad(alg.jac_alg.diffmode),
-                sparsity_detector = __default_sparsity_detector(alg.jac_alg.nonbc_diffmode)),
-            cons = loss,
-            cons_j = jac,
-            cons_jac_prototype = jac_prototype)
-        lcons, ucons = __extract_lcons_ucons(prob, T, M, N)
-
-        return __internal_optimization_problem(
-            prob, optf, y, p; lcons = lcons, ucons = ucons)
-    end
-end
-
-# Second order BVProblem
-function __construct_internal_problem(
-        prob, pt::StandardSecondOrderBVProblem, alg, loss, jac,
-        jac_prototype, resid_prototype, y, p, M::Int, N::Int)
+        prob, pt::TwoPointBVProblem, alg, loss, jac, jac_prototype, resid_prototype,
+        bcresid_prototype, f_prototype, y, p, M::Int, N::Int, ::Nothing)
     T = eltype(y)
     iip = SciMLBase.isinplace(prob)
     if !isnothing(alg.nlsolve) || (isnothing(alg.nlsolve) && isnothing(alg.optimize))
@@ -782,38 +771,16 @@ function __construct_internal_problem(
             jac_prototype = jac_prototype)
         return __internal_nlsolve_problem(prob, resid_prototype, y, nlf, y, p)
     else
-        optf = OptimizationFunction{iip}(__default_cost(prob.f.f),
-            AutoSparse(get_dense_ad(alg.jac_alg.nonbc_diffmode),
-                sparsity_detector = __default_sparsity_detector(alg.jac_alg.nonbc_diffmode)),
-            cons = loss,
-            cons_j = jac,
-            cons_jac_prototype = jac_prototype)
-        lcons, ucons = __extract_lcons_ucons(prob, T, M, N)
-        return __internal_optimization_problem(
-            prob, optf, y, p; lcons = lcons, ucons = ucons)
-    end
-end
-
-# Two point BVProblem
-function __construct_internal_problem(
-        prob, pt::TwoPointSecondOrderBVProblem, alg, loss, jac,
-        jac_prototype, resid_prototype, y, p, M::Int, N::Int)
-    T = eltype(y)
-    iip = SciMLBase.isinplace(prob)
-    if !isnothing(alg.nlsolve) || (isnothing(alg.nlsolve) && isnothing(alg.optimize))
-        nlf = NonlinearFunction{iip}(loss; jac = jac, resid_prototype = resid_prototype,
-            jac_prototype = jac_prototype)
-        return __internal_nlsolve_problem(prob, resid_prototype, y, nlf, y, p)
-    else
-        optf = OptimizationFunction{iip}(__default_cost(prob.f.f),
+        optf = OptimizationFunction{true}(__default_cost(prob.f),
             AutoSparse(get_dense_ad(alg.jac_alg.diffmode),
-                sparsity_detector = __default_sparsity_detector(alg.jac_alg.diffmode)),
+                sparsity_detector = __default_sparsity_detector(alg.jac_alg.nonbc_diffmode)),
             cons = loss,
             cons_j = jac,
-            cons_jac_prototype = jac_prototype)
-        lcons, ucons = __extract_lcons_ucons(prob, T, M, N)
+            cons_jac_prototype = sparse(jac_prototype))
+        lcons, ucons = __extract_lcons_ucons(prob, T, M, N, bcresid_prototype, f_prototype)
+        lb, ub = __extract_lb_ub(prob, T, M, N)
 
         return __internal_optimization_problem(
-            prob, optf, y, p; lcons = lcons, ucons = ucons)
+            prob, optf, y, p; lcons = lcons, ucons = ucons, lb = lb, ub = ub)
     end
 end
diff --git a/lib/BoundaryValueDiffEqFIRK/src/BoundaryValueDiffEqFIRK.jl b/lib/BoundaryValueDiffEqFIRK/src/BoundaryValueDiffEqFIRK.jl
index f836f7917..2603f0c45 100644
--- a/lib/BoundaryValueDiffEqFIRK/src/BoundaryValueDiffEqFIRK.jl
+++ b/lib/BoundaryValueDiffEqFIRK/src/BoundaryValueDiffEqFIRK.jl
@@ -24,7 +24,7 @@ using BoundaryValueDiffEqCore: AbstractBoundaryValueDiffEqAlgorithm,
                                __initial_guess_on_mesh, __flatten_initial_guess,
                                __build_solution, __Fix3, __split_kwargs, _sparse_like,
                                get_dense_ad, __internal_optimization_problem,
-                               __internal_solve
+                               __internal_solve, __default_sparsity_detector
 
 using ConcreteStructs: @concrete
 using DiffEqBase: DiffEqBase
diff --git a/lib/BoundaryValueDiffEqFIRK/src/collocation.jl b/lib/BoundaryValueDiffEqFIRK/src/collocation.jl
index bbdcf1350..8317e4d6c 100644
--- a/lib/BoundaryValueDiffEqFIRK/src/collocation.jl
+++ b/lib/BoundaryValueDiffEqFIRK/src/collocation.jl
@@ -1,15 +1,58 @@
-function Φ!(residual, cache::FIRKCacheExpand, y, u, trait)
-    return Φ!(residual, cache.fᵢ_cache, cache.k_discrete, cache.f, cache.TU,
-        y, u, cache.p, cache.mesh, cache.mesh_dt, cache.stage, trait)
+function Φ!(residual, cache::FIRKCacheExpand, y, u, trait, constraint)
+    return Φ!(residual, cache.fᵢ_cache, cache.k_discrete, cache.f, cache.TU, y, u, cache.p,
+        cache.mesh, cache.mesh_dt, cache.stage, cache.f_prototype, trait, constraint)
 end
 
-function Φ!(residual, cache::FIRKCacheNested, y, u, trait)
-    return Φ!(residual, cache.fᵢ_cache, cache.k_discrete, cache.f, cache.TU, y,
-        u, cache.p, cache.mesh, cache.mesh_dt, cache.stage, cache, trait)
+function Φ!(residual, cache::FIRKCacheNested, y, u, trait, constraint)
+    return Φ!(residual, cache.fᵢ_cache, cache.k_discrete, cache.f, cache.TU, y, u,
+        cache.p, cache.mesh, cache.mesh_dt, cache.stage, cache, trait, constraint)
 end
 
-@views function Φ!(residual, fᵢ_cache, k_discrete, f!, TU::FIRKTableau{false},
-        y, u, p, mesh, mesh_dt, stage::Int, ::DiffCacheNeeded)
+@views function Φ!(residual, fᵢ_cache, k_discrete, f!, TU::FIRKTableau{false}, y, u, p,
+        mesh, mesh_dt, stage::Int, f_prototype, ::DiffCacheNeeded, ::Val{true})
+    (; c, a, b) = TU
+    L_f_prototype = length(f_prototype)
+    tmp1,
+    tmpu = get_tmp(fᵢ_cache, u)[1:L_f_prototype],
+    get_tmp(fᵢ_cache, u)[(f_prototype + 1):end]
+
+    K = get_tmp(k_discrete[1], u) # Not optimal # TODO
+    T = eltype(u)
+    ctr = 1
+
+    for i in eachindex(mesh_dt)
+        h = mesh_dt[i]
+        yᵢ = get_tmp(y[ctr], u)
+        yᵢ₊₁ = get_tmp(y[ctr + stage + 1], u)
+
+        yᵢ, uᵢ = yᵢ[1:L_f_prototype], yᵢ[(L_f_prototype + 1):end]
+        yᵢ₊₁, uᵢ₊₁ = yᵢ₊₁[1:L_f_prototype], yᵢ₊₁[(L_f_prototype + 1):end]
+
+        # Load interpolation residual
+        for j in 1:stage
+            tmp = get_tmp(y[ctr + j], u)
+            K[:, j] = tmp[1:3]
+        end
+
+        # Update interpolation residual
+        for r in 1:stage
+            @. tmp1 = yᵢ
+            @. tmpu = uᵢ
+            __maybe_matmul!(tmp1, K, a[:, r], h, T(1))
+            f!(residual[ctr + r], vcat(tmp1, tmpu), p, mesh[i] + c[r] * h)
+            residual[ctr + r] .-= K[:, r]
+        end
+
+        # Update mesh point residual
+        residᵢ = residual[ctr]
+        @. residᵢ = yᵢ₊₁ - yᵢ
+        __maybe_matmul!(residᵢ, K, b, -h, T(1))
+        ctr += stage + 1
+    end
+end
+
+@views function Φ!(residual, fᵢ_cache, k_discrete, f!, TU::FIRKTableau{false}, y, u, p,
+        mesh, mesh_dt, stage::Int, f_prototype, ::DiffCacheNeeded, ::Val{false})
     (; c, a, b) = TU
     tmp1 = get_tmp(fᵢ_cache, u)
     K = get_tmp(k_discrete[1], u) # Not optimal # TODO
@@ -42,8 +85,9 @@ end
     end
 end
 
-@views function Φ!(residual, fᵢ_cache, k_discrete, f!, TU::FIRKTableau{false},
-        y, u, p, mesh, mesh_dt, stage::Int, ::NoDiffCacheNeeded)
+@views function Φ!(
+        residual, fᵢ_cache, k_discrete, f!, TU::FIRKTableau{false}, y, u, p, mesh,
+        mesh_dt, stage::Int, f_prototype, ::NoDiffCacheNeeded, ::Val{false})
     (; c, a, b) = TU
     tmp1 = similar(fᵢ_cache)
     K = similar(k_discrete[1])
@@ -114,8 +158,38 @@ function FIRK_nlsolve(K, p_nlsolve, f!, TU::FIRKTableau{true}, p_f!)
     return res
 end
 
-@views function Φ!(residual, fᵢ_cache, k_discrete, f!, TU::FIRKTableau{true}, y,
-        u, p, mesh, mesh_dt, stage::Int, cache, ::DiffCacheNeeded)
+@views function Φ!(residual, fᵢ_cache, k_discrete, f!, TU::FIRKTableau{true}, y, u, p,
+        mesh, mesh_dt, stage::Int, cache, ::DiffCacheNeeded, ::Val{true})
+    (; b) = TU
+    (; nest_prob, alg) = cache
+
+    T = eltype(u)
+    nestprob_p = vcat(T(mesh[1]), T(mesh_dt[1]), get_tmp(y[1], u))
+    nest_nlsolve_alg = __concrete_solve_algorithm(nest_prob, alg.nlsolve)
+
+    for i in eachindex(k_discrete)
+        residᵢ = residual[i]
+        h = mesh_dt[i]
+
+        yᵢ = get_tmp(y[i], u)
+        yᵢ₊₁ = get_tmp(y[i + 1], u)
+
+        nestprob_p[1] = T(mesh[i])
+        nestprob_p[2] = T(mesh_dt[i])
+        nestprob_p[3:end] = yᵢ
+
+        K = get_tmp(k_discrete[i], u)
+
+        _nestprob = remake(nest_prob, p = nestprob_p)
+        nestsol = __solve(_nestprob, nest_nlsolve_alg; alg.nested_nlsolve_kwargs...)
+        @. K = nestsol.u
+        @. residᵢ = yᵢ₊₁ - yᵢ
+        __maybe_matmul!(residᵢ, nestsol.u, b, -h, T(1))
+    end
+end
+
+@views function Φ!(residual, fᵢ_cache, k_discrete, f!, TU::FIRKTableau{true}, y, u, p,
+        mesh, mesh_dt, stage::Int, cache, ::DiffCacheNeeded, ::Val{false})
     (; b) = TU
     (; nest_prob, alg) = cache
 
@@ -144,8 +218,8 @@ end
     end
 end
 
-@views function Φ!(residual, fᵢ_cache, k_discrete, f!, TU::FIRKTableau{true}, y,
-        u, p, mesh, mesh_dt, stage::Int, cache, ::NoDiffCacheNeeded)
+@views function Φ!(residual, fᵢ_cache, k_discrete, f!, TU::FIRKTableau{true}, y, u, p,
+        mesh, mesh_dt, stage::Int, cache, ::NoDiffCacheNeeded, ::Val{false})
     (; b) = TU
     (; nest_prob, alg) = cache
 
diff --git a/lib/BoundaryValueDiffEqFIRK/src/firk.jl b/lib/BoundaryValueDiffEqFIRK/src/firk.jl
index f74327e74..4993840fa 100644
--- a/lib/BoundaryValueDiffEqFIRK/src/firk.jl
+++ b/lib/BoundaryValueDiffEqFIRK/src/firk.jl
@@ -12,6 +12,7 @@
     alg                        # FIRK methods
     TU                         # FIRK Tableau
     ITU                        # FIRK Interpolation Tableau
+    f_prototype
     bcresid_prototype
     # Everything below gets resized in adaptive methods
     mesh                       # Discrete mesh
@@ -47,6 +48,7 @@ Base.eltype(::FIRKCacheNested{iip, T}) where {iip, T} = T
     alg                        # FIRK methods
     TU                         # FIRK Tableau
     ITU                        # FIRK Interpolation Tableau
+    f_prototype
     bcresid_prototype
     # Everything below gets resized in adaptive methods
     mesh                       # Discrete mesh
@@ -113,6 +115,10 @@ function init_nested(
     end
     diffcache = __cache_trait(alg.jac_alg)
     fit_parameters = haskey(prob.kwargs, :fit_parameters)
+    constraint = (!isnothing(prob.f.inequality)) ||
+                 (!isnothing(prob.f.equality)) ||
+                 (!isnothing(prob.lb)) ||
+                 (!isnothing(prob.ub))
 
     t₀, t₁ = prob.tspan
     ig, T,
@@ -134,17 +140,34 @@ function init_nested(
     y = __alloc.(copy.(y₀.u))
     TU, ITU = constructRK(alg, T)
     stage = alg_stage(alg)
+    f_prototype = isnothing(prob.f.f_prototype) ? nothing : __vec(prob.f.f_prototype)
+    L_f_prototype = isnothing(f_prototype) ? M : length(f_prototype)
 
-    k_discrete = [__maybe_allocate_diffcache(safe_similar(X, M, stage), chunksize, alg.jac_alg)
-                  for _ in 1:Nig]
+    k_discrete = if !constraint
+        [__maybe_allocate_diffcache(safe_similar(X, M, stage), chunksize, alg.jac_alg)
+         for _ in 1:Nig]
+    else
+        [__maybe_allocate_diffcache(safe_similar(X, L_f_prototype, stage), chunksize, alg.jac_alg)
+         for _ in 1:Nig]
+    end
 
     bcresid_prototype, resid₁_size = __get_bcresid_prototype(prob.problem_type, prob, X)
 
     residual = if iip
-        if prob.problem_type isa TwoPointBVProblem
-            vcat([__alloc(__vec(bcresid_prototype))], __alloc.(copy.(@view(y₀.u[2:end]))))
+        if !constraint
+            if prob.problem_type isa TwoPointBVProblem
+                vcat([__alloc(__vec(bcresid_prototype))], __alloc.(copy.(@view(y₀.u[2:end]))))
+            else
+                vcat([__alloc(bcresid_prototype)], __alloc.(copy.(@view(y₀.u[2:end]))))
+            end
         else
-            vcat([__alloc(bcresid_prototype)], __alloc.(copy.(@view(y₀.u[2:end]))))
+            if prob.problem_type isa TwoPointBVProblem
+                vcat([__alloc(__vec(bcresid_prototype))],
+                    __alloc.(copy.([f_prototype for _ in 1:length(y₀.u[2:end])])))
+            else
+                vcat([__alloc(bcresid_prototype)],
+                    __alloc.(copy.([f_prototype for _ in 1:length(y₀.u[2:end])])))
+            end
         end
     else
         nothing
@@ -205,9 +228,9 @@ function init_nested(
 
     return FIRKCacheNested{iip, T, typeof(diffcache), fit_parameters}(
         alg_order(alg), stage, M, size(X), f, bc, prob_, prob.problem_type, prob.p,
-        alg, TU, ITU, bcresid_prototype, mesh, mesh_dt, k_discrete, y, y₀, residual,
-        fᵢ_cache, fᵢ₂_cache, defect, nestprob, resid₁_size, nlsolve_kwargs,
-        optimize_kwargs, (; abstol, dt, adaptive, controller, kwargs...))
+        alg, TU, ITU, f_prototype, bcresid_prototype, mesh, mesh_dt, k_discrete,
+        y, y₀, residual, fᵢ_cache, fᵢ₂_cache, defect, nestprob, resid₁_size,
+        nlsolve_kwargs, optimize_kwargs, (; abstol, dt, adaptive, controller, kwargs...))
 end
 
 function init_expanded(
@@ -222,6 +245,10 @@ function init_expanded(
     end
     diffcache = __cache_trait(alg.jac_alg)
     fit_parameters = haskey(prob.kwargs, :fit_parameters)
+    constraint = (!isnothing(prob.f.inequality)) ||
+                 (!isnothing(prob.f.equality)) ||
+                 (!isnothing(prob.lb)) ||
+                 (!isnothing(prob.ub))
 
     t₀, t₁ = prob.tspan
     ig, T,
@@ -233,6 +260,8 @@ function init_expanded(
 
     TU, ITU = constructRK(alg, T)
     stage = alg_stage(alg)
+    f_prototype = isnothing(prob.f.f_prototype) ? nothing : __vec(prob.f.f_prototype)
+    L_f_prototype = isnothing(f_prototype) ? M : length(f_prototype)
 
     chunksize = pickchunksize(M + M * Nig * (stage + 1))
     __alloc = @closure x -> __maybe_allocate_diffcache(vec(x), chunksize, alg.jac_alg)
@@ -245,16 +274,31 @@ function init_expanded(
     y₀ = extend_y(_y₀, Nig + 1, stage)
     y = __alloc.(copy.(y₀.u)) # Runtime dispatch
 
-    k_discrete = [__maybe_allocate_diffcache(safe_similar(X, M, stage), chunksize, alg.jac_alg)
-                  for _ in 1:Nig] # Runtime dispatch
+    k_discrete = if !constraint
+        [__maybe_allocate_diffcache(safe_similar(X, M, stage), chunksize, alg.jac_alg)
+         for _ in 1:Nig] # Runtime dispatch
+    else
+        [__maybe_allocate_diffcache(safe_similar(X, L_f_prototype, stage), chunksize, alg.jac_alg)
+         for _ in 1:Nig] # Runtime dispatch
+    end
 
     bcresid_prototype, resid₁_size = __get_bcresid_prototype(prob.problem_type, prob, X)
 
     residual = if iip
-        if prob.problem_type isa TwoPointBVProblem
-            vcat([__alloc(__vec(bcresid_prototype))], __alloc.(copy.(@view(y₀.u[2:end]))))
+        if !constraint
+            if prob.problem_type isa TwoPointBVProblem
+                vcat([__alloc(__vec(bcresid_prototype))], __alloc.(copy.(@view(y₀.u[2:end]))))
+            else
+                vcat([__alloc(bcresid_prototype)], __alloc.(copy.(@view(y₀.u[2:end]))))
+            end
         else
-            vcat([__alloc(bcresid_prototype)], __alloc.(copy.(@view(y₀.u[2:end]))))
+            if prob.problem_type isa TwoPointBVProblem
+                vcat([__alloc(__vec(bcresid_prototype))],
+                    __alloc.(copy.([f_prototype for _ in 1:length(y₀.u[2:end])])))
+            else
+                vcat([__alloc(bcresid_prototype)],
+                    __alloc.(copy.([f_prototype for _ in 1:length(y₀.u[2:end])])))
+            end
         end
     else
         nothing
@@ -303,9 +347,9 @@ function init_expanded(
     prob_ = !(prob.u0 isa AbstractArray) ? remake(prob; u0 = X) : prob
 
     return FIRKCacheExpand{iip, T, typeof(diffcache), fit_parameters}(
-        alg_order(alg), stage, M, size(X), f, bc, prob_, prob.problem_type,
-        prob.p, alg, TU, ITU, bcresid_prototype, mesh, mesh_dt, k_discrete, y,
-        y₀, residual, fᵢ_cache, fᵢ₂_cache, defect, resid₁_size, nlsolve_kwargs,
+        alg_order(alg), stage, M, size(X), f, bc, prob_, prob.problem_type, prob.p,
+        alg, TU, ITU, f_prototype, bcresid_prototype, mesh, mesh_dt, k_discrete,
+        y, y₀, residual, fᵢ_cache, fᵢ₂_cache, defect, resid₁_size, nlsolve_kwargs,
         optimize_kwargs, (; abstol, dt, adaptive, controller, kwargs...))
 end
 
@@ -456,6 +500,16 @@ end
 # Constructing the Nonlinear Problem
 function __construct_problem(cache::Union{FIRKCacheNested{iip}, FIRKCacheExpand{iip}},
         y::AbstractVector, y₀::AbstractVectorOfArray) where {iip}
+    constraint = (!isnothing(cache.prob.f.inequality)) ||
+                 (!isnothing(cache.prob.f.equality)) ||
+                 (!isnothing(cache.prob.lb)) ||
+                 (!isnothing(cache.prob.ub))
+    return __construct_problem(cache, y, y₀, Val(constraint))
+end
+
+# Constructing the Nonlinear Problem
+function __construct_problem(cache::Union{FIRKCacheNested{iip}, FIRKCacheExpand{iip}},
+        y::AbstractVector, y₀::AbstractVectorOfArray, constraint) where {iip}
     pt = cache.problem_type
     (; jac_alg) = cache.alg
 
@@ -476,7 +530,7 @@ function __construct_problem(cache::Union{FIRKCacheNested{iip}, FIRKCacheExpand{
         @closure (du,
             u,
             p) -> __firk_loss_collocation!(
-            du, u, p, cache.y, cache.mesh, cache.residual, cache, trait)
+            du, u, p, cache.y, cache.mesh, cache.residual, cache, trait, constraint)
     else
         @closure (u,
             p) -> __firk_loss_collocation(
@@ -487,7 +541,7 @@ function __construct_problem(cache::Union{FIRKCacheNested{iip}, FIRKCacheExpand{
         @closure (du,
             u,
             p) -> __firk_loss!(du, u, p, cache.y, pt, cache.bc, cache.residual,
-            cache.mesh, cache, eval_sol, trait)
+            cache.mesh, cache, eval_sol, trait, constraint)
     else
         @closure (u,
             p) -> __firk_loss(
@@ -497,17 +551,81 @@ function __construct_problem(cache::Union{FIRKCacheNested{iip}, FIRKCacheExpand{
     if !isnothing(cache.alg.optimize)
         loss = @closure (du,
             u,
-            p) -> __firk_loss!(
-            du, u, p, cache.y, pt, cache.bc, cache.residual, cache.mesh, cache, trait)
+            p) -> __firk_loss!(du, u, p, cache.y, pt, cache.bc, cache.residual,
+            cache.bcresid_prototype, cache.mesh, cache, eval_sol, trait, constraint)
+    end
+
+    return __construct_problem(cache, y, loss_bc, loss_collocation, loss, pt, constraint)
+end
+
+function __construct_problem(
+        cache::FIRKCacheExpand{iip}, y, loss_bc::BC, loss_collocation::C,
+        loss::LF, ::StandardBVProblem, ::Val{true}) where {iip, BC, C, LF}
+    (; alg, stage, bcresid_prototype, f_prototype) = cache
+    (; jac_alg) = alg
+    (; bc_diffmode) = jac_alg
+    N = length(cache.mesh)
+
+    resid_bc = cache.bcresid_prototype
+    L = length(resid_bc)
+    L_f_prototype = length(f_prototype)
+    resid_collocation = safe_similar(y, L_f_prototype * (N - 1) * (stage + 1))
+
+    cache_bc = if iip
+        DI.prepare_jacobian(loss_bc, resid_bc, bc_diffmode, y, Constant(cache.p))
+    else
+        DI.prepare_jacobian(loss_bc, bc_diffmode, y, Constant(cache.p))
+    end
+
+    nonbc_diffmode = AutoSparse(get_dense_ad(jac_alg.nonbc_diffmode),
+        sparsity_detector = __default_sparsity_detector(jac_alg.nonbc_diffmode),
+        coloring_algorithm = __default_coloring_algorithm(jac_alg.nonbc_diffmode))
+
+    cache_collocation = if iip
+        DI.prepare_jacobian(
+            loss_collocation, resid_collocation, nonbc_diffmode, y, Constant(cache.p))
+    else
+        DI.prepare_jacobian(loss_collocation, nonbc_diffmode, y, Constant(cache.p))
+    end
+
+    J_bc = if iip
+        DI.jacobian(loss_bc, resid_bc, cache_bc, bc_diffmode, y, Constant(cache.p))
+    else
+        DI.jacobian(loss_bc, cache_bc, bc_diffmode, y, Constant(cache.p))
+    end
+    J_c = if iip
+        DI.jacobian(loss_collocation, resid_collocation, cache_collocation,
+            nonbc_diffmode, y, Constant(cache.p))
+    else
+        DI.jacobian(
+            loss_collocation, cache_collocation, nonbc_diffmode, y, Constant(cache.p))
+    end
+
+    jac_prototype = vcat(J_bc, J_c)
+    jac = if iip
+        @closure (J,
+            u,
+            p) -> __firk_mpoint_jacobian!(
+            J, J_c, u, bc_diffmode, nonbc_diffmode, cache_bc, cache_collocation,
+            loss_bc, loss_collocation, resid_bc, resid_collocation, L, cache.p)
+    else
+        @closure (u,
+            p) -> __firk_mpoint_jacobian(
+            jac_prototype, J_c, u, bc_diffmode, nonbc_diffmode, cache_bc,
+            cache_collocation, loss_bc, loss_collocation, L, cache.p)
     end
 
-    return __construct_problem(cache, y, loss_bc, loss_collocation, loss, pt)
+    resid_prototype = vcat(resid_bc, resid_collocation)
+    return __construct_internal_problem(
+        cache.prob, cache.problem_type, cache.alg, loss, jac,
+        jac_prototype, resid_prototype, bcresid_prototype,
+        f_prototype, y, cache.p, cache.M, (N - 1) * (stage + 1) + 1)
 end
 
 function __construct_problem(
         cache::FIRKCacheExpand{iip}, y, loss_bc::BC, loss_collocation::C,
-        loss::LF, ::StandardBVProblem) where {iip, BC, C, LF}
-    (; alg, stage) = cache
+        loss::LF, ::StandardBVProblem, ::Val{false}) where {iip, BC, C, LF}
+    (; alg, stage, bcresid_prototype, f_prototype) = cache
     (; jac_alg) = alg
     (; bc_diffmode) = jac_alg
     N = length(cache.mesh)
@@ -586,15 +704,66 @@ function __construct_problem(
 
     resid_prototype = vcat(resid_bc, resid_collocation)
     return __construct_internal_problem(
-        cache.prob, cache.problem_type, cache.alg, loss, jac, jac_prototype,
-        resid_prototype, y, cache.p, cache.M, (N - 1) * (stage + 1) + 1)
+        cache.prob, cache.problem_type, cache.alg, loss, jac,
+        jac_prototype, resid_prototype, bcresid_prototype,
+        f_prototype, y, cache.p, cache.M, (N - 1) * (stage + 1) + 1)
+end
+
+function __construct_problem(
+        cache::FIRKCacheExpand{iip}, y, loss_bc::BC, loss_collocation::C,
+        loss::LF, ::TwoPointBVProblem, ::Val{true}) where {iip, BC, C, LF}
+    (; jac_alg) = cache.alg
+    (; stage, bcresid_prototype, f_prototype) = cache
+    N = length(cache.mesh)
+
+    resid_collocation = safe_similar(y, cache.M * (N - 1) * (stage + 1))
+
+    resid = vcat(
+        @view(cache.bcresid_prototype[1:prod(cache.resid_size[1])]), resid_collocation,
+        @view(cache.bcresid_prototype[(prod(cache.resid_size[1]) + 1):end]))
+    L = length(cache.bcresid_prototype)
+
+    diffmode = if jac_alg.diffmode isa AutoSparse
+        AutoSparse(get_dense_ad(jac_alg.diffmode);
+            sparsity_detector = __default_sparsity_detector(jac_alg.diffmode),
+            coloring_algorithm = __default_coloring_algorithm(jac_alg.diffmode))
+    else
+        jac_alg.diffmode
+    end
+
+    diffcache = if iip
+        DI.prepare_jacobian(loss, resid, diffmode, y, Constant(cache.p))
+    else
+        DI.prepare_jacobian(loss, diffmode, y, Constant(cache.p))
+    end
+
+    jac_prototype = if iip
+        DI.jacobian(loss, resid, diffcache, diffmode, y, Constant(cache.p))
+    else
+        DI.jacobian(loss, diffcache, diffmode, y, Constant(cache.p))
+    end
+
+    jac = if iip
+        @closure (J, u,
+            p) -> __firk_2point_jacobian!(J, u, diffmode, diffcache, loss, resid, cache.p)
+    else
+        @closure (u,
+            p) -> __firk_2point_jacobian(
+            u, jac_prototype, diffmode, diffcache, loss, cache.p)
+    end
+
+    resid_prototype = copy(resid)
+    return __construct_internal_problem(
+        cache.prob, cache.problem_type, cache.alg, loss, jac,
+        jac_prototype, resid_prototype, bcresid_prototype,
+        f_prototype, y, cache.p, cache.M, (N - 1) * (stage + 1) + 1)
 end
 
 function __construct_problem(
         cache::FIRKCacheExpand{iip}, y, loss_bc::BC, loss_collocation::C,
-        loss::LF, ::TwoPointBVProblem) where {iip, BC, C, LF}
+        loss::LF, ::TwoPointBVProblem, ::Val{false}) where {iip, BC, C, LF}
     (; jac_alg) = cache.alg
-    (; stage) = cache
+    (; stage, bcresid_prototype, f_prototype) = cache
     N = length(cache.mesh)
 
     resid_collocation = safe_similar(y, cache.M * (N - 1) * (stage + 1))
@@ -645,16 +814,78 @@ function __construct_problem(
     end
 
     resid_prototype = copy(resid)
+    return __construct_internal_problem(
+        cache.prob, cache.problem_type, cache.alg, loss, jac,
+        jac_prototype, resid_prototype, bcresid_prototype,
+        f_prototype, y, cache.p, cache.M, (N - 1) * (stage + 1) + 1)
+end
+
+function __construct_problem(
+        cache::FIRKCacheNested{iip}, y, loss_bc::BC, loss_collocation::C,
+        loss::LF, ::StandardBVProblem, ::Val{true}) where {iip, BC, C, LF}
+    (; jac_alg) = cache.alg
+    (; bc_diffmode) = jac_alg
+    (; bcresid_prototype, f_prototype) = cache
+    N = length(cache.mesh)
+    resid_bc = cache.bcresid_prototype
+    L = length(resid_bc)
+    resid_collocation = safe_similar(y, cache.M * (N - 1))
+    cache_bc = if iip
+        DI.prepare_jacobian(loss_bc, resid_bc, bc_diffmode, y, Constant(cache.p))
+    else
+        DI.prepare_jacobian(loss_bc, bc_diffmode, y, Constant(cache.p))
+    end
+
+    nonbc_diffmode = AutoSparse(get_dense_ad(jac_alg.nonbc_diffmode),
+        sparsity_detector = __default_sparsity_detector(jac_alg.nonbc_diffmode),
+        coloring_algorithm = __default_coloring_algorithm(jac_alg.nonbc_diffmode))
+
+    cache_collocation = if iip
+        DI.prepare_jacobian(
+            loss_collocation, resid_collocation, nonbc_diffmode, y, Constant(cache.p))
+    else
+        DI.prepare_jacobian(loss_collocation, nonbc_diffmode, y, Constant(cache.p))
+    end
+
+    J_bc = if iip
+        DI.jacobian(loss_bc, resid_bc, cache_bc, bc_diffmode, y, Constant(cache.p))
+    else
+        DI.jacobian(loss_bc, cache_bc, bc_diffmode, y, Constant(cache.p))
+    end
+    J_c = if iip
+        DI.jacobian(loss_collocation, resid_collocation, cache_collocation,
+            nonbc_diffmode, y, Constant(cache.p))
+    else
+        DI.jacobian(
+            loss_collocation, cache_collocation, nonbc_diffmode, y, Constant(cache.p))
+    end
+
+    jac_prototype = vcat(J_bc, J_c)
+    jac = if iip
+        @closure (J,
+            u,
+            p) -> __firk_mpoint_jacobian!(
+            J, J_c, u, bc_diffmode, nonbc_diffmode, cache_bc, cache_collocation,
+            loss_bc, loss_collocation, resid_bc, resid_collocation, L, cache.p)
+    else
+        @closure (u,
+            p) -> __firk_mpoint_jacobian(
+            jac_prototype, J_c, u, bc_diffmode, nonbc_diffmode, cache_bc,
+            cache_collocation, loss_bc, loss_collocation, L, cache.p)
+    end
+
+    resid_prototype = vcat(resid_bc, resid_collocation)
     return __construct_internal_problem(
         cache.prob, cache.problem_type, cache.alg, loss, jac, jac_prototype,
-        resid_prototype, y, cache.p, cache.M, (N - 1) * (stage + 1) + 1)
+        resid_prototype, bcresid_prototype, f_prototype, y, cache.p, cache.M, N)
 end
 
 function __construct_problem(
         cache::FIRKCacheNested{iip}, y, loss_bc::BC, loss_collocation::C,
-        loss::LF, ::StandardBVProblem) where {iip, BC, C, LF}
+        loss::LF, ::StandardBVProblem, ::Val{false}) where {iip, BC, C, LF}
     (; jac_alg) = cache.alg
     (; bc_diffmode) = jac_alg
+    (; bcresid_prototype, f_prototype) = cache
     N = length(cache.mesh)
     resid_bc = cache.bcresid_prototype
     L = length(resid_bc)
@@ -726,14 +957,61 @@ function __construct_problem(
 
     resid_prototype = vcat(resid_bc, resid_collocation)
     return __construct_internal_problem(
-        cache.prob, cache.problem_type, cache.alg, loss, jac,
-        jac_prototype, resid_prototype, y, cache.p, cache.M, N)
+        cache.prob, cache.problem_type, cache.alg, loss, jac, jac_prototype,
+        resid_prototype, bcresid_prototype, f_prototype, y, cache.p, cache.M, N)
 end
 
 function __construct_problem(
         cache::FIRKCacheNested{iip}, y, loss_bc::BC, loss_collocation::C,
-        loss::LF, ::TwoPointBVProblem) where {iip, BC, C, LF}
+        loss::LF, ::TwoPointBVProblem, ::Val{true}) where {iip, BC, C, LF}
     (; jac_alg) = cache.alg
+    (; bcresid_prototype, f_prototype) = cache
+    N = length(cache.mesh)
+
+    resid = vcat(@view(cache.bcresid_prototype[1:prod(cache.resid_size[1])]),
+        safe_similar(y, cache.M * (N - 1)),
+        @view(cache.bcresid_prototype[(prod(cache.resid_size[1]) + 1):end]))
+
+    diffmode = if jac_alg.diffmode isa AutoSparse
+        AutoSparse(get_dense_ad(jac_alg.diffmode);
+            sparsity_detector = __default_sparsity_detector(jac_alg.diffmode),
+            coloring_algorithm = __default_coloring_algorithm(jac_alg.diffmode))
+    else
+        jac_alg.diffmode
+    end
+
+    diffcache = if iip
+        DI.prepare_jacobian(loss, resid, diffmode, y, Constant(cache.p))
+    else
+        DI.prepare_jacobian(loss, diffmode, y, Constant(cache.p))
+    end
+
+    jac_prototype = if iip
+        DI.jacobian(loss, resid, diffcache, diffmode, y, Constant(cache.p))
+    else
+        DI.jacobian(loss, diffcache, diffmode, y, Constant(cache.p))
+    end
+
+    jac = if iip
+        @closure (J, u,
+            p) -> __firk_2point_jacobian!(J, u, diffmode, diffcache, loss, resid, cache.p)
+    else
+        @closure (u,
+            p) -> __firk_2point_jacobian(
+            u, jac_prototype, diffmode, diffcache, loss, cache.p)
+    end
+
+    resid_prototype = copy(resid)
+    return __construct_internal_problem(
+        cache.prob, cache.problem_type, cache.alg, loss, jac, jac_prototype,
+        resid_prototype, bcresid_prototype, f_prototype, y, cache.p, cache.M, N)
+end
+
+function __construct_problem(
+        cache::FIRKCacheNested{iip}, y, loss_bc::BC, loss_collocation::C,
+        loss::LF, ::TwoPointBVProblem, ::Val{false}) where {iip, BC, C, LF}
+    (; jac_alg) = cache.alg
+    (; bcresid_prototype, f_prototype) = cache
     N = length(cache.mesh)
 
     resid = vcat(@view(cache.bcresid_prototype[1:prod(cache.resid_size[1])]),
@@ -776,26 +1054,26 @@ function __construct_problem(
 
     resid_prototype = copy(resid)
     return __construct_internal_problem(
-        cache.prob, cache.problem_type, cache.alg, loss, jac,
-        jac_prototype, resid_prototype, y, cache.p, cache.M, N)
+        cache.prob, cache.problem_type, cache.alg, loss, jac, jac_prototype,
+        resid_prototype, bcresid_prototype, f_prototype, y, cache.p, cache.M, N)
 end
 
-@views function __firk_loss!(resid, u, p, y, pt::StandardBVProblem, bc!::BC, residual,
-        mesh, cache, eval_sol, trait::DiffCacheNeeded) where {BC}
+@views function __firk_loss!(resid, u, p, y, pt::StandardBVProblem, bc!::BC, residual, mesh,
+        cache, eval_sol, trait::DiffCacheNeeded, constraint) where {BC}
     y_ = recursive_unflatten!(y, u)
     resids = [get_tmp(r, u) for r in residual]
-    Φ!(resids[2:end], cache, y_, u, trait)
+    Φ!(resids[2:end], cache, y_, u, trait, constraint)
     eval_sol.u[1:end] .= y_
     eval_bc_residual!(resids[1], pt, bc!, eval_sol, p, mesh)
     recursive_flatten!(resid, resids)
     return nothing
 end
 
-@views function __firk_loss!(resid, u, p, y, pt::StandardBVProblem, bc!::BC, residual,
-        mesh, cache, eval_sol, trait::NoDiffCacheNeeded) where {BC}
+@views function __firk_loss!(resid, u, p, y, pt::StandardBVProblem, bc!::BC, residual, mesh,
+        cache, eval_sol, trait::NoDiffCacheNeeded, constraint) where {BC}
     y_ = recursive_unflatten!(y, u)
     resids = [r for r in residual]
-    Φ!(resids[2:end], cache, y_, u, trait)
+    Φ!(resids[2:end], cache, y_, u, trait, constraint)
     eval_sol.u[1:end] .= y_
     eval_bc_residual!(resids[1], pt, bc!, eval_sol, p, mesh)
     recursive_flatten!(resid, resids)
@@ -803,44 +1081,46 @@ end
 end
 
 # loss function for optimization based solvers
-@views function __firk_loss!(resid, u, p, y, pt::StandardBVProblem, bc!::BC,
-        residual, mesh, cache, trait) where {BC}
+@views function __firk_loss!(resid, u, p, y, pt::StandardBVProblem, bc!::BC, residual,
+        bcresid_prototype, mesh, cache, _, trait, constraint) where {BC}
     bcresid = length(cache.bcresid_prototype)
     __firk_loss_bc!(resid[1:bcresid], u, p, pt, bc!, y, mesh, cache, trait)
     __firk_loss_collocation!(
-        resid[(bcresid + 1):end], u, p, y, mesh, residual, cache, trait)
+        resid[(bcresid + 1):end], u, p, y, mesh, residual, cache, trait, constraint)
     return nothing
 end
 
 @views function __firk_loss!(
         resid, u, p, y::AbstractVector, pt::TwoPointBVProblem, bc!::Tuple{BC1, BC2},
-        residual, mesh, cache, _, trait::DiffCacheNeeded) where {BC1, BC2}
+        residual, mesh, cache, _, trait::DiffCacheNeeded, constraint) where {BC1, BC2}
     y_ = recursive_unflatten!(y, u)
     resids = [get_tmp(r, u) for r in residual]
     resida = resids[1][1:prod(cache.resid_size[1])]
     residb = resids[1][(prod(cache.resid_size[1]) + 1):end]
     eval_bc_residual!((resida, residb), pt, bc!, y_, p, mesh)
-    Φ!(resids[2:end], cache, y_, u, trait)
+    Φ!(resids[2:end], cache, y_, u, trait, constraint)
     recursive_flatten_twopoint!(resid, resids, cache.resid_size)
     return nothing
 end
 
-@views function __firk_loss!(resid, u, p, y, pt::TwoPointBVProblem, bc!::Tuple{BC1, BC2},
-        residual, mesh, cache, _, trait::NoDiffCacheNeeded) where {BC1, BC2}
+@views function __firk_loss!(
+        resid, u, p, y, pt::TwoPointBVProblem, bc!::Tuple{BC1, BC2}, residual,
+        mesh, cache, _, trait::NoDiffCacheNeeded, constraint) where {BC1, BC2}
     y_ = recursive_unflatten!(y, u)
     soly_ = VectorOfArray(y_)
     resida = residual[1][1:prod(cache.resid_size[1])]
     residb = residual[1][(prod(cache.resid_size[1]) + 1):end]
     eval_bc_residual!((resida, residb), pt, bc!, soly_, p, mesh)
-    Φ!(residual[2:end], cache, y_, u, trait)
+    Φ!(residual[2:end], cache, y_, u, trait, constraint)
     recursive_flatten_twopoint!(resid, residual, cache.resid_size)
     return nothing
 end
 
 # loss function for optimization based solvers
-@views function __firk_loss!(resid, u, p, y, pt::TwoPointBVProblem, bc!::Tuple{BC1, BC2},
-        residual, mesh, cache, trait) where {BC1, BC2}
-    __firk_loss!(resid, u, p, y, pt, bc!, residual, mesh, cache, nothing, trait)
+@views function __firk_loss!(
+        resid, u, p, y, pt::TwoPointBVProblem, bc!::Tuple{BC1, BC2}, residual,
+        bcresid_prototype, mesh, cache, _, trait, constraint) where {BC1, BC2}
+    __firk_loss!(resid, u, p, y, pt, bc!, residual, mesh, cache, nothing, trait, constraint)
     return nothing
 end
 
@@ -877,19 +1157,19 @@ end
 end
 
 @views function __firk_loss_collocation!(
-        resid, u, p, y, mesh, residual, cache, trait::DiffCacheNeeded)
+        resid, u, p, y, mesh, residual, cache, trait::DiffCacheNeeded, constraint)
     y_ = recursive_unflatten!(y, u)
     resids = [get_tmp(r, u) for r in residual[2:end]]
-    Φ!(resids, cache, y_, u, trait)
+    Φ!(resids, cache, y_, u, trait, constraint)
     recursive_flatten!(resid, resids)
     return nothing
 end
 
 @views function __firk_loss_collocation!(
-        resid, u, p, y, mesh, residual, cache, trait::NoDiffCacheNeeded)
+        resid, u, p, y, mesh, residual, cache, trait::NoDiffCacheNeeded, constraint)
     y_ = recursive_unflatten!(y, u)
     resids = [r for r in residual[2:end]]
-    Φ!(resids, cache, y_, u, trait)
+    Φ!(resids, cache, y_, u, trait, constraint)
     recursive_flatten!(resid, resids)
     return nothing
 end
diff --git a/lib/BoundaryValueDiffEqMIRK/src/BoundaryValueDiffEqMIRK.jl b/lib/BoundaryValueDiffEqMIRK/src/BoundaryValueDiffEqMIRK.jl
index 9801f9b31..9606e118e 100644
--- a/lib/BoundaryValueDiffEqMIRK/src/BoundaryValueDiffEqMIRK.jl
+++ b/lib/BoundaryValueDiffEqMIRK/src/BoundaryValueDiffEqMIRK.jl
@@ -23,7 +23,8 @@ using BoundaryValueDiffEqCore: AbstractBoundaryValueDiffEqAlgorithm,
                                __use_both_error_control, __default_coloring_algorithm,
                                DiffCacheNeeded, NoDiffCacheNeeded, __split_kwargs,
                                __concrete_kwargs, __FastShortcutNonlinearPolyalg,
-                               __construct_internal_problem, __internal_solve
+                               __construct_internal_problem, __internal_solve,
+                               __default_sparsity_detector
 
 using ConcreteStructs: @concrete
 using DiffEqBase: DiffEqBase
diff --git a/lib/BoundaryValueDiffEqMIRK/src/collocation.jl b/lib/BoundaryValueDiffEqMIRK/src/collocation.jl
index 88d79bd02..327502fae 100644
--- a/lib/BoundaryValueDiffEqMIRK/src/collocation.jl
+++ b/lib/BoundaryValueDiffEqMIRK/src/collocation.jl
@@ -1,16 +1,16 @@
 function Φ!(residual, cache::MIRKCache, y, u, trait, constraint)
-    return Φ!(residual, cache.fᵢ_cache, cache.k_discrete, cache.f, cache.TU, y, u,
-        cache.p, cache.mesh, cache.mesh_dt, cache.stage, trait, constraint)
+    return Φ!(residual, cache.fᵢ_cache, cache.k_discrete, cache.f, cache.TU, y, u, cache.p,
+        cache.mesh, cache.mesh_dt, cache.stage, cache.f_prototype, trait, constraint)
 end
 
 @views function Φ!(residual, fᵢ_cache, k_discrete, f!, TU::MIRKTableau, y, u, p, mesh,
-        mesh_dt, stage::Int, ::DiffCacheNeeded, constraint::Val{true})
+        mesh_dt, stage::Int, f_prototype, ::DiffCacheNeeded, ::Val{true})
     (; c, v, x, b) = TU
+    L_f_prototype = length(f_prototype)
 
-    ttt = get_tmp(fᵢ_cache, u)
-    tmp = copy(ttt[1:3])
-
-    length_control = length(last(residual))
+    tmpy,
+    tmpu = get_tmp(fᵢ_cache, u)[1:L_f_prototype],
+    get_tmp(fᵢ_cache, u)[(L_f_prototype + 1):end]
 
     T = eltype(u)
     for i in eachindex(k_discrete)
@@ -21,13 +21,14 @@ end
         yᵢ = get_tmp(y[i], u)
         yᵢ₊₁ = get_tmp(y[i + 1], u)
 
-        yᵢ, uᵢ = yᵢ[1:(end - length_control)], yᵢ[(end - length_control + 1):end]
-        yᵢ₊₁, uᵢ₊₁ = yᵢ₊₁[1:(end - length_control)], yᵢ₊₁[(end - length_control + 1):end]
+        yᵢ, uᵢ = yᵢ[1:L_f_prototype], yᵢ[(L_f_prototype + 1):end]
+        yᵢ₊₁, uᵢ₊₁ = yᵢ₊₁[1:L_f_prototype], yᵢ₊₁[(L_f_prototype + 1):end]
 
         for r in 1:stage
-            @. tmp = (1 - v[r]) * yᵢ + v[r] * yᵢ₊₁
-            __maybe_matmul!(tmp, K[:, 1:(r - 1)], x[r, 1:(r - 1)], h, T(1))
-            f!(K[:, r], vcat(tmp, uᵢ), p, mesh[i] + c[r] * h)
+            @. tmpy = (1 - v[r]) * yᵢ + v[r] * yᵢ₊₁
+            @. tmpu = (1 - v[r]) * uᵢ + v[r] * uᵢ₊₁
+            __maybe_matmul!(tmpy, K[:, 1:(r - 1)], x[r, 1:(r - 1)], h, T(1))
+            f!(K[:, r], vcat(tmpy, tmpu), p, mesh[i] + c[r] * h)
         end
 
         # Update residual
@@ -37,7 +38,7 @@ end
 end
 
 @views function Φ!(residual, fᵢ_cache, k_discrete, f!, TU::MIRKTableau, y, u, p, mesh,
-        mesh_dt, stage::Int, ::DiffCacheNeeded, constraint::Val{false})
+        mesh_dt, stage::Int, _, ::DiffCacheNeeded, constraint::Val{false})
     (; c, v, x, b) = TU
 
     tmp = get_tmp(fᵢ_cache, u)
@@ -62,8 +63,8 @@ end
     end
 end
 
-@views function Φ!(residual, fᵢ_cache, k_discrete, f!, TU::MIRKTableau, y, u, p, mesh,
-        mesh_dt, stage::Int, ::NoDiffCacheNeeded, constraint::Val{false})
+@views function Φ!(residual, fᵢ_cache, k_discrete, f!, TU::MIRKTableau, y, u, p,
+        mesh, mesh_dt, stage::Int, _, ::NoDiffCacheNeeded, ::Val{false})
     (; c, v, x, b) = TU
 
     tmp = similar(fᵢ_cache)
diff --git a/lib/BoundaryValueDiffEqMIRK/src/mirk.jl b/lib/BoundaryValueDiffEqMIRK/src/mirk.jl
index 8855cfd35..cf896c028 100644
--- a/lib/BoundaryValueDiffEqMIRK/src/mirk.jl
+++ b/lib/BoundaryValueDiffEqMIRK/src/mirk.jl
@@ -44,7 +44,10 @@ function SciMLBase.__init(
     diffcache = __cache_trait(alg.jac_alg)
     @assert (iip || isnothing(alg.optimize)) "Out-of-place constraints don't allow optimization solvers "
     fit_parameters = haskey(prob.kwargs, :fit_parameters)
-    constraint = (!isnothing(prob.f.inequality)) || (!isnothing(prob.f.equality))
+    constraint = (!isnothing(prob.f.inequality)) ||
+                 (!isnothing(prob.f.equality)) ||
+                 (!isnothing(prob.lb)) ||
+                 (!isnothing(prob.ub))
 
     t₀, t₁ = prob.tspan
     ig, T,
@@ -67,24 +70,23 @@ function SciMLBase.__init(
     TU, ITU = constructMIRK(alg, T)
     stage = alg_stage(alg)
     f_prototype = isnothing(prob.f.f_prototype) ? nothing : __vec(prob.f.f_prototype)
+    L_f_prototype = isnothing(f_prototype) ? N : length(f_prototype)
 
     k_discrete = if !constraint
         [__maybe_allocate_diffcache(safe_similar(X, N, stage), chunksize, alg.jac_alg)
          for _ in 1:Nig]
     else
-        [__maybe_allocate_diffcache(safe_similar(X, N - length(f_prototype), stage), chunksize, alg.jac_alg)
+        [__maybe_allocate_diffcache(safe_similar(X, L_f_prototype, stage), chunksize, alg.jac_alg)
          for _ in 1:Nig]
     end
     k_interp = if !constraint
         VectorOfArray([similar(X, N, ITU.s_star - stage) for _ in 1:Nig])
     else
-        VectorOfArray([similar(X, N - length(f_prototype), ITU.s_star - stage)
-                       for _ in 1:Nig])
+        VectorOfArray([similar(X, L_f_prototype, ITU.s_star - stage) for _ in 1:Nig])
     end
 
     bcresid_prototype, resid₁_size = __get_bcresid_prototype(prob.problem_type, prob, X)
 
-    # residual should be [bc_resid_prototype; f_prototype; f_prototype; f_prototype; f_prototype...]
     residual = if iip
         if !constraint
             if prob.problem_type isa TwoPointBVProblem
@@ -94,13 +96,11 @@ function SciMLBase.__init(
             end
         else
             if prob.problem_type isa TwoPointBVProblem
-                #vcat(__alloc.(copy.(y₀.u)), [__alloc(__vec(bcresid_prototype))],
-                #    __alloc.(copy.(@view(y₀.u[2:end]))))
+                vcat([__alloc(__vec(bcresid_prototype))], __alloc.(copy.([f_prototype
+                                                                          for _ in 1:Nig])))
             else
-                #vcat(__alloc.(copy.(y₀.u)), [__alloc(bcresid_prototype)],
-                #    __alloc.(copy.(@view(y₀.u[2:end]))))
-                vcat(__alloc.(copy.(y₀.u)), [__alloc(bcresid_prototype)],
-                    __alloc.(copy.([f_prototype for _ in 1:Nig])))
+                vcat([__alloc(bcresid_prototype)], __alloc.(copy.([f_prototype
+                                                                   for _ in 1:Nig])))
             end
         end
     else
@@ -108,9 +108,18 @@ function SciMLBase.__init(
     end
 
     use_both = __use_both_error_control(controller)
-    errors = VectorOfArray([similar(X, ifelse(adaptive, N, 0))
-                            for _ in 1:ifelse(use_both, 2Nig, Nig)])
-    new_stages = VectorOfArray([similar(X, N) for _ in 1:Nig])
+    errors = if !constraint
+        VectorOfArray([similar(X, ifelse(adaptive, N, 0))
+                       for _ in 1:ifelse(use_both, 2Nig, Nig)])
+    else
+        VectorOfArray([similar(X, ifelse(adaptive, L_f_prototype, 0))
+                       for _ in 1:ifelse(use_both, 2Nig, Nig)])
+    end
+    new_stages = if !constraint
+        VectorOfArray([similar(X, N) for _ in 1:Nig])
+    else
+        VectorOfArray([similar(X, L_f_prototype) for _ in 1:Nig])
+    end
 
     # Transform the functions to handle non-vector inputs
     bcresid_prototype = __vec(bcresid_prototype)
@@ -271,12 +280,14 @@ end
 # Constructing the Nonlinear Problem
 function __construct_problem(cache::MIRKCache{iip}, y::AbstractVector, y₀::AbstractVectorOfArray) where {iip}
     constraint = (!isnothing(cache.prob.f.inequality)) ||
-                 (!isnothing(cache.prob.f.equality))
+                 (!isnothing(cache.prob.f.equality)) ||
+                 (!isnothing(cache.prob.lb)) ||
+                 (!isnothing(cache.prob.ub))
     return __construct_problem(cache, y, y₀, Val(constraint))
 end
 
 function __construct_problem(cache::MIRKCache{iip}, y::AbstractVector,
-        y₀::AbstractVectorOfArray, constraint::Val{false}) where {iip}
+        y₀::AbstractVectorOfArray, constraint) where {iip}
     pt = cache.problem_type
     (; jac_alg) = cache.alg
 
@@ -317,73 +328,16 @@ function __construct_problem(cache::MIRKCache{iip}, y::AbstractVector,
 
     if !isnothing(cache.alg.optimize)
         loss = @closure (du,
-            u,
-            p) -> __mirk_loss!(
-            du, u, p, cache.y, pt, cache.bc, cache.residual, cache.mesh, cache, trait)
-    end
-
-    return __construct_problem(cache, y, loss_bc, loss_collocation, loss, pt, constraint)
-end
-
-function __construct_problem(cache::MIRKCache{iip}, y::AbstractVector,
-        y₀::AbstractVectorOfArray, constraint::Val{true}) where {iip}
-    pt = cache.problem_type
-    (; jac_alg) = cache.alg
-
-    eval_sol = EvalSol(__restructure_sol(y₀.u, cache.in_size), cache.mesh, cache)
-
-    trait = __cache_trait(jac_alg)
-
-    loss_bc = if iip
-        @closure (du,
-            u,
-            p) -> __mirk_loss_bc!(du, u, p, pt, cache.bc, cache.y, cache.mesh, cache, trait)
-    else
-        @closure (
-            u, p) -> __mirk_loss_bc(u, p, pt, cache.bc, cache.y, cache.mesh, cache, trait)
-    end
-
-    loss_collocation = if iip
-        @closure (du,
-            u,
-            p) -> __mirk_loss_collocation!(
-            du, u, p, cache.y, cache.mesh, cache.residual, cache, trait, constraint)
-    else
-        @closure (u,
-            p) -> __mirk_loss_collocation(
-            u, p, cache.y, cache.mesh, cache.residual, cache, trait)
-    end
-
-    loss_constraint = @closure (du,
-        u,
-        p) -> __mirk_loss_constraint!(
-        du, u, p, cache.y, cache.mesh, cache.residual, cache, trait)
-
-    loss = if iip
-        @closure (du,
             u,
             p) -> __mirk_loss!(du, u, p, cache.y, pt, cache.bc, cache.residual,
-            cache.mesh, cache, eval_sol, trait, constraint)
-    else
-        @closure (u,
-            p) -> __mirk_loss(
-            u, p, cache.y, pt, cache.bc, cache.mesh, cache, eval_sol, trait)
+            cache.bcresid_prototype, cache.mesh, cache, eval_sol, trait, constraint)
     end
 
-    return __construct_problem(
-        cache, y, loss_bc, loss_collocation, loss_constraint, loss, pt, constraint)
+    return __construct_problem(cache, y, loss_bc, loss_collocation, loss, pt, constraint)
 end
 
-# Let's only do inplace version now since Optimization.jl only supports that.
-
-# J = [J_constraints;
-#      J_bvp]
-# where J_constraints is the Jacobian from equality/inequality constraints(whole-time equality/inequality)
-# and J_bvp is the Jacobian computed from bc and collocation
-
-@views function __mirk_loss!(
-        resid, u, p, y, pt::StandardBVProblem, bc!::BC, residual, mesh, cache,
-        EvalSol, trait::DiffCacheNeeded, constraint::Val{false}) where {BC}
+@views function __mirk_loss!(resid, u, p, y, pt::StandardBVProblem, bc!::BC, residual, mesh,
+        cache, EvalSol, trait::DiffCacheNeeded, constraint) where {BC}
     y_ = recursive_unflatten!(y, u)
     resids = [get_tmp(r, u) for r in residual]
     Φ!(resids[2:end], cache, y_, u, trait, constraint)
@@ -394,9 +348,8 @@ end
     return nothing
 end
 
-@views function __mirk_loss!(
-        resid, u, p, y, pt::StandardBVProblem, bc!::BC, residual, mesh, cache,
-        EvalSol, trait::NoDiffCacheNeeded, constraint::Val{false}) where {BC}
+@views function __mirk_loss!(resid, u, p, y, pt::StandardBVProblem, bc!::BC, residual, mesh,
+        cache, EvalSol, trait::NoDiffCacheNeeded, constraint) where {BC}
     y_ = recursive_unflatten!(y, u)
     Φ!(residual[2:end], cache, y_, u, trait, constraint)
     EvalSol.u[1:end] .= __restructure_sol(y_, cache.in_size)
@@ -407,17 +360,18 @@ end
 end
 
 # loss function for optimization based solvers
-@views function __mirk_loss!(resid, u, p, y, pt::StandardBVProblem, bc!::BC,
-        residual, mesh, cache, trait) where {BC}
-    bcresid = length(cache.bcresid_prototype)
+@views function __mirk_loss!(resid, u, p, y, pt::StandardBVProblem, bc!::BC, residual,
+        bcresid_prototype, mesh, cache, _, trait, constraint) where {BC}
+    bcresid = length(bcresid_prototype)
     __mirk_loss_bc!(resid[1:bcresid], u, p, pt, bc!, y, mesh, cache, trait)
     __mirk_loss_collocation!(
-        resid[(bcresid + 1):end], u, p, y, mesh, residual, cache, trait)
+        resid[(bcresid + 1):end], u, p, y, mesh, residual, cache, trait, constraint)
     return nothing
 end
 
-@views function __mirk_loss!(resid, u, p, y, pt::TwoPointBVProblem, bc!::Tuple{BC1, BC2},
-        residual, mesh, cache, _, trait::DiffCacheNeeded, constraint::Val{false}) where {BC1, BC2}
+@views function __mirk_loss!(
+        resid, u, p, y, pt::TwoPointBVProblem, bc!::Tuple{BC1, BC2}, residual,
+        mesh, cache, _, trait::DiffCacheNeeded, constraint) where {BC1, BC2}
     y_ = recursive_unflatten!(y, u)
     resids = [get_tmp(r, u) for r in residual]
     Φ!(resids[2:end], cache, y_, u, trait, constraint)
@@ -429,8 +383,8 @@ end
 end
 
 @views function __mirk_loss!(
-        resid, u, p, y, pt::TwoPointBVProblem, bc!::Tuple{BC1, BC2}, residual, mesh,
-        cache, _, trait::NoDiffCacheNeeded, constraint::Val{false}) where {BC1, BC2}
+        resid, u, p, y, pt::TwoPointBVProblem, bc!::Tuple{BC1, BC2}, residual,
+        mesh, cache, _, trait::NoDiffCacheNeeded, constraint) where {BC1, BC2}
     y_ = recursive_unflatten!(y, u)
     Φ!(residual[2:end], cache, y_, u, trait, constraint)
     resida = residual[1][1:prod(cache.resid_size[1])]
@@ -440,26 +394,10 @@ end
     return nothing
 end
 
-@views function __mirk_loss!(
-        resid, u, p, y, pt::StandardBVProblem, bc!::BC, residual, mesh, cache,
-        EvalSol, trait::DiffCacheNeeded, constraint::Val{true}) where {BC}
-    y_ = recursive_unflatten!(y, u)
-    L = length(y_)
-    resids = [get_tmp(r, u) for r in residual]
-    # whole-time inequality constraints
-    cache.prob.f.inequality.(resids[1:L], y_, nothing)
-    Φ!(resids[(L + 2):2L], cache, y_, u, trait, constraint)
-    EvalSol.u[1:end] .= __restructure_sol(y_, cache.in_size)
-    EvalSol.cache.k_discrete[1:end] .= cache.k_discrete
-    eval_bc_residual!(resids[L + 1], pt, bc!, EvalSol, p, mesh)
-
-    recursive_flatten!(resid, resids)
-    return nothing
-end
-
 # loss function for optimization based solvers
-@views function __mirk_loss!(resid, u, p, y, pt::TwoPointBVProblem, bc!::Tuple{BC1, BC2},
-        residual, mesh, cache, trait, constraint::Val{true}) where {BC1, BC2}
+@views function __mirk_loss!(
+        resid, u, p, y, pt::TwoPointBVProblem, bc!::Tuple{BC1, BC2}, residual,
+        bcresid_prototype, mesh, cache, _, trait, constraint) where {BC1, BC2}
     __mirk_loss!(resid, u, p, y, pt, bc!, residual, mesh, cache, nothing, trait, constraint)
     return nothing
 end
@@ -482,10 +420,6 @@ end
     return vcat(resid_bca, mapreduce(vec, vcat, resid_co), resid_bcb)
 end
 
-################################################################################
-# Dispatch for boundary condition part
-################################################################################
-
 @views function __mirk_loss_bc!(
         resid, u, p, pt, bc!::BC, y, mesh, cache::MIRKCache, trait) where {BC}
     y_ = recursive_unflatten!(y, u)
@@ -501,45 +435,19 @@ end
     return eval_bc_residual(pt, bc!, soly_, p, mesh)
 end
 
-################################################################################
-# Dispatch for collocation part
-################################################################################
-
-@views function __mirk_loss_collocation!(resid, u, p, y, mesh, residual, cache,
-        trait::DiffCacheNeeded, constraint::Val{false})
-    y_ = recursive_unflatten!(y, u)
-    N = length(y)
-    resids = [get_tmp(r, u) for r in residual[2:N]]
-    Φ!(resids, cache, y_, u, trait, constraint)
-    recursive_flatten!(resid, resids)
-    return nothing
-end
-
-@views function __mirk_loss_collocation!(resid, u, p, y, mesh, residual, cache,
-        trait::DiffCacheNeeded, constraint::Val{true})
-    y_ = recursive_unflatten!(y, u)
-    N = length(y)
-    resids = [get_tmp(r, u) for r in residual[(N + 2):end]]
-    Φ!(resids, cache, y_, u, trait, constraint)
-    recursive_flatten!(resid, resids)
-    return nothing
-end
-
-@views function __mirk_loss_collocation!(resid, u, p, y, mesh, residual, cache,
-        trait::NoDiffCacheNeeded, constraint::Val{false})
+@views function __mirk_loss_collocation!(
+        resid, u, p, y, mesh, residual, cache, trait::DiffCacheNeeded, constraint)
     y_ = recursive_unflatten!(y, u)
-    N = length(y)
-    resids = [r for r in residual[2:N]]
+    resids = [get_tmp(r, u) for r in residual[2:end]]
     Φ!(resids, cache, y_, u, trait, constraint)
     recursive_flatten!(resid, resids)
     return nothing
 end
 
-@views function __mirk_loss_collocation!(resid, u, p, y, mesh, residual, cache,
-        trait::NoDiffCacheNeeded, constraint::Val{true})
+@views function __mirk_loss_collocation!(
+        resid, u, p, y, mesh, residual, cache, trait::NoDiffCacheNeeded, constraint)
     y_ = recursive_unflatten!(y, u)
-    N = length(y)
-    resids = [r for r in residual[(N + 2):end]]
+    resids = [r for r in residual[2:end]]
     Φ!(resids, cache, y_, u, trait, constraint)
     recursive_flatten!(resid, resids)
     return nothing
@@ -551,30 +459,18 @@ end
     return mapreduce(vec, vcat, resids)
 end
 
-################################################################################
-# Dispatch for problems with constraints
-################################################################################
-
-@views function __mirk_loss_constraint!(
-        resid, u, p, y, mesh, residual, cache, trait::DiffCacheNeeded)
-    y_ = recursive_unflatten!(y, u)
-    N = length(y)
-    resids = [get_tmp(r, u) for r in residual[1:N]]
-    cache.prob.f.inequality.(resids, y_, nothing)
-    recursive_flatten!(resid, resids)
-    return nothing
-end
-
 function __construct_problem(
         cache::MIRKCache{iip}, y, loss_bc::BC, loss_collocation::C, loss::LF,
-        ::StandardBVProblem, constraint::Val{false}) where {iip, BC, C, LF}
+        ::StandardBVProblem, constraint::Val{true}) where {iip, BC, C, LF}
     (; jac_alg) = cache.alg
+    (; f_prototype, bcresid_prototype) = cache
     (; bc_diffmode) = jac_alg
     N = length(cache.mesh)
 
-    resid_bc = cache.bcresid_prototype
+    resid_bc = bcresid_prototype
     L = length(resid_bc)
-    resid_collocation = safe_similar(y, cache.M * (N - 1))
+    L_f_prototype = length(f_prototype)
+    resid_collocation = safe_similar(y, L_f_prototype * (N - 1))
 
     cache_bc = if iip
         DI.prepare_jacobian(loss_bc, resid_bc, bc_diffmode, y, Constant(cache.p))
@@ -582,26 +478,9 @@ function __construct_problem(
         DI.prepare_jacobian(loss_bc, bc_diffmode, y, Constant(cache.p))
     end
 
-    nonbc_diffmode = if jac_alg.nonbc_diffmode isa AutoSparse
-        if L < cache.M
-            # For underdetermined problems we use sparse since we don't have banded qr
-            J_full_band = nothing
-            sparse_jacobian_prototype = __generate_sparse_jacobian_prototype(
-                cache, cache.problem_type, y, y, cache.M, N)
-        else
-            J_full_band = BandedMatrix(Ones{eltype(y)}(L + cache.M * (N - 1), cache.M * N),
-                (L + 1, cache.M + max(cache.M - L, 0)))
-            sparse_jacobian_prototype = __generate_sparse_jacobian_prototype(
-                cache, cache.problem_type, y, y, cache.M, N)
-        end
-        AutoSparse(get_dense_ad(jac_alg.nonbc_diffmode);
-            sparsity_detector = ADTypes.KnownJacobianSparsityDetector(sparse_jacobian_prototype),
-            coloring_algorithm = __default_coloring_algorithm(jac_alg.nonbc_diffmode))
-    else
-        J_full_band = nothing
-        jac_alg.nonbc_diffmode
-    end
-
+    nonbc_diffmode = AutoSparse(get_dense_ad(jac_alg.nonbc_diffmode),
+        sparsity_detector = __default_sparsity_detector(jac_alg.nonbc_diffmode),
+        coloring_algorithm = __default_coloring_algorithm(jac_alg.nonbc_diffmode))
     cache_collocation = if iip
         DI.prepare_jacobian(
             loss_collocation, resid_collocation, nonbc_diffmode, y, Constant(cache.p))
@@ -621,12 +500,7 @@ function __construct_problem(
         DI.jacobian(
             loss_collocation, cache_collocation, nonbc_diffmode, y, Constant(cache.p))
     end
-
-    if J_full_band === nothing
-        jac_prototype = vcat(J_bc, J_c)
-    else
-        jac_prototype = AlmostBandedMatrix{eltype(cache)}(J_full_band, J_bc)
-    end
+    jac_prototype = vcat(J_bc, J_c)
 
     jac = if iip
         @closure (J,
@@ -643,23 +517,23 @@ function __construct_problem(
 
     resid_prototype = vcat(resid_bc, resid_collocation)
     return __construct_internal_problem(
-        cache.prob, cache.problem_type, cache.alg, loss, jac,
-        jac_prototype, resid_prototype, y, cache.p, cache.M, N)
+        cache.prob, cache.problem_type, cache.alg, loss, jac, jac_prototype,
+        resid_prototype, bcresid_prototype, f_prototype, y, cache.p, cache.M, N)
 end
 
 # Dispatch for problems with constraints
 function __construct_problem(
-        cache::MIRKCache{iip}, y, loss_bc::BC, loss_collocation::C, loss_constraint::LC,
-        loss::LF, ::StandardBVProblem, constraint::Val{true}) where {iip, BC, C, LC, LF}
+        cache::MIRKCache{iip}, y, loss_bc::BC, loss_collocation::C, loss::LF,
+        ::StandardBVProblem, constraint::Val{false}) where {iip, BC, C, LF}
     (; jac_alg) = cache.alg
+    (; f_prototype, bcresid_prototype) = cache
     (; bc_diffmode) = jac_alg
-    (; f_prototype) = cache
     N = length(cache.mesh)
 
-    resid_bc = cache.bcresid_prototype
+    resid_bc = bcresid_prototype
     L = length(resid_bc)
-    resid_collocation = safe_similar(y, length(f_prototype) * (N - 1))
-    resid_prototype = safe_similar(y, cache.M * N)
+    resid_collocation = safe_similar(y, cache.M * (N - 1))
+    resid_prototype = vcat(resid_bc, resid_collocation)
 
     cache_bc = if iip
         DI.prepare_jacobian(loss_bc, resid_bc, bc_diffmode, y, Constant(cache.p))
@@ -674,12 +548,10 @@ function __construct_problem(
             sparse_jacobian_prototype = __generate_sparse_jacobian_prototype(
                 cache, cache.problem_type, y, y, cache.M, N)
         else
-            J_full_band = BandedMatrix(
-                Ones{eltype(y)}(L + length(f_prototype) * (N - 1), cache.M * N),
+            J_full_band = BandedMatrix(Ones{eltype(y)}(L + cache.M * (N - 1), cache.M * N),
                 (L + 1, cache.M + max(cache.M - L, 0)))
-            sparse_jacobian_prototype = BandedMatrix(
-                Ones{eltype(y)}(length(f_prototype) * (N - 1), cache.M * N), (
-                    1, 2cache.M - 1))
+            sparse_jacobian_prototype = __generate_sparse_jacobian_prototype(
+                cache, cache.problem_type, y, y, cache.M, N)
         end
         AutoSparse(get_dense_ad(jac_alg.nonbc_diffmode);
             sparsity_detector = ADTypes.KnownJacobianSparsityDetector(sparse_jacobian_prototype),
@@ -715,25 +587,22 @@ function __construct_problem(
         jac_prototype = AlmostBandedMatrix{eltype(cache)}(J_full_band, J_bc)
     end
 
-    constraint_diffmode = AutoSparse(AutoForwardDiff();
-        sparsity_detector = BoundaryValueDiffEqCore.TracerLocalSparsityDetector(),
-        coloring_algorithm = __default_coloring_algorithm(jac_alg.bc_diffmode))
-
-    cache_constraint = DI.prepare_jacobian(
-        loss_constraint, resid_prototype, constraint_diffmode, y, Constant(cache.p))
-    J_constraint = DI.jacobian(loss_constraint, resid_prototype, cache_constraint,
-        constraint_diffmode, y, Constant(cache.p))
-    jac_prototype = vcat(J_constraint, jac_prototype)
-
-    jac = @closure (J,
-        u,
-        p) -> __mirk_mpoint_jacobian!(
-        J, J_c, u, bc_diffmode, nonbc_diffmode, constraint_diffmode, cache_bc,
-        cache_collocation, cache_constraint, loss_bc, loss_collocation,
-        loss_constraint, resid_bc, resid_collocation, resid_prototype, L, cache.p)
+    jac = if iip
+        @closure (J,
+            u,
+            p) -> __mirk_mpoint_jacobian!(
+            J, J_c, u, bc_diffmode, nonbc_diffmode, cache_bc, cache_collocation,
+            loss_bc, loss_collocation, resid_bc, resid_collocation, L, cache.p)
+    else
+        @closure (u,
+            p) -> __mirk_mpoint_jacobian(
+            jac_prototype, J_c, u, bc_diffmode, nonbc_diffmode, cache_bc,
+            cache_collocation, loss_bc, loss_collocation, L, cache.p)
+    end
 
-    return __construct_internal_problem(cache.prob, cache.alg, loss, jac, jac_prototype,
-        resid_prototype, y, cache.p, cache.M, N)
+    return __construct_internal_problem(
+        cache.prob, cache.problem_type, cache.alg, loss, jac, jac_prototype,
+        resid_prototype, bcresid_prototype, f_prototype, y, cache.p, cache.M, N)
 end
 
 function __mirk_mpoint_jacobian!(
@@ -746,34 +615,6 @@ function __mirk_mpoint_jacobian!(
     return nothing
 end
 
-function __mirk_mpoint_jacobian!(J::AlmostBandedMatrix, J_c, x, bc_diffmode, nonbc_diffmode,
-        bc_diffcache, nonbc_diffcache, inequality_diffcache,
-        loss_bc::BC, loss_collocation::C, loss_inequality, resid_bc,
-        resid_collocation, resid_prototype, L::Int, p) where {BC, C}
-    J_bc = fillpart(J)
-    DI.jacobian!(loss_collocation, resid_collocation, J_c,
-        nonbc_diffcache, nonbc_diffmode, x, Constant(p))
-    DI.jacobian!(loss_bc, resid_bc, J_bc, bc_diffcache, bc_diffmode, x, Constant(p))
-    exclusive_bandpart(J) .= J_c
-    finish_part_setindex!(J)
-    return nothing
-end
-
-function __mirk_mpoint_jacobian!(J, _, x, bc_diffmode, nonbc_diffmode, constraint_diffmode,
-        bc_diffcache, nonbc_diffcache, constraint_diffcache,
-        loss_bc::BC, loss_collocation::C, loss_inequality, resid_bc,
-        resid_collocation, resid_prototype, L::Int, p) where {BC, C}
-    N = length(x)
-    # The Jacobian of constraints
-    DI.jacobian!(loss_inequality, resid_prototype, @view(J[1:N, :]),
-        constraint_diffcache, constraint_diffmode, x, Constant(p))
-    DI.jacobian!(loss_bc, resid_bc, @view(J[(N + 1):(N + L), :]),
-        bc_diffcache, bc_diffmode, x, Constant(p))
-    DI.jacobian!(loss_collocation, resid_collocation, @view(J[(N + L + 1):end, :]),
-        nonbc_diffcache, nonbc_diffmode, x, Constant(p))
-    return nothing
-end
-
 function __mirk_mpoint_jacobian!(J::AlmostBandedMatrix, J_c, x, bc_diffmode, nonbc_diffmode,
         bc_diffcache, nonbc_diffcache, loss_bc::BC, loss_collocation::C,
         resid_bc, resid_collocation, L::Int, p) where {BC, C}
@@ -806,22 +647,68 @@ function __mirk_mpoint_jacobian(
     return J
 end
 
+function __construct_problem(
+        cache::MIRKCache{iip}, y, loss_bc::BC, loss_collocation::C, loss::LF,
+        ::TwoPointBVProblem, constraint::Val{true}) where {iip, BC, C, LF}
+    (; jac_alg) = cache.alg
+    (; f_prototype, bcresid_prototype) = cache
+    N = length(cache.mesh)
+    L_f_prototype = length(f_prototype)
+
+    resid = vcat(@view(bcresid_prototype[1:prod(cache.resid_size[1])]),
+        safe_similar(y, L_f_prototype * (N - 1)),
+        @view(bcresid_prototype[(prod(cache.resid_size[1]) + 1):end]))
+
+    diffmode = if jac_alg.diffmode isa AutoSparse
+        AutoSparse(get_dense_ad(jac_alg.diffmode);
+            sparsity_detector = __default_sparsity_detector(jac_alg.diffmode),
+            coloring_algorithm = __default_coloring_algorithm(jac_alg.diffmode))
+    else
+        jac_alg.diffmode
+    end
+
+    diffcache = if iip
+        DI.prepare_jacobian(loss, resid, diffmode, y, Constant(cache.p))
+    else
+        DI.prepare_jacobian(loss, diffmode, y, Constant(cache.p))
+    end
+
+    jac_prototype = if iip
+        DI.jacobian(loss, resid, diffcache, diffmode, y, Constant(cache.p))
+    else
+        DI.jacobian(loss, diffcache, diffmode, y, Constant(cache.p))
+    end
+
+    jac = if iip
+        @closure (
+            J, u, p) -> __mirk_2point_jacobian!(J, u, diffmode, diffcache, loss, resid, p)
+    else
+        @closure (
+            u, p) -> __mirk_2point_jacobian(u, jac_prototype, diffmode, diffcache, loss, p)
+    end
+
+    resid_prototype = copy(resid)
+    return __construct_internal_problem(
+        cache.prob, cache.problem_type, cache.alg, loss, jac, jac_prototype,
+        resid_prototype, bcresid_prototype, f_prototype, y, cache.p, cache.M, N)
+end
+
 function __construct_problem(
         cache::MIRKCache{iip}, y, loss_bc::BC, loss_collocation::C, loss::LF,
         ::TwoPointBVProblem, constraint::Val{false}) where {iip, BC, C, LF}
     (; jac_alg) = cache.alg
+    (; f_prototype, bcresid_prototype) = cache
     N = length(cache.mesh)
 
-    resid = vcat(@view(cache.bcresid_prototype[1:prod(cache.resid_size[1])]),
+    resid = vcat(@view(bcresid_prototype[1:prod(cache.resid_size[1])]),
         safe_similar(y, cache.M * (N - 1)),
-        @view(cache.bcresid_prototype[(prod(cache.resid_size[1]) + 1):end]))
+        @view(bcresid_prototype[(prod(cache.resid_size[1]) + 1):end]))
 
     diffmode = if jac_alg.diffmode isa AutoSparse
         sparse_jacobian_prototype = __generate_sparse_jacobian_prototype(
             cache, cache.problem_type,
-            @view(cache.bcresid_prototype[1:prod(cache.resid_size[1])]),
-            @view(cache.bcresid_prototype[(prod(cache.resid_size[1]) + 1):end]),
-            cache.M, N)
+            @view(bcresid_prototype[1:prod(cache.resid_size[1])]),
+            @view(bcresid_prototype[(prod(cache.resid_size[1]) + 1):end]), cache.M, N)
         AutoSparse(get_dense_ad(jac_alg.diffmode);
             sparsity_detector = ADTypes.KnownJacobianSparsityDetector(sparse_jacobian_prototype),
             coloring_algorithm = __default_coloring_algorithm(jac_alg.diffmode))
@@ -851,8 +738,8 @@ function __construct_problem(
 
     resid_prototype = copy(resid)
     return __construct_internal_problem(
-        cache.prob, cache.problem_type, cache.alg, loss, jac,
-        jac_prototype, resid_prototype, y, cache.p, cache.M, N)
+        cache.prob, cache.problem_type, cache.alg, loss, jac, jac_prototype,
+        resid_prototype, bcresid_prototype, f_prototype, y, cache.p, cache.M, N)
 end
 
 function __mirk_2point_jacobian!(J, x, diffmode, diffcache, loss_fn::L, resid, p) where {L}
diff --git a/lib/BoundaryValueDiffEqMIRK/test/dynamic_optimization_tests.jl b/lib/BoundaryValueDiffEqMIRK/test/dynamic_optimization_tests.jl
new file mode 100644
index 000000000..963f7ad03
--- /dev/null
+++ b/lib/BoundaryValueDiffEqMIRK/test/dynamic_optimization_tests.jl
@@ -0,0 +1,57 @@
+@testitem "Rocket launching problem" begin
+    using BoundaryValueDiffEqMIRK, OptimizationIpopt
+    h_0 = 1                      # Initial height
+    v_0 = 0                      # Initial velocity
+    m_0 = 1.0                    # Initial mass
+    m_T = 0.6                    # Final mass
+    g_0 = 1                      # Gravity at the surface
+    h_c = 500                    # Used for drag
+    c = 0.5 * sqrt(g_0 * h_0)    # Thrust-to-fuel mass
+    D_c = 0.5 * 620 * m_0 / g_0  # Drag scaling
+    u_t_max = 3.5 * g_0 * m_0    # Maximum thrust
+    T_max = 0.2                  # Number of seconds
+    T = 1_000                    # Number of time steps
+    Δt = 0.2 / T;                # Time per discretized step
+
+    tspan = (0.0, 0.2)
+    drag(x_h, x_v) = D_c * x_v^2 * exp(-h_c * (x_h - h_0) / h_0)
+    g(x_h) = g_0 * (h_0 / x_h)^2
+    function rocket_launch!(du, u, p, t)
+        # u_t is the control variable (thrust)
+        x_v, x_h, x_m, u_t = u[1], u[2], u[3], u[4]
+        du[1] = (u_t-drag(x_h, x_v))/x_m - g(x_h)
+        du[2] = x_v
+        du[3] = -u_t/c
+    end
+    function rocket_launch_bc!(res, u, p, t)
+        res[1] = u(0.0)[1] - v_0
+        res[2] = u(0.0)[2] - h_0
+        res[3] = u(0.0)[3] - m_0
+        res[4] = u(0.2)[4] - 0.0
+    end
+    function rocket_launch_bc_a!(res, ua, p)
+        res[1] = ua[1] - v_0
+        res[2] = ua[2] - h_0
+        res[3] = ua[3] - m_0
+    end
+    function rocket_launch_bc_b!(res, ub, p)
+        res[1] = ub[4] - 0.0
+    end
+    cost_fun(u, p) = -u[end - 2] #Final altitude x_h. To minimize, only temporary, need to use temporary solution interpolation here similar to what we do in boundary condition evaluations.
+    u0 = [v_0, h_0, m_T, 3.0]
+    rocket_launch_fun_mp = BVPFunction(
+        rocket_launch!, rocket_launch_bc!; cost = cost_fun, f_prototype = zeros(3))
+    rocket_launch_prob_mp = BVProblem(rocket_launch_fun_mp, u0, tspan;
+        lb = [0.0, h_0, m_T, 0.0], ub = [Inf, Inf, m_0, u_t_max])
+    sol = solve(rocket_launch_prob_mp, MIRK4(; optimize = IpoptOptimizer()); dt = Δt, adaptive = false)
+    @test SciMLBase.successful_retcode(sol)
+
+    rocket_launch_fun_tp = BVPFunction(
+        rocket_launch!, (rocket_launch_bc_a!, rocket_launch_bc_b!);
+        cost = cost_fun, f_prototype = zeros(3),
+        bcresid_prototype = (zeros(3), zeros(1)), twopoint = Val(true))
+    rocket_launch_prob_tp = TwoPointBVProblem(rocket_launch_fun_tp, u0, tspan;
+        lb = [0.0, h_0, m_T, 0.0], ub = [Inf, Inf, m_0, u_t_max])
+    sol = solve(rocket_launch_prob_tp, MIRK4(; optimize = IpoptOptimizer()); dt = Δt, adaptive = false)
+    @test SciMLBase.successful_retcode(sol)
+end

From e03cc9819e030457b9abcb5aed89b18dd5b6261b Mon Sep 17 00:00:00 2001
From: Qingyu Qu <2283984853@qq.com>
Date: Mon, 17 Nov 2025 22:05:27 +0800
Subject: [PATCH 08/11] Move problem constructor to a new file

---
 .../src/BoundaryValueDiffEqCore.jl            |   1 +
 .../src/internal_problems.jl                  | 224 ++++++++++++++++++
 lib/BoundaryValueDiffEqCore/src/utils.jl      | 164 -------------
 3 files changed, 225 insertions(+), 164 deletions(-)
 create mode 100644 lib/BoundaryValueDiffEqCore/src/internal_problems.jl

diff --git a/lib/BoundaryValueDiffEqCore/src/BoundaryValueDiffEqCore.jl b/lib/BoundaryValueDiffEqCore/src/BoundaryValueDiffEqCore.jl
index bcc83ca6b..299c83dc8 100644
--- a/lib/BoundaryValueDiffEqCore/src/BoundaryValueDiffEqCore.jl
+++ b/lib/BoundaryValueDiffEqCore/src/BoundaryValueDiffEqCore.jl
@@ -27,6 +27,7 @@ using SparseMatrixColorings: GreedyColoringAlgorithm
 include("types.jl")
 include("solution_utils.jl")
 include("utils.jl")
+include("internal_problems.jl")
 include("algorithms.jl")
 include("abstract_types.jl")
 include("alg_utils.jl")
diff --git a/lib/BoundaryValueDiffEqCore/src/internal_problems.jl b/lib/BoundaryValueDiffEqCore/src/internal_problems.jl
new file mode 100644
index 000000000..24d782450
--- /dev/null
+++ b/lib/BoundaryValueDiffEqCore/src/internal_problems.jl
@@ -0,0 +1,224 @@
+@inline __default_cost(::Nothing) = (x, p) -> 0.0
+@inline __default_cost(f) = f
+@inline __default_cost(fun::BVPFunction) = __default_cost(fun.cost)
+
+@inline function __extract_lcons_ucons(
+        prob::AbstractBVProblem, ::Type{T}, M, N, bcresid_prototype, f_prototype) where {T}
+    L_f_prototype = length(f_prototype)
+    L_bcresid_prototype = length(bcresid_prototype)
+    lcons = if isnothing(prob.lcons)
+        zeros(T, L_bcresid_prototype + (N - 1)*L_f_prototype)
+    else
+        lcons_length = length(prob.lcons)
+        vcat(prob.lcons, zeros(T, N*M - lcons_length))
+    end
+    ucons = if isnothing(prob.ucons)
+        zeros(T, L_bcresid_prototype + (N - 1)*L_f_prototype)
+    else
+        ucons_length = length(prob.ucons)
+        vcat(prob.ucons, zeros(T, N*M - ucons_length))
+    end
+    return lcons, ucons
+end
+
+@inline function __extract_lcons_ucons(prob::AbstractBVProblem, ::Type{T}, M, N) where {T}
+    lcons = if isnothing(prob.lcons)
+        zeros(T, N*M)
+    else
+        lcons_length = length(prob.lcons)
+        vcat(prob.lcons, zeros(T, N*M - lcons_length))
+    end
+    ucons = if isnothing(prob.ucons)
+        zeros(T, N*M)
+    else
+        ucons_length = length(prob.ucons)
+        vcat(prob.ucons, zeros(T, N*M - ucons_length))
+    end
+    return lcons, ucons
+end
+
+@inline function __extract_lb_ub(prob::AbstractBVProblem, ::Type{T}, M, N) where {T}
+    lb = if isnothing(prob.lb)
+        nothing
+    else
+        repeat(prob.lb, N)
+    end
+    ub = if isnothing(prob.ub)
+        nothing
+    else
+        repeat(prob.ub, N)
+    end
+    return lb, ub
+end
+
+"""
+    __construct_internal_problem
+
+Constructs the internal problem based on the type of the boundary value problem and the
+algorithm used. It returns either a `NonlinearProblem` or an `OptimizationProblem`.
+"""
+function __construct_internal_problem(
+        prob, pt::StandardBVProblem, alg, loss, jac, jac_prototype,
+        resid_prototype, bcresid_prototype, f_prototype, y, p, M::Int, N::Int)
+    T = eltype(y)
+    iip = SciMLBase.isinplace(prob)
+    if !isnothing(alg.nlsolve) || (isnothing(alg.nlsolve) && isnothing(alg.optimize))
+        nlf = NonlinearFunction{iip}(loss; jac = jac, resid_prototype = resid_prototype,
+            jac_prototype = jac_prototype)
+        return __internal_nlsolve_problem(prob, resid_prototype, y, nlf, y, p)
+    else
+        optf = OptimizationFunction{true}(__default_cost(prob.f),
+            AutoSparse(get_dense_ad(alg.jac_alg.nonbc_diffmode),
+                sparsity_detector = __default_sparsity_detector(alg.jac_alg.diffmode)),
+            cons = loss,
+            cons_j = jac,
+            cons_jac_prototype = sparse(jac_prototype))
+        lcons, ucons = __extract_lcons_ucons(prob, T, M, N, bcresid_prototype, f_prototype)
+        lb, ub = __extract_lb_ub(prob, T, M, N)
+
+        return __internal_optimization_problem(
+            prob, optf, y, p; lcons = lcons, ucons = ucons, lb = lb, ub = ub)
+    end
+end
+
+function __construct_internal_problem(
+        prob, pt::TwoPointBVProblem, alg, loss, jac, jac_prototype,
+        resid_prototype, bcresid_prototype, f_prototype, y, p, M::Int, N::Int)
+    T = eltype(y)
+    iip = SciMLBase.isinplace(prob)
+    if !isnothing(alg.nlsolve) || (isnothing(alg.nlsolve) && isnothing(alg.optimize))
+        nlf = NonlinearFunction{iip}(loss; jac = jac, resid_prototype = resid_prototype,
+            jac_prototype = jac_prototype)
+        return __internal_nlsolve_problem(prob, resid_prototype, y, nlf, y, p)
+    else
+        optf = OptimizationFunction{true}(__default_cost(prob.f),
+            AutoSparse(get_dense_ad(alg.jac_alg.diffmode),
+                sparsity_detector = __default_sparsity_detector(alg.jac_alg.diffmode)),
+            cons = loss,
+            cons_j = jac,
+            cons_jac_prototype = sparse(jac_prototype))
+        lcons, ucons = __extract_lcons_ucons(prob, T, M, N, bcresid_prototype, f_prototype)
+        lb, ub = __extract_lb_ub(prob, T, M, N)
+
+        return __internal_optimization_problem(
+            prob, optf, y, p; lcons = lcons, ucons = ucons, lb = lb, ub = ub)
+    end
+end
+
+# Single shooting use diffmode for StandardBVProblem and TwoPointBVProblem
+function __construct_internal_problem(prob, alg, loss, jac, jac_prototype, resid_prototype,
+        bcresid_prototype, f_prototype, y, p, M::Int, N::Int, ::Nothing)
+    T = eltype(y)
+    iip = SciMLBase.isinplace(prob)
+    if !isnothing(alg.nlsolve) || (isnothing(alg.nlsolve) && isnothing(alg.optimize))
+        nlf = NonlinearFunction{iip}(loss; jac = jac, resid_prototype = resid_prototype,
+            jac_prototype = jac_prototype)
+        return __internal_nlsolve_problem(prob, resid_prototype, y, nlf, y, p)
+    else
+        optf = OptimizationFunction{iip}(__default_cost(prob.f),
+            AutoSparse(get_dense_ad(alg.jac_alg.diffmode),
+                sparsity_detector = __default_sparsity_detector(alg.jac_alg.diffmode)),
+            cons = loss,
+            cons_j = jac,
+            cons_jac_prototype = sparse(jac_prototype))
+        lcons, ucons = __extract_lcons_ucons(prob, T, M, N, bcresid_prototype, f_prototype)
+        lb, ub = __extract_lb_ub(prob, T, M, N)
+
+        return __internal_optimization_problem(
+            prob, optf, y, p; lcons = lcons, ucons = ucons, lb = lb, ub = ub)
+    end
+end
+
+# Multiple shooting always use inplace version internal problem constructor
+function __construct_internal_problem(
+        prob, pt::StandardBVProblem, alg, loss, jac, jac_prototype, resid_prototype,
+        bcresid_prototype, f_prototype, y, p, M::Int, N::Int, ::Nothing)
+    T = eltype(y)
+    if !isnothing(alg.nlsolve) || (isnothing(alg.nlsolve) && isnothing(alg.optimize))
+        nlf = NonlinearFunction{true}(loss; jac = jac, resid_prototype = resid_prototype,
+            jac_prototype = jac_prototype)
+        return __internal_nlsolve_problem(prob, resid_prototype, y, nlf, y, p)
+    else
+        optf = OptimizationFunction{true}(__default_cost(prob.f),
+            AutoSparse(get_dense_ad(alg.jac_alg.nonbc_diffmode),
+                sparsity_detector = __default_sparsity_detector(alg.jac_alg.nonbc_diffmode)),
+            cons = loss,
+            cons_j = jac,
+            cons_jac_prototype = sparse(jac_prototype))
+        lcons, ucons = __extract_lcons_ucons(prob, T, M, N, bcresid_prototype, f_prototype)
+        lb, ub = __extract_lb_ub(prob, T, M, N)
+
+        return __internal_optimization_problem(
+            prob, optf, y, p; lcons = lcons, ucons = ucons, lb = lb, ub = ub)
+    end
+end
+function __construct_internal_problem(
+        prob, pt::TwoPointBVProblem, alg, loss, jac, jac_prototype, resid_prototype,
+        bcresid_prototype, f_prototype, y, p, M::Int, N::Int, ::Nothing)
+    T = eltype(y)
+    iip = SciMLBase.isinplace(prob)
+    if !isnothing(alg.nlsolve) || (isnothing(alg.nlsolve) && isnothing(alg.optimize))
+        nlf = NonlinearFunction{iip}(loss; jac = jac, resid_prototype = resid_prototype,
+            jac_prototype = jac_prototype)
+        return __internal_nlsolve_problem(prob, resid_prototype, y, nlf, y, p)
+    else
+        optf = OptimizationFunction{true}(__default_cost(prob.f),
+            AutoSparse(get_dense_ad(alg.jac_alg.diffmode),
+                sparsity_detector = __default_sparsity_detector(alg.jac_alg.nonbc_diffmode)),
+            cons = loss,
+            cons_j = jac,
+            cons_jac_prototype = sparse(jac_prototype))
+        lcons, ucons = __extract_lcons_ucons(prob, T, M, N, bcresid_prototype, f_prototype)
+        lb, ub = __extract_lb_ub(prob, T, M, N)
+
+        return __internal_optimization_problem(
+            prob, optf, y, p; lcons = lcons, ucons = ucons, lb = lb, ub = ub)
+    end
+end
+
+# SecondOrderBVProblem
+function __construct_internal_problem(
+        prob, pt::StandardSecondOrderBVProblem, alg, loss, jac,
+        jac_prototype, resid_prototype, y, p, M::Int, N::Int)
+    T = eltype(y)
+    if !isnothing(alg.nlsolve) || (isnothing(alg.nlsolve) && isnothing(alg.optimize))
+        nlf = NonlinearFunction{true}(loss; jac = jac, resid_prototype = resid_prototype,
+            jac_prototype = jac_prototype)
+        return __internal_nlsolve_problem(prob, resid_prototype, y, nlf, y, p)
+    else
+        optf = OptimizationFunction{true}(__default_cost(prob.f),
+            AutoSparse(get_dense_ad(alg.jac_alg.nonbc_diffmode),
+                sparsity_detector = __default_sparsity_detector(alg.jac_alg.nonbc_diffmode)),
+            cons = loss,
+            cons_j = jac,
+            cons_jac_prototype = sparse(jac_prototype))
+        lcons, ucons = __extract_lcons_ucons(prob, T, M, N)
+        lb, ub = __extract_lb_ub(prob, T, M, N)
+
+        return __internal_optimization_problem(
+            prob, optf, y, p; lcons = lcons, ucons = ucons, lb = lb, ub = ub)
+    end
+end
+function __construct_internal_problem(
+        prob, pt::TwoPointSecondOrderBVProblem, alg, loss, jac,
+        jac_prototype, resid_prototype, y, p, M::Int, N::Int)
+    T = eltype(y)
+    iip = SciMLBase.isinplace(prob)
+    if !isnothing(alg.nlsolve) || (isnothing(alg.nlsolve) && isnothing(alg.optimize))
+        nlf = NonlinearFunction{iip}(loss; jac = jac, resid_prototype = resid_prototype,
+            jac_prototype = jac_prototype)
+        return __internal_nlsolve_problem(prob, resid_prototype, y, nlf, y, p)
+    else
+        optf = OptimizationFunction{true}(__default_cost(prob.f),
+            AutoSparse(get_dense_ad(alg.jac_alg.diffmode),
+                sparsity_detector = __default_sparsity_detector(alg.jac_alg.nonbc_diffmode)),
+            cons = loss,
+            cons_j = jac,
+            cons_jac_prototype = sparse(jac_prototype))
+        lcons, ucons = __extract_lcons_ucons(prob, T, M, N)
+        lb, ub = __extract_lb_ub(prob, T, M, N)
+
+        return __internal_optimization_problem(
+            prob, optf, y, p; lcons = lcons, ucons = ucons, lb = lb, ub = ub)
+    end
+end
diff --git a/lib/BoundaryValueDiffEqCore/src/utils.jl b/lib/BoundaryValueDiffEqCore/src/utils.jl
index bbe3cd044..e0ffdccc6 100644
--- a/lib/BoundaryValueDiffEqCore/src/utils.jl
+++ b/lib/BoundaryValueDiffEqCore/src/utils.jl
@@ -620,167 +620,3 @@ end
 @inline __concrete_kwargs(::Nothing, optimize, nlsolve_kwargs, optimize_kwargs) = (;) # Doesn't support for now
 @inline __concrete_kwargs(::Nothing, ::Nothing, nlsolve_kwargs, optimize_kwargs) = (;
     nlsolve_kwargs...)
-
-## Optimization solver related utils ##
-
-@inline __default_cost(::Nothing) = (x, p) -> 0.0
-@inline __default_cost(f) = f
-@inline __default_cost(fun::BVPFunction) = __default_cost(fun.cost)
-
-@inline function __extract_lcons_ucons(
-        prob::AbstractBVProblem, ::Type{T}, M, N, bcresid_prototype, f_prototype) where {T}
-    L_f_prototype = length(f_prototype)
-    L_bcresid_prototype = length(bcresid_prototype)
-    lcons = if isnothing(prob.lcons)
-        zeros(T, L_bcresid_prototype + (N - 1)*L_f_prototype)
-    else
-        lcons_length = length(prob.lcons)
-        vcat(prob.lcons, zeros(T, N*M - lcons_length))
-    end
-    ucons = if isnothing(prob.ucons)
-        zeros(T, L_bcresid_prototype + (N - 1)*L_f_prototype)
-    else
-        ucons_length = length(prob.ucons)
-        vcat(prob.ucons, zeros(T, N*M - ucons_length))
-    end
-    return lcons, ucons
-end
-
-@inline function __extract_lb_ub(prob::AbstractBVProblem, ::Type{T}, M, N) where {T}
-    lb = if isnothing(prob.lb)
-        nothing
-    else
-        repeat(prob.lb, N)
-    end
-    ub = if isnothing(prob.ub)
-        nothing
-    else
-        repeat(prob.ub, N)
-    end
-    return lb, ub
-end
-
-"""
-    __construct_internal_problem
-
-Constructs the internal problem based on the type of the boundary value problem and the
-algorithm used. It returns either a `NonlinearProblem` or an `OptimizationProblem`.
-"""
-function __construct_internal_problem(
-        prob, pt::StandardBVProblem, alg, loss, jac, jac_prototype,
-        resid_prototype, bcresid_prototype, f_prototype, y, p, M::Int, N::Int)
-    T = eltype(y)
-    iip = SciMLBase.isinplace(prob)
-    if !isnothing(alg.nlsolve) || (isnothing(alg.nlsolve) && isnothing(alg.optimize))
-        nlf = NonlinearFunction{iip}(loss; jac = jac, resid_prototype = resid_prototype,
-            jac_prototype = jac_prototype)
-        return __internal_nlsolve_problem(prob, resid_prototype, y, nlf, y, p)
-    else
-        optf = OptimizationFunction{true}(__default_cost(prob.f),
-            AutoSparse(get_dense_ad(alg.jac_alg.nonbc_diffmode),
-                sparsity_detector = __default_sparsity_detector(alg.jac_alg.diffmode)),
-            cons = loss,
-            cons_j = jac,
-            cons_jac_prototype = sparse(jac_prototype))
-        lcons, ucons = __extract_lcons_ucons(prob, T, M, N, bcresid_prototype, f_prototype)
-        lb, ub = __extract_lb_ub(prob, T, M, N)
-
-        return __internal_optimization_problem(
-            prob, optf, y, p; lcons = lcons, ucons = ucons, lb = lb, ub = ub)
-    end
-end
-
-function __construct_internal_problem(
-        prob, pt::TwoPointBVProblem, alg, loss, jac, jac_prototype,
-        resid_prototype, bcresid_prototype, f_prototype, y, p, M::Int, N::Int)
-    T = eltype(y)
-    iip = SciMLBase.isinplace(prob)
-    if !isnothing(alg.nlsolve) || (isnothing(alg.nlsolve) && isnothing(alg.optimize))
-        nlf = NonlinearFunction{iip}(loss; jac = jac, resid_prototype = resid_prototype,
-            jac_prototype = jac_prototype)
-        return __internal_nlsolve_problem(prob, resid_prototype, y, nlf, y, p)
-    else
-        optf = OptimizationFunction{true}(__default_cost(prob.f),
-            AutoSparse(get_dense_ad(alg.jac_alg.diffmode),
-                sparsity_detector = __default_sparsity_detector(alg.jac_alg.diffmode)),
-            cons = loss,
-            cons_j = jac,
-            cons_jac_prototype = sparse(jac_prototype))
-        lcons, ucons = __extract_lcons_ucons(prob, T, M, N, bcresid_prototype, f_prototype)
-        lb, ub = __extract_lb_ub(prob, T, M, N)
-
-        return __internal_optimization_problem(
-            prob, optf, y, p; lcons = lcons, ucons = ucons, lb = lb, ub = ub)
-    end
-end
-
-# Single shooting use diffmode for StandardBVProblem and TwoPointBVProblem
-function __construct_internal_problem(prob, alg, loss, jac, jac_prototype, resid_prototype,
-        bcresid_prototype, f_prototype, y, p, M::Int, N::Int, ::Nothing)
-    T = eltype(y)
-    iip = SciMLBase.isinplace(prob)
-    if !isnothing(alg.nlsolve) || (isnothing(alg.nlsolve) && isnothing(alg.optimize))
-        nlf = NonlinearFunction{iip}(loss; jac = jac, resid_prototype = resid_prototype,
-            jac_prototype = jac_prototype)
-        return __internal_nlsolve_problem(prob, resid_prototype, y, nlf, y, p)
-    else
-        optf = OptimizationFunction{iip}(__default_cost(prob.f),
-            AutoSparse(get_dense_ad(alg.jac_alg.diffmode),
-                sparsity_detector = __default_sparsity_detector(alg.jac_alg.diffmode)),
-            cons = loss,
-            cons_j = jac,
-            cons_jac_prototype = sparse(jac_prototype))
-        lcons, ucons = __extract_lcons_ucons(prob, T, M, N, bcresid_prototype, f_prototype)
-        lb, ub = __extract_lb_ub(prob, T, M, N)
-
-        return __internal_optimization_problem(
-            prob, optf, y, p; lcons = lcons, ucons = ucons, lb = lb, ub = ub)
-    end
-end
-
-# Multiple shooting always use inplace version internal problem constructor
-function __construct_internal_problem(
-        prob, pt::StandardBVProblem, alg, loss, jac, jac_prototype, resid_prototype,
-        bcresid_prototype, f_prototype, y, p, M::Int, N::Int, ::Nothing)
-    T = eltype(y)
-    if !isnothing(alg.nlsolve) || (isnothing(alg.nlsolve) && isnothing(alg.optimize))
-        nlf = NonlinearFunction{true}(loss; jac = jac, resid_prototype = resid_prototype,
-            jac_prototype = jac_prototype)
-        return __internal_nlsolve_problem(prob, resid_prototype, y, nlf, y, p)
-    else
-        optf = OptimizationFunction{true}(__default_cost(prob.f),
-            AutoSparse(get_dense_ad(alg.jac_alg.nonbc_diffmode),
-                sparsity_detector = __default_sparsity_detector(alg.jac_alg.nonbc_diffmode)),
-            cons = loss,
-            cons_j = jac,
-            cons_jac_prototype = sparse(jac_prototype))
-        lcons, ucons = __extract_lcons_ucons(prob, T, M, N, bcresid_prototype, f_prototype)
-        lb, ub = __extract_lb_ub(prob, T, M, N)
-
-        return __internal_optimization_problem(
-            prob, optf, y, p; lcons = lcons, ucons = ucons, lb = lb, ub = ub)
-    end
-end
-function __construct_internal_problem(
-        prob, pt::TwoPointBVProblem, alg, loss, jac, jac_prototype, resid_prototype,
-        bcresid_prototype, f_prototype, y, p, M::Int, N::Int, ::Nothing)
-    T = eltype(y)
-    iip = SciMLBase.isinplace(prob)
-    if !isnothing(alg.nlsolve) || (isnothing(alg.nlsolve) && isnothing(alg.optimize))
-        nlf = NonlinearFunction{iip}(loss; jac = jac, resid_prototype = resid_prototype,
-            jac_prototype = jac_prototype)
-        return __internal_nlsolve_problem(prob, resid_prototype, y, nlf, y, p)
-    else
-        optf = OptimizationFunction{true}(__default_cost(prob.f),
-            AutoSparse(get_dense_ad(alg.jac_alg.diffmode),
-                sparsity_detector = __default_sparsity_detector(alg.jac_alg.nonbc_diffmode)),
-            cons = loss,
-            cons_j = jac,
-            cons_jac_prototype = sparse(jac_prototype))
-        lcons, ucons = __extract_lcons_ucons(prob, T, M, N, bcresid_prototype, f_prototype)
-        lb, ub = __extract_lb_ub(prob, T, M, N)
-
-        return __internal_optimization_problem(
-            prob, optf, y, p; lcons = lcons, ucons = ucons, lb = lb, ub = ub)
-    end
-end

From 8de6efcaa0ec296677c730744a85e1d90858a405 Mon Sep 17 00:00:00 2001
From: Qingyu Qu <2283984853@qq.com>
Date: Tue, 18 Nov 2025 17:54:38 +0800
Subject: [PATCH 09/11] Add cart-pole swing up example

---
 docs/src/tutorials/optimal_control.md | 143 ++++++++++++++++++++++++++
 1 file changed, 143 insertions(+)

diff --git a/docs/src/tutorials/optimal_control.md b/docs/src/tutorials/optimal_control.md
index decfcb6e6..da72f1003 100644
--- a/docs/src/tutorials/optimal_control.md
+++ b/docs/src/tutorials/optimal_control.md
@@ -102,3 +102,146 @@ plot(p1, p2, p3, p4, layout = (2, 2))
 ```
 
 Similar optimal control problem solving can also be deployed in JuMP.jl and InfiniteOpt.jl.
+
+## Cart-Pole Optimal Control
+
+The dynamic equation of the motion of cart-pole swing-up problem are given by:
+
+$$
+
+
+\begin{bmatrix}
+\ddot{x} \\
+\ddot{\theta}
+\end{bmatrix}
+=
+\begin{bmatrix}
+\cos\theta & \ell \\
+m_1 + m_2 & m_2 \ell \cos\theta
+\end{bmatrix}^{-1}
+\begin{bmatrix}
+- g \sin\theta \\
+F + m_2 \ell \dot{\theta}^2 \sin\theta
+\end{bmatrix}
+$$
+
+where $x$ is the location of the cart, $\theta$ is the pole angle, $m_1$ is the cart mass, $m_2$ is the pole mass, $l$ is the pole length.
+
+By converting the dynamics to first order equations, we can get the formulation:
+
+$$
+\begin{bmatrix}
+\dot{x} \\
+\dot{\theta} \\
+\ddot{x} \\
+\ddot{\theta} \\
+\dot{e}
+\end{bmatrix}
+= 
+f\!\left(
+\begin{bmatrix}
+x \\ \theta \\ \dot{x} \\ \dot{\theta} \\ e
+\end{bmatrix}
+\right)
+=
+\begin{bmatrix}
+\dot{x} \\
+\dot{\theta} \\
+\dfrac{-m_2 g \sin\theta \cos\theta - \left(F + m_2 \ell \dot{\theta}^2 \sin\theta\right)}
+{m_2 \cos^2\theta - (m_1 + m_2)} \\
+\dfrac{(m_1 + m_2) g \sin\theta + \cos\theta \left(F + m_2 \ell \dot{\theta}^2 \sin\theta\right)}
+{m_2 \ell \cos^2\theta - (m_1 + m_2)\ell} \\
+F^2
+\end{bmatrix}
+$$
+
+and the initial conditions of all states at $t=0$ are all zero, the boundary conditions at time $t_f$ are:
+
+$$
+x_f=d, \dot{x_f}=0, \theta_f=\pi, \dot{\theta_f}=0
+$$
+
+The target cost function is defined as the "energy" 
+
+```julia
+using BoundaryValueDiffEqMIRK, OptimizationMOI, Ipopt, Plots
+m_1 = 1.0                      # Cart mass
+m_2 = 0.3                      # Pole mass
+l = 0.5                        # Pole length
+d = 2.0                        # Cart target location
+t_0 = 0.0                      # Start time
+t_f = 2.0                      # Final time
+g = 9.81                       # Gravity constant
+tspan = (0.0, t_f)
+function cart_pole!(du, u, p, t)
+	x, θ, dx, dθ, f = u[1], u[2], u[3], u[4], u[5]
+	du[1] = dx
+	du[2] = dθ
+	du[3] = (- m_2*g*sin(θ)*cos(θ) - (f + m_2*l*θ^2*sin(θ))) / (m_2*l*cos(θ)^2 - m_1 - m_2)
+	du[4] = ((m_1 + m_2)*g*sin(θ) + cos(θ)*(f + m_1*l*dθ^2*sin(θ))) / (m_2*l*cos(θ)^2 - (m_1 + m_2)*l)
+end
+
+function cart_pole_bc!(du, u, p, t)
+	du[1] = u(t_f)[1] - d
+	du[2] = u(t_f)[2] - π
+	du[3] = u(t_0)[2] - 0.0
+	du[4] = u(t_0)[3] - 0.0
+	du[5] = u(t_0)[4] - 0.0
+	du[6] = u(t_f)[3] - 0.0
+	du[7] = u(t_f)[4] - 0.0
+end
+cost_fun(u, p) = u[end]
+u0 = [0.0, 0.0, 0.0, 0.0, 10.0]
+cart_pole_fun = BVPFunction(cart_pole!, cart_pole_bc!; cost = cost_fun, bcresid_prototype = zeros(7), f_prototype = zeros(4))
+cart_pole_prob = BVProblem(cart_pole_fun, u0, tspan; lb = [-2.0, -Inf, -Inf, -Inf, -20.0], ub = [2.0, Inf, Inf, Inf, 20.0])
+sol = solve(cart_pole_prob, MIRK4(; optimize = Ipopt.Optimizer()); dt = 0.01, adaptive = false)
+
+t = sol.t
+x, theta, dx, dtheta, f = sol[1, :], sol[2, :], sol[3, :], sol[4, :], sol[5, :]
+
+L      = 1.0    # pole length (visual)
+cart_w = 0.4    # cart width
+cart_h = 0.2    # cart height
+
+# Precompute pole tip coordinates
+px = x .+ L .* sin.(theta)
+py = cart_h/2 .- L .* cos.(theta)
+
+# Axis limits (a bit margin around the cart trajectory)
+xmin = minimum(x) - 2L
+xmax = maximum(x) + 2L
+ymin = -2.0
+ymax = 2.0
+
+anim = @animate for k in eachindex(t)
+    cart_x = x[k]
+    pole_x = px[k]
+    pole_y = py[k]
+
+    # Base plot / axis
+    plot(; xlim=(xmin, xmax), ylim=(ymin, ymax),
+         aspect_ratio=:equal, legend=false,
+         title = "Cart–Pole (t = $(round(t[k], digits=2)) s)")
+
+    # Draw ground
+    plot!([xmin, xmax], [0, 0], lw=2, color=:black)
+
+    # Draw cart as a rectangle
+    rect = Shape(
+        [cart_x - cart_w/2, cart_x + cart_w/2, cart_x + cart_w/2, cart_x - cart_w/2],
+        [0, 0, cart_h, cart_h]
+    )
+    plot!(rect, color=:gray)
+
+    # Draw pole as a line from cart center to tip
+    plot!([cart_x, pole_x], [cart_h/2, pole_y], lw=3, color=:red)
+
+    # Draw pivot point
+    scatter!([cart_x], [cart_h/2], ms=4, color=:black)
+end
+
+# Save GIF
+gif(anim, "./cart_pole.gif", fps=40)
+```
+
+![cart_pole](./cart_pole.gif)
\ No newline at end of file

From 0777c90ab5eb9f0b08595b47b9afd56593da1a93 Mon Sep 17 00:00:00 2001
From: Qingyu Qu <2283984853@qq.com>
Date: Tue, 18 Nov 2025 17:54:59 +0800
Subject: [PATCH 10/11] format

---
 docs/src/tutorials/optimal_control.md | 77 ++++++++++++++-------------
 1 file changed, 39 insertions(+), 38 deletions(-)

diff --git a/docs/src/tutorials/optimal_control.md b/docs/src/tutorials/optimal_control.md
index da72f1003..b7631bc1f 100644
--- a/docs/src/tutorials/optimal_control.md
+++ b/docs/src/tutorials/optimal_control.md
@@ -109,27 +109,27 @@ The dynamic equation of the motion of cart-pole swing-up problem are given by:
 
 $$
 
-
-\begin{bmatrix}
+# \begin{bmatrix}
 \ddot{x} \\
 \ddot{\theta}
 \end{bmatrix}
-=
+
 \begin{bmatrix}
 \cos\theta & \ell \\
 m_1 + m_2 & m_2 \ell \cos\theta
 \end{bmatrix}^{-1}
 \begin{bmatrix}
-- g \sin\theta \\
-F + m_2 \ell \dot{\theta}^2 \sin\theta
-\end{bmatrix}
-$$
+
+  - g \sin\theta \\
+    F + m_2 \ell \dot{\theta}^2 \sin\theta
+    \end{bmatrix}
+    $$
 
 where $x$ is the location of the cart, $\theta$ is the pole angle, $m_1$ is the cart mass, $m_2$ is the pole mass, $l$ is the pole length.
 
 By converting the dynamics to first order equations, we can get the formulation:
 
-$$
+# $$
 \begin{bmatrix}
 \dot{x} \\
 \dot{\theta} \\
@@ -137,13 +137,13 @@ $$
 \ddot{\theta} \\
 \dot{e}
 \end{bmatrix}
-= 
-f\!\left(
+
+# f\!\left(
 \begin{bmatrix}
 x \\ \theta \\ \dot{x} \\ \dot{\theta} \\ e
 \end{bmatrix}
 \right)
-=
+
 \begin{bmatrix}
 \dot{x} \\
 \dot{\theta} \\
@@ -161,7 +161,7 @@ $$
 x_f=d, \dot{x_f}=0, \theta_f=\pi, \dot{\theta_f}=0
 $$
 
-The target cost function is defined as the "energy" 
+The target cost function is defined as the "energy"
 
 ```julia
 using BoundaryValueDiffEqMIRK, OptimizationMOI, Ipopt, Plots
@@ -174,32 +174,35 @@ t_f = 2.0                      # Final time
 g = 9.81                       # Gravity constant
 tspan = (0.0, t_f)
 function cart_pole!(du, u, p, t)
-	x, θ, dx, dθ, f = u[1], u[2], u[3], u[4], u[5]
-	du[1] = dx
-	du[2] = dθ
-	du[3] = (- m_2*g*sin(θ)*cos(θ) - (f + m_2*l*θ^2*sin(θ))) / (m_2*l*cos(θ)^2 - m_1 - m_2)
-	du[4] = ((m_1 + m_2)*g*sin(θ) + cos(θ)*(f + m_1*l*dθ^2*sin(θ))) / (m_2*l*cos(θ)^2 - (m_1 + m_2)*l)
+    x, θ, dx, dθ, f = u[1], u[2], u[3], u[4], u[5]
+    du[1] = dx
+    du[2] = dθ
+    du[3] = (- m_2*g*sin(θ)*cos(θ) - (f + m_2*l*θ^2*sin(θ))) / (m_2*l*cos(θ)^2 - m_1 - m_2)
+    du[4] = ((m_1 + m_2)*g*sin(θ) + cos(θ)*(f + m_1*l*dθ^2*sin(θ))) /
+            (m_2*l*cos(θ)^2 - (m_1 + m_2)*l)
 end
 
 function cart_pole_bc!(du, u, p, t)
-	du[1] = u(t_f)[1] - d
-	du[2] = u(t_f)[2] - π
-	du[3] = u(t_0)[2] - 0.0
-	du[4] = u(t_0)[3] - 0.0
-	du[5] = u(t_0)[4] - 0.0
-	du[6] = u(t_f)[3] - 0.0
-	du[7] = u(t_f)[4] - 0.0
+    du[1] = u(t_f)[1] - d
+    du[2] = u(t_f)[2] - π
+    du[3] = u(t_0)[2] - 0.0
+    du[4] = u(t_0)[3] - 0.0
+    du[5] = u(t_0)[4] - 0.0
+    du[6] = u(t_f)[3] - 0.0
+    du[7] = u(t_f)[4] - 0.0
 end
 cost_fun(u, p) = u[end]
 u0 = [0.0, 0.0, 0.0, 0.0, 10.0]
-cart_pole_fun = BVPFunction(cart_pole!, cart_pole_bc!; cost = cost_fun, bcresid_prototype = zeros(7), f_prototype = zeros(4))
-cart_pole_prob = BVProblem(cart_pole_fun, u0, tspan; lb = [-2.0, -Inf, -Inf, -Inf, -20.0], ub = [2.0, Inf, Inf, Inf, 20.0])
+cart_pole_fun = BVPFunction(cart_pole!, cart_pole_bc!; cost = cost_fun,
+    bcresid_prototype = zeros(7), f_prototype = zeros(4))
+cart_pole_prob = BVProblem(cart_pole_fun, u0, tspan; lb = [-2.0, -Inf, -Inf, -Inf, -20.0],
+    ub = [2.0, Inf, Inf, Inf, 20.0])
 sol = solve(cart_pole_prob, MIRK4(; optimize = Ipopt.Optimizer()); dt = 0.01, adaptive = false)
 
 t = sol.t
 x, theta, dx, dtheta, f = sol[1, :], sol[2, :], sol[3, :], sol[4, :], sol[5, :]
 
-L      = 1.0    # pole length (visual)
+L = 1.0    # pole length (visual)
 cart_w = 0.4    # cart width
 cart_h = 0.2    # cart height
 
@@ -219,29 +222,27 @@ anim = @animate for k in eachindex(t)
     pole_y = py[k]
 
     # Base plot / axis
-    plot(; xlim=(xmin, xmax), ylim=(ymin, ymax),
-         aspect_ratio=:equal, legend=false,
-         title = "Cart–Pole (t = $(round(t[k], digits=2)) s)")
+    plot(; xlim = (xmin, xmax), ylim = (ymin, ymax), aspect_ratio = :equal,
+        legend = false, title = "Cart–Pole (t = $(round(t[k], digits=2)) s)")
 
     # Draw ground
-    plot!([xmin, xmax], [0, 0], lw=2, color=:black)
+    plot!([xmin, xmax], [0, 0], lw = 2, color = :black)
 
     # Draw cart as a rectangle
     rect = Shape(
         [cart_x - cart_w/2, cart_x + cart_w/2, cart_x + cart_w/2, cart_x - cart_w/2],
-        [0, 0, cart_h, cart_h]
-    )
-    plot!(rect, color=:gray)
+        [0, 0, cart_h, cart_h])
+    plot!(rect, color = :gray)
 
     # Draw pole as a line from cart center to tip
-    plot!([cart_x, pole_x], [cart_h/2, pole_y], lw=3, color=:red)
+    plot!([cart_x, pole_x], [cart_h/2, pole_y], lw = 3, color = :red)
 
     # Draw pivot point
-    scatter!([cart_x], [cart_h/2], ms=4, color=:black)
+    scatter!([cart_x], [cart_h/2], ms = 4, color = :black)
 end
 
 # Save GIF
-gif(anim, "./cart_pole.gif", fps=40)
+gif(anim, "./cart_pole.gif", fps = 40)
 ```
 
-![cart_pole](./cart_pole.gif)
\ No newline at end of file
+![cart_pole](./cart_pole.gif)

From e9dfef1d2ce172044dd5443d8cc06b72cdfbccd0 Mon Sep 17 00:00:00 2001
From: Qingyu Qu <2283984853@qq.com>
Date: Tue, 18 Nov 2025 17:56:37 +0800
Subject: [PATCH 11/11] Fix typo

---
 docs/src/tutorials/optimal_control.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/src/tutorials/optimal_control.md b/docs/src/tutorials/optimal_control.md
index b7631bc1f..f14a1ab14 100644
--- a/docs/src/tutorials/optimal_control.md
+++ b/docs/src/tutorials/optimal_control.md
@@ -109,7 +109,7 @@ The dynamic equation of the motion of cart-pole swing-up problem are given by:
 
 $$
 
-# \begin{bmatrix}
+\begin{bmatrix}
 \ddot{x} \\
 \ddot{\theta}
 \end{bmatrix}
@@ -129,7 +129,7 @@ where $x$ is the location of the cart, $\theta$ is the pole angle, $m_1$ is the
 
 By converting the dynamics to first order equations, we can get the formulation:
 
-# $$
+$$
 \begin{bmatrix}
 \dot{x} \\
 \dot{\theta} \\
@@ -138,7 +138,7 @@ By converting the dynamics to first order equations, we can get the formulation:
 \dot{e}
 \end{bmatrix}
 
-# f\!\left(
+f\!\left(
 \begin{bmatrix}
 x \\ \theta \\ \dot{x} \\ \dot{\theta} \\ e
 \end{bmatrix}