diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 2885d51b7..f6d09bd31 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -13,6 +13,7 @@ jobs: runs-on: ${{ matrix.os }} env: JULIA_NUM_THREADS: 8 + JULIA_EXTENDED_TESTS: true strategy: fail-fast: false matrix: diff --git a/src/StructuralEquationModels.jl b/src/StructuralEquationModels.jl index 49f5695c2..1ab585927 100644 --- a/src/StructuralEquationModels.jl +++ b/src/StructuralEquationModels.jl @@ -11,6 +11,7 @@ export *, ==, @StenoGraph, AbstractEdge, AbstractNode, DirectedEdge, Edge, EdgeM # type hierarchy include("types.jl") +include("objective_gradient_hessian.jl") # fitted objects include("frontend/fit/SemFit.jl") # specification of models @@ -76,21 +77,21 @@ include("frontend/fit/standard_errors/bootstrap.jl") export AbstractSem, AbstractSemSingle, AbstractSemCollection, Sem, SemFiniteDiff, SemForwardDiff, SemEnsemble, SemImply, - RAMSymbolic, RAM, ImplyEmpty, + RAMSymbolic, RAM, ImplyEmpty, imply, start_val, start_fabin3, start_simple, start_parameter_table, SemLoss, SemLossFunction, SemML, SemFIML, em_mvn, SemLasso, SemRidge, - SemConstant, SemWLS, + SemConstant, SemWLS, loss, SemDiff, - SemDiffEmpty, SemDiffOptim, SemDiffNLopt, NLoptConstraint, + SemDiffEmpty, SemDiffOptim, SemDiffNLopt, NLoptConstraint, diff, SemObs, - SemObsCommon, SemObsMissing, + SemObsCommon, SemObsMissing, observed, sem_fit, SemFit, minimum, solution, sem_summary, - objective, objective!, gradient, gradient!, hessian, hessian!, objective_gradient!, + objective!, gradient!, hessian!, objective_gradient!, objective_hessian!, gradient_hessian!, objective_gradient_hessian!, ParameterTable, EnsembleParameterTable, update_partable!, update_estimate!, update_start!, Fixed, fixed, Start, start, Label, label, diff --git a/src/frontend/fit/standard_errors/hessian.jl b/src/frontend/fit/standard_errors/hessian.jl index 8774637a1..56329633e 100644 --- a/src/frontend/fit/standard_errors/hessian.jl +++ b/src/frontend/fit/standard_errors/hessian.jl @@ -7,12 +7,12 @@ function se_hessian(sem_fit::SemFit; hessian = :finitediff) c = H_scaling(sem_fit.model) if hessian == :analytic - H = hessian!(sem_fit.model, sem_fit.solution) - elseif hessian == :analytic_last - H = hessian(sem_fit.model) + par = solution(sem_fit) + H = zeros(eltype(par), length(par), length(par)) + hessian!(H, sem_fit.model, sem_fit.solution) elseif hessian == :finitediff H = FiniteDiff.finite_difference_hessian( - x -> objective!(sem_fit.model, x)[1], + x -> objective!(sem_fit.model, x), sem_fit.solution ) elseif hessian == :optimizer diff --git a/src/frontend/specification/Sem.jl b/src/frontend/specification/Sem.jl index 1f07076ed..a6d9a8ec7 100644 --- a/src/frontend/specification/Sem.jl +++ b/src/frontend/specification/Sem.jl @@ -34,7 +34,7 @@ function SemFiniteDiff(; observed, imply, loss, diff = get_fields!(kwargs, observed, imply, loss, diff) - sem = SemFiniteDiff(observed, imply, loss, diff, has_gradient) + sem = SemFiniteDiff(observed, imply, loss, diff, Val(has_gradient)) return sem end @@ -53,7 +53,7 @@ function SemForwardDiff(; observed, imply, loss, diff = get_fields!(kwargs, observed, imply, loss, diff) - sem = SemForwardDiff(observed, imply, loss, diff, has_gradient) + sem = SemForwardDiff(observed, imply, loss, diff, Val(has_gradient)) return sem end @@ -121,6 +121,8 @@ function get_SemLoss(loss; kwargs...) else if !isa(loss, SemLossFunction) loss = SemLoss(loss(;kwargs...); kwargs...) + else + loss = SemLoss(loss; kwargs...) 
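+            # a ready-made SemLossFunction is only wrapped in a SemLoss container here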
end end return loss @@ -178,10 +180,14 @@ function Base.show(io::IO, loss::SemLoss) print(io, "SemLoss \n") print(io, "- Loss Functions \n") print(io, lossfuntypes...) - print(io, "- Fields \n") - print(io, " F: $(typeof(loss.F))) \n") - print(io, " G: $(typeof(loss.G))) \n") - print(io, " H: $(typeof(loss.H))) \n") + print(io, "- Weights \n") + for weight in loss.weights + if isnothing(weight.w) + print(io, " one \n") + else + print(io, "$(round.(weight.w, digits = 2)) \n") + end + end end function Base.show(io::IO, models::SemEnsemble) diff --git a/src/imply/RAM/generic.jl b/src/imply/RAM/generic.jl index 94a6014f3..bc294d459 100644 --- a/src/imply/RAM/generic.jl +++ b/src/imply/RAM/generic.jl @@ -2,7 +2,7 @@ ### Types ############################################################################ -mutable struct RAM{A1, A2, A3, A4, A5, A6, V, V2, I1, I2, I3, M1, M2, M3, S1, S2, S3, D} <: SemImply +mutable struct RAM{A1, A2, A3, A4, A5, A6, V, V2, I1, I2, I3, M1, M2, M3, M4, S1, S2, S3, B, D} <: SemImply Σ::A1 A::A2 S::A3 @@ -12,6 +12,7 @@ mutable struct RAM{A1, A2, A3, A4, A5, A6, V, V2, I1, I2, I3, M1, M2, M3, S1, S2 n_par::V ram_matrices::V2 + has_meanstructure::B A_indices::I1 S_indices::I2 @@ -20,6 +21,7 @@ mutable struct RAM{A1, A2, A3, A4, A5, A6, V, V2, I1, I2, I3, M1, M2, M3, S1, S2 F⨉I_A⁻¹::M1 F⨉I_A⁻¹S::M2 I_A::M3 + I_A⁻¹::M4 ∇A::S1 ∇S::S2 @@ -36,6 +38,7 @@ function RAM(; specification, vech = false, gradient = true, + meanstructure = false, kwargs...) if specification isa RAMMatrices @@ -84,7 +87,9 @@ function RAM(; end # μ - if !isnothing(M_indices) + if meanstructure + + has_meanstructure = Val(true) if gradient ∇M = get_matrix_derivative(M_indices, parameters, n_nod) @@ -95,6 +100,7 @@ function RAM(; μ = zeros(n_var) else + has_meanstructure = Val(false) M_indices = nothing M_pre = nothing μ = nothing @@ -111,6 +117,7 @@ function RAM(; n_par, ram_matrices, + has_meanstructure, A_indices, S_indices, @@ -119,6 +126,7 @@ function RAM(; F⨉I_A⁻¹, F⨉I_A⁻¹S, I_A, + copy(I_A), ∇A, ∇S, @@ -129,29 +137,31 @@ function RAM(; end ############################################################################ -### functors +### methods ############################################################################ -function (imply::RAM)(parameters, F, G, H, model) +# dispatch on meanstructure +objective!(imply::RAM, par, model) = + objective!(imply, par, model, imply.has_meanstructure) +gradient!(imply::RAM, par, model) = + gradient!(imply, par, model, imply.has_meanstructure) +# objective and gradient +function objective!(imply::RAM, parameters, model, has_meanstructure::Val{T}) where T + fill_A_S_M( - imply.A, + imply.A, imply.S, imply.M, imply.A_indices, imply.S_indices, imply.M_indices, parameters) - + imply.I_A .= I - imply.A - - if !G - copyto!(imply.F⨉I_A⁻¹, imply.F) - rdiv!(imply.F⨉I_A⁻¹, factorize(imply.I_A)) - else - imply.I_A .= LinearAlgebra.inv!(factorize(imply.I_A)) - imply.F⨉I_A⁻¹ .= imply.F*imply.I_A - end + + copyto!(imply.F⨉I_A⁻¹, imply.F) + rdiv!(imply.F⨉I_A⁻¹, factorize(imply.I_A)) Σ_RAM!( imply.Σ, @@ -159,12 +169,46 @@ function (imply::RAM)(parameters, F, G, H, model) imply.S, imply.F⨉I_A⁻¹S) - if !isnothing(imply.μ) + if T μ_RAM!(imply.μ, imply.F⨉I_A⁻¹, imply.M) end + +end + +function gradient!(imply::RAM, parameters, model, has_meanstructure::Val{T}) where T + fill_A_S_M( + imply.A, + imply.S, + imply.M, + imply.A_indices, + imply.S_indices, + imply.M_indices, + parameters) + + imply.I_A .= I - imply.A + copyto!(imply.I_A⁻¹, imply.I_A) + + imply.I_A⁻¹ .= 
LinearAlgebra.inv!(factorize(imply.I_A⁻¹)) + imply.F⨉I_A⁻¹ .= imply.F*imply.I_A⁻¹ + + Σ_RAM!( + imply.Σ, + imply.F⨉I_A⁻¹, + imply.S, + imply.F⨉I_A⁻¹S) + + if T + μ_RAM!(imply.μ, imply.F⨉I_A⁻¹, imply.M) + end + end +objective_gradient!(imply::RAM, par, model, has_meanstructure) = gradient!(imply, par, model, has_meanstructure) +objective_hessian!(imply::RAM, par, model, has_meanstructure) = gradient!(imply, par, model, has_meanstructure) +gradient_hessian!(imply::RAM, par, model, has_meanstructure) = gradient!(imply, par, model, has_meanstructure) +objective_gradient_hessian!(imply::RAM, par, model, has_meanstructure) = gradient!(imply, par, model, has_meanstructure) + ############################################################################ ### Recommended methods ############################################################################ @@ -203,6 +247,9 @@ M_indices(imply::RAM) = imply.M_indices F⨉I_A⁻¹(imply::RAM) = imply.F⨉I_A⁻¹ F⨉I_A⁻¹S(imply::RAM) = imply.F⨉I_A⁻¹S I_A(imply::RAM) = imply.I_A +I_A⁻¹(imply::RAM) = imply.I_A⁻¹ # only valid after gradient! has been called + +has_meanstructure(imply::RAM) = imply.has_meanstructure ############################################################################ ### additional functions diff --git a/src/imply/RAM/symbolic.jl b/src/imply/RAM/symbolic.jl index fc18dab51..fd37d005c 100644 --- a/src/imply/RAM/symbolic.jl +++ b/src/imply/RAM/symbolic.jl @@ -2,7 +2,7 @@ ### Types ############################################################################ -struct RAMSymbolic{F1, F2, F3, A1, A2, A3, S1, S2, S3, V, V2, F4, A4, F5, A5, D1} <: SemImplySymbolic +struct RAMSymbolic{F1, F2, F3, A1, A2, A3, S1, S2, S3, V, V2, F4, A4, F5, A5, D1, B} <: SemImplySymbolic Σ_function::F1 ∇Σ_function::F2 ∇²Σ_function::F3 @@ -19,6 +19,7 @@ struct RAMSymbolic{F1, F2, F3, A1, A2, A3, S1, S2, S3, V, V2, F4, A4, F5, A5, D1 ∇μ_function::F5 ∇μ::A5 identifier::D1 + has_meanstructure::B end ############################################################################ @@ -31,6 +32,7 @@ function RAMSymbolic(; vech = false, gradient = true, hessian = false, + meanstructure = false, kwargs...)
if specification isa RAMMatrices @@ -103,7 +105,8 @@ function RAMSymbolic(; end # μ - if !isnothing(M) + if meanstructure + has_meanstructure = Val(true) μ_symbolic = get_μ_symbolic_RAM(M, A, F) μ_function = Symbolics.build_function(μ_symbolic, par, expression=Val{false})[2] μ = zeros(size(μ_symbolic)) @@ -116,6 +119,7 @@ function RAMSymbolic(; ∇μ = nothing end else + has_meanstructure = Val(false) μ_function = nothing μ = nothing ∇μ_function = nothing @@ -138,27 +142,41 @@ function RAMSymbolic(; μ, ∇μ_function, ∇μ, - identifier + identifier, + has_meanstructure ) end ############################################################################ -### functors +### objective, gradient, hessian ############################################################################ -function (imply::RAMSymbolic)(par, F, G, H, model) +# dispatch on meanstructure +objective!(imply::RAMSymbolic, par, model) = + objective!(imply, par, model, imply.has_meanstructure) +gradient!(imply::RAMSymbolic, par, model) = + gradient!(imply, par, model, imply.has_meanstructure) + +# objective +function objective!(imply::RAMSymbolic, par, model, has_meanstructure::Val{T}) where T imply.Σ_function(imply.Σ, par) - if G || H - imply.∇Σ_function(imply.∇Σ, par) - end - if !isnothing(imply.μ) - imply.μ_function(imply.μ, par) - if G || H - imply.∇μ_function(imply.∇μ, par) - end - end + T && imply.μ_function(imply.μ, par) end +# gradient +function gradient!(imply::RAMSymbolic, par, model, has_meanstructure::Val{T}) where T + objective!(imply, par, model, imply.has_meanstructure) + imply.∇Σ_function(imply.∇Σ, par) + T && imply.∇μ_function(imply.∇μ, par) +end + +# other methods +hessian!(imply::RAMSymbolic, par, model) = gradient!(imply, par, model) +objective_gradient!(imply::RAMSymbolic, par, model) = gradient!(imply, par, model) +objective_hessian!(imply::RAMSymbolic, par, model) = gradient!(imply, par, model) +gradient_hessian!(imply::RAMSymbolic, par, model) = gradient!(imply, par, model) +objective_gradient_hessian!(imply::RAMSymbolic, par, model) = gradient!(imply, par, model) + ############################################################################ ### Recommended methods ############################################################################ @@ -189,6 +207,8 @@ end ∇Σ_function(imply::RAMSymbolic) = imply.∇Σ_function ∇²Σ_function(imply::RAMSymbolic) = imply.∇²Σ_function +has_meanstructure(imply::RAMSymbolic) = imply.has_meanstructure + ############################################################################ ### additional functions ############################################################################ diff --git a/src/imply/empty.jl b/src/imply/empty.jl index 598a21d3f..ebedbd513 100644 --- a/src/imply/empty.jl +++ b/src/imply/empty.jl @@ -34,10 +34,12 @@ function ImplyEmpty(; end ############################################################################ -### functors +### methods ############################################################################ -function (imply::ImplyEmpty)(par, F, G, H, model) end +objective!(imply::ImplyEmpty, par, model) = nothing +gradient!(imply::ImplyEmpty, par, model) = nothing +hessian!(imply::ImplyEmpty, par, model) = nothing ############################################################################ ### Recommended methods diff --git a/src/loss/ML/FIML.jl b/src/loss/ML/FIML.jl index 6a697c8a9..26a221d42 100644 --- a/src/loss/ML/FIML.jl +++ b/src/loss/ML/FIML.jl @@ -2,7 +2,7 @@ ### Types ############################################################################ 
-mutable struct SemFIML{INV, C, L, O, M, IM, I, T, U, W, FT, GT, HT} <: SemLossFunction +mutable struct SemFIML{INV, C, L, O, M, IM, I, T, U, W} <: SemLossFunction inverses::INV #preallocated inverses of imp_cov choleskys::C #preallocated choleskys logdets::L #logdets of implied covmats @@ -18,17 +18,13 @@ mutable struct SemFIML{INV, C, L, O, M, IM, I, T, U, W, FT, GT, HT} <: SemLossFu commutation_indices::U interaction::W - - objective::FT - gradient::GT - hessian::HT end ############################################################################ ### Constructors ############################################################################ -function SemFIML(;observed, specification, n_par, parameter_type = Float64, kwargs...) +function SemFIML(;observed, specification, kwargs...) inverses = broadcast(x -> zeros(x, x), Int64.(pattern_nvar_obs(observed))) choleskys = Array{Cholesky{Float64,Array{Float64,2}},1}(undef, length(inverses)) @@ -58,88 +54,50 @@ function SemFIML(;observed, specification, n_par, parameter_type = Float64, kwar imp_inv, mult, commutation_indices, - nothing, - - zeros(parameter_type, 1), - zeros(parameter_type, n_par), - zeros(parameter_type, n_par, n_par) + nothing ) end ############################################################################ -### functors +### methods ############################################################################ -function (semfiml::SemFIML)(par, F, G, H, model::Sem{O, I, L, D}) where {O, I <: SemImplySymbolic, L, D} +function objective!(semfiml::SemFIML, parameters, model) - if H throw(DomainError(H, "hessian for FIML is not implemented (yet)")) end + if !check_fiml(semfiml, model) return non_posdef_return(parameters) end - if !check_fiml(semfiml, model) - if G semfiml.gradient .+= 1.0 end - if F semfiml.objective[1] = Inf end - else - copy_per_pattern!(semfiml, model) - batch_cholesky!(semfiml, model) - #batch_sym_inv_update!(semfiml, model) - batch_inv!(semfiml, model) - for i in 1:size(pattern_n_obs(observed(model)), 1) - semfiml.meandiff[i] .= obs_mean(observed(model))[i] - semfiml.imp_mean[i] - end - #semfiml.logdets .= -logdet.(semfiml.inverses) - - if G - ∇F_FIML(semfiml.gradient, rows(observed(model)), semfiml, model) - semfiml.gradient .= semfiml.gradient/n_obs(observed(model)) - end - - if F - F_FIML(semfiml.objective, rows(observed(model)), semfiml, model) - semfiml.objective[1] = semfiml.objective[1]/n_obs(observed(model)) - end + prepare_SemFIML!(semfiml, model) - end - + objective = F_FIML(rows(observed(model)), semfiml, model, parameters) + return objective/n_obs(observed(model)) end -function (semfiml::SemFIML)(par, F, G, H, model::Sem{O, I, L, D}) where {O, I <: RAM, L, D} +function gradient!(semfiml::SemFIML, parameters, model) - if H throw(DomainError(H, "hessian for FIML is not implemented (yet)")) end + if !check_fiml(semfiml, model) return ones(eltype(parameters), size(parameters)) end - if !check_fiml(semfiml, model) - if G semfiml.gradient .+= 1.0 end - if F semfiml.objective[1] = Inf end - else - copy_per_pattern!(semfiml, model) - batch_cholesky!(semfiml, model) - #batch_sym_inv_update!(semfiml, model) - batch_inv!(semfiml, model) - for i in 1:size(pattern_n_obs(observed(model)), 1) - semfiml.meandiff[i] .= obs_mean(observed(model))[i] - semfiml.imp_mean[i] - end - #semfiml.logdets .= -logdet.(semfiml.inverses) - - if G - ∇F_FIML(semfiml.gradient, rows(observed(model)), semfiml, model) - semfiml.gradient .= semfiml.gradient/n_obs(observed(model)) - end - - if F - F_FIML(semfiml.objective, 
rows(observed(model)), semfiml, model) - semfiml.objective[1] = semfiml.objective[1]/n_obs(observed(model)) - end + prepare_SemFIML!(semfiml, model) - end - + gradient = ∇F_FIML(rows(observed(model)), semfiml, model)/n_obs(observed(model)) + return gradient +end + +function objective_gradient!(semfiml::SemFIML, parameters, model) + + if !check_fiml(semfiml, model) return non_posdef_return(parameters), ones(eltype(parameters), size(parameters)) end + + prepare_SemFIML!(semfiml, model) + + objective = F_FIML(rows(observed(model)), semfiml, model, parameters)/n_obs(observed(model)) + gradient = ∇F_FIML(rows(observed(model)), semfiml, model)/n_obs(observed(model)) + + return objective, gradient end ############################################################################ ### Recommended methods ############################################################################ -objective(lossfun::SemFIML) = lossfun.objective -gradient(lossfun::SemFIML) = lossfun.gradient -hessian(lossfun::SemFIML) = lossfun.hessian - update_observed(lossfun::SemFIML, observed::SemObs; kwargs...) = SemFIML(;observed = observed, kwargs...) ############################################################################ @@ -179,32 +137,33 @@ function ∇F_fiml_outer(JΣ, Jμ, imply, model, semfiml) Iₙ = sparse(1.0I, size(A(imply))...) P = kron(F⨉I_A⁻¹(imply), F⨉I_A⁻¹(imply)) - Q = kron(S(imply)*I_A(imply)', Iₙ) + Q = kron(S(imply)*I_A⁻¹(imply)', Iₙ) #commutation_matrix_pre_square_add!(Q, Q) Q2 = commutation_matrix_pre_square(Q, semfiml.commutation_indices) ∇Σ = P*(∇S(imply) + (Q+Q2)*∇A(imply)) - ∇μ = F⨉I_A⁻¹(imply)*∇M(imply) + kron((I_A(imply)*M(imply))', F⨉I_A⁻¹(imply))*∇A(imply) + ∇μ = F⨉I_A⁻¹(imply)*∇M(imply) + kron((I_A⁻¹(imply)*M(imply))', F⨉I_A⁻¹(imply))*∇A(imply) G = transpose(JΣ'*∇Σ-Jμ'*∇μ) return G end -function F_FIML(F, rows, semfiml, model) - F[1] = zero(eltype(F)) +function F_FIML(rows, semfiml, model, parameters) + F = zero(eltype(parameters)) for i = 1:size(rows, 1) - F[1] += F_one_pattern( + F += F_one_pattern( semfiml.meandiff[i], - semfiml.inverses[i], + semfiml.inverses[i], obs_cov(observed(model))[i], semfiml.logdets[i], pattern_n_obs(observed(model))[i]) end + return F end -function ∇F_FIML(G, rows, semfiml, model) +function ∇F_FIML(rows, semfiml, model) Jμ = zeros(Int64(n_man(model))) JΣ = zeros(Int64(n_man(model)^2)) @@ -220,7 +179,17 @@ function ∇F_FIML(G, rows, semfiml, model) JΣ, model) end - G .= ∇F_fiml_outer(JΣ, Jμ, imply(model), model, semfiml) + return ∇F_fiml_outer(JΣ, Jμ, imply(model), model, semfiml) +end + +function prepare_SemFIML!(semfiml, model) + copy_per_pattern!(semfiml, model) + batch_cholesky!(semfiml, model) + #batch_sym_inv_update!(semfiml, model) + batch_inv!(semfiml, model) + for i in 1:size(pattern_n_obs(observed(model)), 1) + semfiml.meandiff[i] .= obs_mean(observed(model))[i] - semfiml.imp_mean[i] + end end function copy_per_pattern!(inverses, source_inverses, means, source_means, patterns) diff --git a/src/loss/ML/ML.jl b/src/loss/ML/ML.jl index 6cfe41a3d..0ed908d74 100644 --- a/src/loss/ML/ML.jl +++ b/src/loss/ML/ML.jl @@ -4,22 +4,19 @@ ### Types ############################################################################ -struct SemML{INV,M,M2,B,FT,GT,HT} <: SemLossFunction - inverses::INV #preallocated inverses of imp_cov - mult::M +struct SemML{INV,M,M2,B, V} <: SemLossFunction + Σ⁻¹::INV + Σ⁻¹Σₒ::M meandiff::M2 approx_H::B - - objective::FT - gradient::GT - hessian::HT + has_meanstructure::V end 
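The new `has_meanstructure` field stores `Val(true)` or `Val(false)` instead of a `Bool`, so the meanstructure branch is chosen by dispatch and the `if T` checks below compile to constants. A minimal sketch of this pattern (illustrative names, not code from the package):

    struct MyLoss{B}
        has_meanstructure::B                       # Val(true) or Val(false)
    end
    MyLoss(meanstructure::Bool) = MyLoss(Val(meanstructure))

    # the entry point forwards to a method that dispatches on the Val type
    evaluate(l::MyLoss, x) = evaluate(l, x, l.has_meanstructure)
    evaluate(l::MyLoss, x, ::Val{T}) where T =
        T ? sum(abs2, x) : sum(x)                  # branch resolved at compile time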
############################################################################ ### Constructors ############################################################################ -function SemML(;observed, n_par, approx_H = false, parameter_type = Float64, kwargs...) +function SemML(;observed, meanstructure = false, approx_H = false, kwargs...) isnothing(obs_mean(observed)) ? meandiff = nothing : meandiff = copy(obs_mean(observed)) @@ -28,192 +25,401 @@ function SemML(;observed, n_par, approx_H = false, parameter_type = Float64, kwa copy(obs_cov(observed)), meandiff, approx_H, - - zeros(parameter_type, 1), - zeros(parameter_type, n_par), - zeros(parameter_type, n_par, n_par) + Val(meanstructure) ) end ############################################################################ -### functors +### objective, gradient, hessian methods ############################################################################ -# for symbolic imply type -function (semml::SemML)( - par, - F, - G, - H, - model::Sem{O, I, L, D}) where {O, I <: SemImplySymbolic, L, D} - semml.inverses .= Σ(imply(model)) - a = cholesky!(Symmetric(semml.inverses); check = false) +# first, dispatch for meanstructure +objective!(semml::SemML, par, model::AbstractSemSingle) = objective!(semml::SemML, par, model, semml.has_meanstructure, imply(model)) +gradient!(semml::SemML, par, model::AbstractSemSingle) = gradient!(semml::SemML, par, model, semml.has_meanstructure, imply(model)) +hessian!(semml::SemML, par, model::AbstractSemSingle) = hessian!(semml::SemML, par, model, semml.has_meanstructure, imply(model)) +objective_gradient!(semml::SemML, par, model::AbstractSemSingle) = objective_gradient!(semml::SemML, par, model, semml.has_meanstructure, imply(model)) +objective_hessian!(semml::SemML, par, model::AbstractSemSingle) = objective_hessian!(semml::SemML, par, model, semml.has_meanstructure, imply(model)) +gradient_hessian!(semml::SemML, par, model::AbstractSemSingle) = gradient_hessian!(semml::SemML, par, model, semml.has_meanstructure, imply(model)) +objective_gradient_hessian!(semml::SemML, par, model::AbstractSemSingle) = objective_gradient_hessian!(semml::SemML, par, model, semml.has_meanstructure, imply(model)) - if !isposdef(a) - if G semml.gradient .= 1.0 end - if H semml.hessian .= 1.0 end - if F semml.objective[1] = Inf end - else - ld = logdet(a) - semml.inverses .= LinearAlgebra.inv!(a) - - # without means - if isnothing(μ(imply(model))) - - if G && H - J = (vec(semml.inverses)-vec(semml.inverses*obs_cov(observed(model))*semml.inverses))' - gradient = J*∇Σ(imply(model)) - semml.gradient .= gradient' - if semml.approx_H - hessian = 2*∇Σ(imply(model))'*kron(semml.inverses, semml.inverses)*∇Σ(imply(model)) - end - if !semml.approx_H - M = semml.inverses*obs_cov(observed(model))*semml.inverses - H_outer = - 2*kron(M, semml.inverses) - - kron(semml.inverses, semml.inverses) - hessian = ∇Σ(imply(model))'*H_outer*∇Σ(imply(model)) - ∇²Σ_function(imply(model))(∇²Σ(imply(model)), J, par) - hessian += ∇²Σ(imply(model)) - end - semml.hessian .= hessian - end +############################################################################ +### Symbolic Imply Types - if G && !H - gradient = (vec(semml.inverses)-vec(semml.inverses*obs_cov(observed(model))*semml.inverses))'*∇Σ(imply(model)) - semml.gradient .= gradient' - end +function objective!(semml::SemML, par, model::AbstractSemSingle, has_meanstructure::Val{T}, imp::SemImplySymbolic) where T + + let Σ = Σ(imply(model)), Σₒ = obs_cov(observed(model)), Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), Σ⁻¹ = Σ⁻¹(semml), + 
μ = μ(imply(model)), μₒ = obs_mean(observed(model)) - if !G && H - J = (vec(semml.inverses)-vec(semml.inverses*obs_cov(observed(model))*semml.inverses))' - if semml.approx_H - hessian = 2*∇Σ(imply(model))'*kron(semml.inverses, semml.inverses)*∇Σ(imply(model)) - end - if !semml.approx_H - M = semml.inverses*obs_cov(observed(model))*semml.inverses - H_outer = - 2*kron(M, semml.inverses) - - kron(semml.inverses, semml.inverses) - hessian = ∇Σ(imply(model))'*H_outer*∇Σ(imply(model)) - ∇²Σ_function(imply(model))(∇²Σ(imply(model)), J, par) - hessian += ∇²Σ(imply(model)) - end - semml.hessian .= hessian - end + copyto!(Σ⁻¹, Σ) + Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) - if F - mul!(semml.mult, semml.inverses, obs_cov(observed(model))) - semml.objective[1] = ld + tr(semml.mult) - end + if !isposdef(Σ_chol) return non_posdef_return(par) end + + ld = logdet(Σ_chol) + Σ⁻¹ .= LinearAlgebra.inv!(Σ_chol) + mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) + + if T + μ₋ = μₒ - μ + return ld + tr(Σ⁻¹Σₒ) + dot(μ₋, Σ⁻¹, μ₋) else - # with means - μ_diff = obs_mean(observed(model)) - μ(imply(model)) - diff⨉inv = μ_diff'*semml.inverses - if H throw(DomainError(H, "hessian of ML + meanstructure is not implemented yet")) end - if G - gradient = - vec( - semml.inverses*( - LinearAlgebra.I - - obs_cov(observed(model))*semml.inverses - - μ_diff*diff⨉inv))'*∇Σ(imply(model)) - - 2*diff⨉inv*∇μ(imply(model)) - semml.gradient .= gradient' - end - if F - mul!(semml.mult, semml.inverses, obs_cov(observed(model))) - semml.objective[1] = ld + tr(semml.mult) + diff⨉inv*μ_diff - end + return ld + tr(Σ⁻¹Σₒ) + end + end +end + +function gradient!(semml::SemML, par, model::AbstractSemSingle, has_meanstructure::Val{T}, imp::SemImplySymbolic) where T + + let Σ = Σ(imply(model)), Σₒ = obs_cov(observed(model)), Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), Σ⁻¹ = Σ⁻¹(semml), ∇Σ = ∇Σ(imply(model)), + μ = μ(imply(model)), ∇μ = ∇μ(imply(model)), μₒ = obs_mean(observed(model)) + + copyto!(Σ⁻¹, Σ) + Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) + + if !isposdef(Σ_chol) + return ones(eltype(par), size(par)) + end + + Σ⁻¹ .= LinearAlgebra.inv!(Σ_chol) + mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) + + if T + μ₋ = μₒ - μ + μ₋ᵀΣ⁻¹ = μ₋'*Σ⁻¹ + gradient = vec(Σ⁻¹*(I - Σₒ*Σ⁻¹ - μ₋*μ₋ᵀΣ⁻¹))'*∇Σ - 2*μ₋ᵀΣ⁻¹*∇μ + return gradient' + else + gradient = (vec(Σ⁻¹)-vec(Σ⁻¹Σₒ*Σ⁻¹))'*∇Σ + return gradient' end end end -# for non-symbolic imply type -function (semml::SemML)(par, F, G, H, model::Sem{O, I, L, D}) where {O, I <: RAM, L, D} +function hessian!(semml::SemML, par, model::AbstractSemSingle, has_meanstructure::Val{false}, imp::SemImplySymbolic) + + let Σ = Σ(imply(model)), ∇Σ = ∇Σ(imply(model)), Σₒ = obs_cov(observed(model)), Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), Σ⁻¹ = Σ⁻¹(semml), + ∇²Σ_function! 
= ∇²Σ_function(imply(model)), ∇²Σ = ∇²Σ(imply(model)) + + copyto!(Σ⁻¹, Σ) + Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) - if H - throw(DomainError(H, "hessian for ML estimation with non-symbolic imply type is not implemented")) + if !isposdef(Σ_chol) + return diagm(fill(one(eltype(par)), length(par))) end - semml.inverses .= Σ(imply(model)) - a = cholesky!(Symmetric(semml.inverses); check = false) + Σ⁻¹ .= LinearAlgebra.inv!(Σ_chol) - if !isposdef(a) - if G semml.gradient .= 1.0 end - if H semml.hessian .= 1.0 end - if F semml.objective[1] = Inf end + if semml.approx_H + hessian = 2*∇Σ'*kron(Σ⁻¹, Σ⁻¹)*∇Σ else - ld = logdet(a) - semml.inverses .= LinearAlgebra.inv!(a) - - # without means - if isnothing(μ(imply(model))) - - if G - gradient = SemML_gradient( - S(imply(model)), - F⨉I_A⁻¹(imply(model)), - semml.inverses, - I_A(imply(model)), - ∇A(imply(model)), - ∇S(imply(model)), - obs_cov(observed(model))) - semml.gradient .= gradient' - end + mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) + Σ⁻¹ΣₒΣ⁻¹ = Σ⁻¹Σₒ*Σ⁻¹ + # inner + J = vec(Σ⁻¹ - Σ⁻¹ΣₒΣ⁻¹)' + ∇²Σ_function!(∇²Σ, J, par) + # outer + H_outer = 2*kron(Σ⁻¹ΣₒΣ⁻¹, Σ⁻¹) - kron(Σ⁻¹, Σ⁻¹) + hessian = ∇Σ'*H_outer*∇Σ + hessian += ∇²Σ + end + + return hessian + end +end + +function hessian!(semml::SemML, par, model::AbstractSemSingle, has_meanstructure::Val{true}, imp::SemImplySymbolic) + throw(ArgumentError("hessian of ML + meanstructure is not available")) +end + +function objective_gradient!(semml::SemML, par, model::AbstractSemSingle, has_meanstructure::Val{T}, imp::SemImplySymbolic) where T + + let Σ = Σ(imply(model)), Σₒ = obs_cov(observed(model)), Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), Σ⁻¹ = Σ⁻¹(semml), + μ = μ(imply(model)), μₒ = obs_mean(observed(model)), + ∇Σ = ∇Σ(imply(model)), ∇μ = ∇μ(imply(model)) + + copyto!(Σ⁻¹, Σ) + Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) - if F - mul!(semml.mult, semml.inverses, obs_cov(observed(model))) - semml.objective[1] = ld + tr(semml.mult) + if !isposdef(Σ_chol) + return non_posdef_return(par), ones(eltype(par), size(par)) + else + ld = logdet(Σ_chol) + Σ⁻¹ .= LinearAlgebra.inv!(Σ_chol) + mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) + + if T + μ₋ = μₒ - μ + μ₋ᵀΣ⁻¹ = μ₋'*Σ⁻¹ + + objective = ld + tr(Σ⁻¹Σₒ) + dot(μ₋, Σ⁻¹, μ₋) + gradient = vec(Σ⁻¹*(I - Σₒ*Σ⁻¹ - μ₋*μ₋ᵀΣ⁻¹))'*∇Σ - 2*μ₋ᵀΣ⁻¹*∇μ + return objective, gradient' + else + objective = ld + tr(Σ⁻¹Σₒ) + gradient = (vec(Σ⁻¹)-vec(Σ⁻¹Σₒ*Σ⁻¹))'*∇Σ + return objective, gradient' end - - else # with means - - μ_diff = obs_mean(observed(model)) - μ(imply(model)) - diff⨉inv = μ_diff'*semml.inverses - - if G - gradient = SemML_gradient( - S(imply(model)), - F⨉I_A⁻¹(imply(model)), - semml.inverses, - I_A(imply(model)), - ∇A(imply(model)), - ∇S(imply(model)), - obs_cov(observed(model))) + - SemML_gradient_meanstructure( - diff⨉inv, - F⨉I_A⁻¹(imply(model)), - I_A(imply(model)), - S(imply(model)), - M(imply(model)), - ∇M(imply(model)), - ∇A(imply(model)), - ∇S(imply(model))) - semml.gradient .= gradient' + end + end +end + +function objective_hessian!(semml::SemML, par, model::AbstractSemSingle, has_meanstructure::Val{T}, imp::SemImplySymbolic) where T + + let Σ = Σ(imply(model)), Σₒ = obs_cov(observed(model)), Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), Σ⁻¹ = Σ⁻¹(semml), + ∇Σ = ∇Σ(imply(model)), ∇μ = ∇μ(imply(model)), + ∇²Σ_function! 
= ∇²Σ_function(imply(model)), ∇²Σ = ∇²Σ(imply(model)) + + copyto!(Σ⁻¹, Σ) + Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) + + if !isposdef(Σ_chol) + return non_posdef_return(par), diagm(fill(one(eltype(par)), length(par))) + else + ld = logdet(Σ_chol) + Σ⁻¹ .= LinearAlgebra.inv!(Σ_chol) + mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) + objective = ld + tr(Σ⁻¹Σₒ) + + if semml.approx_H + hessian = 2*∇Σ'*kron(Σ⁻¹, Σ⁻¹)*∇Σ + else + Σ⁻¹ΣₒΣ⁻¹ = Σ⁻¹Σₒ*Σ⁻¹ + # inner + J = vec(Σ⁻¹ - Σ⁻¹ΣₒΣ⁻¹)' + ∇²Σ_function!(∇²Σ, J, par) + # outer + H_outer = 2*kron(Σ⁻¹ΣₒΣ⁻¹, Σ⁻¹) - kron(Σ⁻¹, Σ⁻¹) + hessian = ∇Σ'*H_outer*∇Σ + hessian += ∇²Σ end - if F - mul!(semml.mult, semml.inverses, obs_cov(observed(model))) - semml.objective[1] = ld + tr(semml.mult) + diff⨉inv*μ_diff - end + return objective, hessian + end + end +end + +function objective_hessian!(semml::SemML, par, model::AbstractSemSingle, has_meanstructure::Val{true}, imp::SemImplySymbolic) + throw(ArgumentError("hessian of ML + meanstructure is not available")) +end + +function gradient_hessian!(semml::SemML, par, model::AbstractSemSingle, has_meanstructure::Val{false}, imp::SemImplySymbolic) + + let Σ = Σ(imply(model)), Σₒ = obs_cov(observed(model)), Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), Σ⁻¹ = Σ⁻¹(semml), + ∇Σ = ∇Σ(imply(model)), ∇μ = ∇μ(imply(model)), + ∇²Σ_function! = ∇²Σ_function(imply(model)), ∇²Σ = ∇²Σ(imply(model)) + + copyto!(Σ⁻¹, Σ) + Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) + + if !isposdef(Σ_chol) + return ones(eltype(par), size(par)), diagm(fill(one(eltype(par)), length(par))) + end + + Σ⁻¹ .= LinearAlgebra.inv!(Σ_chol) + mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) + + Σ⁻¹ΣₒΣ⁻¹ = Σ⁻¹Σₒ*Σ⁻¹ + + J = vec(Σ⁻¹ - Σ⁻¹ΣₒΣ⁻¹)' + gradient = J*∇Σ + + if semml.approx_H + hessian = 2*∇Σ'*kron(Σ⁻¹, Σ⁻¹)*∇Σ + else + # inner + ∇²Σ_function!(∇²Σ, J, par) + # outer + H_outer = 2*kron(Σ⁻¹ΣₒΣ⁻¹, Σ⁻¹) - kron(Σ⁻¹, Σ⁻¹) + hessian = ∇Σ'*H_outer*∇Σ + hessian += ∇²Σ + end + + return gradient', hessian + end +end + +function gradient_hessian!(semml::SemML, par, model::AbstractSemSingle, has_meanstructure::Val{true}, imp::SemImplySymbolic) + throw(ArgumentError("hessian of ML + meanstructure is not available")) +end + +function objective_gradient_hessian!(semml::SemML, par, model::AbstractSemSingle, has_meanstructure::Val{false}, imp::SemImplySymbolic) + + let Σ = Σ(imply(model)), Σₒ = obs_cov(observed(model)), Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), Σ⁻¹ = Σ⁻¹(semml), ∇Σ = ∇Σ(imply(model)), + ∇²Σ_function! 
= ∇²Σ_function(imply(model)), ∇²Σ = ∇²Σ(imply(model)) + + copyto!(Σ⁻¹, Σ) + Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) + + if !isposdef(Σ_chol) + objective = non_posdef_return(par) + gradient = ones(eltype(par), size(par)) + hessian = diagm(fill(one(eltype(par)), length(par))) + return objective, gradient, hessian + else + ld = logdet(Σ_chol) + Σ⁻¹ .= LinearAlgebra.inv!(Σ_chol) + mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) + objective = ld + tr(Σ⁻¹Σₒ) + end + + Σ⁻¹ΣₒΣ⁻¹ = Σ⁻¹Σₒ*Σ⁻¹ + + J = vec(Σ⁻¹ - Σ⁻¹ΣₒΣ⁻¹)' + gradient = J*∇Σ + + if semml.approx_H + hessian = 2*∇Σ'*kron(Σ⁻¹, Σ⁻¹)*∇Σ + else + Σ⁻¹ΣₒΣ⁻¹ = Σ⁻¹Σₒ*Σ⁻¹ + # inner + ∇²Σ_function!(∇²Σ, J, par) + # outer + H_outer = 2*kron(Σ⁻¹ΣₒΣ⁻¹, Σ⁻¹) - kron(Σ⁻¹, Σ⁻¹) + hessian = ∇Σ'*H_outer*∇Σ + hessian += ∇²Σ + end + + return objective, gradient', hessian + end +end + +function objective_gradient_hessian!(semml::SemML, par, model::AbstractSemSingle, has_meanstructure::Val{true}, imp::SemImplySymbolic) + throw(ArgumentError("hessian of ML + meanstructure is not available")) +end + +############################################################################ +### Non-Symbolic Imply Types + +# no hessians --------------------------------------------------------------------------------------------------------------------- + +function hessian!(semml::SemML, par, model::AbstractSemSingle, has_meanstructure, imp::RAM) + throw(ArgumentError("hessian of ML + non-symbolic imply type is not available")) +end + +function objective_hessian!(semml::SemML, par, model::AbstractSemSingle, has_meanstructure, imp::RAM) + throw(ArgumentError("hessian of ML + non-symbolic imply type is not available")) +end + +function gradient_hessian!(semml::SemML, par, model::AbstractSemSingle, has_meanstructure, imp::RAM) + throw(ArgumentError("hessian of ML + non-symbolic imply type is not available")) +end + +function objective_gradient_hessian!(semml::SemML, par, model::AbstractSemSingle, has_meanstructure, imp::RAM) + throw(ArgumentError("hessian of ML + non-symbolic imply type is not available")) +end + +# objective, gradient ---------------------------------------------------------------------------------------------------------------------- + +function objective!(semml::SemML, par, model::AbstractSemSingle, has_meanstructure::Val{T}, imp::RAM) where T + let Σ = Σ(imply(model)), Σₒ = obs_cov(observed(model)), Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), Σ⁻¹ = Σ⁻¹(semml), + μ = μ(imply(model)), μₒ = obs_mean(observed(model)) + + copyto!(Σ⁻¹, Σ) + Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) + + if !isposdef(Σ_chol) return non_posdef_return(par) end + + ld = logdet(Σ_chol) + Σ⁻¹ .= LinearAlgebra.inv!(Σ_chol) + mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) + + if T + μ₋ = μₒ - μ + return ld + tr(Σ⁻¹Σₒ) + dot(μ₋, Σ⁻¹, μ₋) + else + return ld + tr(Σ⁻¹Σₒ) + end + end +end + +function gradient!(semml::SemML, par, model::AbstractSemSingle, has_meanstructure::Val{T}, imp::RAM) where T + + let Σ = Σ(imply(model)), Σₒ = obs_cov(observed(model)), Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), Σ⁻¹ = Σ⁻¹(semml), + S = S(imply(model)), M = M(imply(model)), F⨉I_A⁻¹ = F⨉I_A⁻¹(imply(model)), I_A⁻¹ = I_A⁻¹(imply(model)), + ∇A = ∇A(imply(model)), ∇S = ∇S(imply(model)), ∇M = ∇M(imply(model)), + μ = μ(imply(model)), μₒ = obs_mean(observed(model)) + + copyto!(Σ⁻¹, Σ) + Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) + + if !isposdef(Σ_chol) return ones(eltype(par), size(par)) end + + Σ⁻¹ .= LinearAlgebra.inv!(Σ_chol) + #mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) + + C = F⨉I_A⁻¹'*(I-Σₒ*Σ⁻¹)'*Σ⁻¹*F⨉I_A⁻¹ + gradient = 2vec(C*S*I_A⁻¹')'∇A + vec(C)'∇S + + if T + μ₋ = μₒ - μ + μ₋ᵀΣ⁻¹ = μ₋'*Σ⁻¹ + k 
= μ₋ᵀΣ⁻¹*F⨉I_A⁻¹ + + gradient += -2k*∇M - 2vec(k'M'I_A⁻¹')'∇A - 2vec(k'k*S*I_A⁻¹')'∇A - vec(k'k)'∇S + end + + return gradient' + end +end + +function objective_gradient!(semml::SemML, par, model::AbstractSemSingle, has_meanstructure::Val{T}, imp::RAM) where T + + let Σ = Σ(imply(model)), Σₒ = obs_cov(observed(model)), Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), Σ⁻¹ = Σ⁻¹(semml), + S = S(imply(model)), M = M(imply(model)), F⨉I_A⁻¹ = F⨉I_A⁻¹(imply(model)), I_A⁻¹ = I_A⁻¹(imply(model)), + ∇A = ∇A(imply(model)), ∇S = ∇S(imply(model)), ∇M = ∇M(imply(model)), + μ = μ(imply(model)), μₒ = obs_mean(observed(model)) + + copyto!(Σ⁻¹, Σ) + Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) + + if !isposdef(Σ_chol) + objective = non_posdef_return(par) + gradient = ones(eltype(par), size(par)) + return objective, gradient + else + ld = logdet(Σ_chol) + Σ⁻¹ .= LinearAlgebra.inv!(Σ_chol) + mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) + objective = ld + tr(Σ⁻¹Σₒ) + + C = F⨉I_A⁻¹'*(I-Σₒ*Σ⁻¹)'*Σ⁻¹*F⨉I_A⁻¹ + gradient = 2vec(C*S*I_A⁻¹')'∇A + vec(C)'∇S + + if T + μ₋ = μₒ - μ + objective += dot(μ₋, Σ⁻¹, μ₋) + + μ₋ᵀΣ⁻¹ = μ₋'*Σ⁻¹ + k = μ₋ᵀΣ⁻¹*F⨉I_A⁻¹ + gradient += -2k*∇M - 2vec(k'M'I_A⁻¹')'∇A - 2vec(k'k*S*I_A⁻¹')'∇A - vec(k'k)'∇S end + return objective, gradient' end end end ############################################################################ -### recommended methods +### additional functions ############################################################################ -objective(lossfun::SemML) = lossfun.objective -gradient(lossfun::SemML) = lossfun.gradient -hessian(lossfun::SemML) = lossfun.hessian +function non_posdef_return(par) + if eltype(par) <: AbstractFloat + return floatmax(eltype(par)) + else + return typemax(eltype(par)) + end +end + +############################################################################ +### recommended methods +############################################################################ update_observed(lossfun::SemML, observed::SemObsMissing; kwargs...) = throw(ArgumentError("ML estimation does not work with missing data - use FIML instead")) function update_observed(lossfun::SemML, observed::SemObs; kwargs...) - if (size(lossfun.inverses) == size(obs_cov(observed))) & (isnothing(lossfun.meandiff) == isnothing(obs_mean(observed))) + if size(lossfun.Σ⁻¹) == size(obs_cov(observed)) return lossfun else return SemML(;observed = observed, kwargs...) @@ -221,25 +427,11 @@ function update_observed(lossfun::SemML, observed::SemObs; kwargs...) 
end ############################################################################ -### additional functions +### additional methods ############################################################################ -function SemML_gradient_common(F⨉I_A⁻¹, obs_cov, Σ⁻¹) - M = transpose(F⨉I_A⁻¹)*transpose(LinearAlgebra.I-obs_cov*Σ⁻¹)*Σ⁻¹*F⨉I_A⁻¹ - return M -end - -function SemML_gradient(S, F⨉I_A⁻¹, Σ⁻¹, I_A⁻¹, ∇A, ∇S, obs_cov) - M = SemML_gradient_common(F⨉I_A⁻¹, obs_cov, Σ⁻¹) - G = 2vec(M*S*I_A⁻¹')'∇A + vec(M)'∇S - return G -end - -function SemML_gradient_meanstructure(diff⨉inv, F⨉I_A⁻¹, I_A⁻¹, S, M, ∇M, ∇A, ∇S) - k = diff⨉inv*F⨉I_A⁻¹ - G = -2k*∇M - 2vec(k'M'I_A⁻¹')'∇A - 2vec(k'k*S*I_A⁻¹')'∇A - vec(k'k)'∇S - return G -end +Σ⁻¹(semml::SemML) = semml.Σ⁻¹ +Σ⁻¹Σₒ(semml::SemML) = semml.Σ⁻¹Σₒ ############################################################################ ### Pretty Printing diff --git a/src/loss/WLS/WLS.jl b/src/loss/WLS/WLS.jl index 7c93cbd13..b1c425f24 100644 --- a/src/loss/WLS/WLS.jl +++ b/src/loss/WLS/WLS.jl @@ -4,22 +4,19 @@ ### Types ############################################################################ -struct SemWLS{Vt, St, B, C, FT, GT, HT} <: SemLossFunction +struct SemWLS{Vt, St, B, C, B2} <: SemLossFunction V::Vt - s::St + σₒ::St approx_H::B V_μ::C - - objective::FT - gradient::GT - hessian::HT + has_meanstructure::B2 end ############################################################################ ### Constructors ############################################################################ -function SemWLS(;observed, n_par, wls_weight_matrix = nothing, meanstructure = false, V_μ = nothing, approx_H = false, parameter_type = Float64, kwargs...) +function SemWLS(;observed, wls_weight_matrix = nothing, V_μ = nothing, approx_H = false, meanstructure = false, kwargs...) 
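+    # the next lines half-vectorize the observed covariance: σₒ = vech(obs_cov(observed)),
+    # which the WLS fit function later compares against the implied σ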
ind = CartesianIndices(obs_cov(observed)) ind = filter(x -> (x[1] >= x[2]), ind) s = obs_cov(observed)[ind] @@ -45,69 +42,173 @@ function SemWLS(;observed, n_par, wls_weight_matrix = nothing, meanstructure = f s, approx_H, V_μ, - - zeros(parameter_type, 1), - zeros(parameter_type, n_par), - zeros(parameter_type, n_par, n_par)) + Val(meanstructure) + ) end ############################################################################ -### functors +### methods ############################################################################ -function (semwls::SemWLS)(par, F, G, H, model) - σ_diff = semwls.s - Σ(imply(model)) - if isnothing(semwls.V_μ) - # without meanstructure - if G && H - J = (-2*(σ_diff)'*semwls.V)' - gradient = ∇Σ(imply(model))'*J - semwls.gradient .= gradient - hessian = 2*∇Σ(imply(model))'*semwls.V*∇Σ(imply(model)) - if !semwls.approx_H - ∇²Σ_function(imply(model))(∇²Σ(imply(model)), J, par) - hessian += ∇²Σ(imply(model)) - end - semwls.hessian .= hessian +objective!(semwls::SemWLS, par, model::AbstractSemSingle) = objective!(semwls::SemWLS, par, model, semwls.has_meanstructure) +gradient!(semwls::SemWLS, par, model::AbstractSemSingle) = gradient!(semwls::SemWLS, par, model, semwls.has_meanstructure) +hessian!(semwls::SemWLS, par, model::AbstractSemSingle) = hessian!(semwls::SemWLS, par, model, semwls.has_meanstructure) + +objective_gradient!(semwls::SemWLS, par, model::AbstractSemSingle) = objective_gradient!(semwls::SemWLS, par, model, semwls.has_meanstructure) +objective_hessian!(semwls::SemWLS, par, model::AbstractSemSingle) = objective_hessian!(semwls::SemWLS, par, model, semwls.has_meanstructure) +gradient_hessian!(semwls::SemWLS, par, model::AbstractSemSingle) = gradient_hessian!(semwls::SemWLS, par, model, semwls.has_meanstructure) + +objective_gradient_hessian!(semwls::SemWLS, par, model::AbstractSemSingle) = objective_gradient_hessian!(semwls::SemWLS, par, model, semwls.has_meanstructure) + + +function objective!(semwls::SemWLS, par, model::AbstractSemSingle, has_meanstructure::Val{T}) where T + + let σ = Σ(imply(model)), μ = μ(imply(model)), σₒ = semwls.σₒ, μₒ = obs_mean(observed(model)), V = semwls.V, V_μ = semwls.V_μ, + + σ₋ = σₒ - σ + + if T + μ₋ = μₒ - μ + return dot(σ₋, V, σ₋) + dot(μ₋, V_μ, μ₋) + else + return dot(σ₋, V, σ₋) end - if !G && H - hessian = 2*∇Σ(imply(model))'*semwls.V*∇Σ(imply(model)) + end +end + +function gradient!(semwls::SemWLS, par, model::AbstractSemSingle, has_meanstructure::Val{T}) where T + + let σ = Σ(imply(model)), μ = μ(imply(model)), σₒ = semwls.σₒ, μₒ = obs_mean(observed(model)), V = semwls.V, V_μ = semwls.V_μ, + ∇σ = ∇Σ(imply(model)), ∇μ = ∇μ(imply(model)) + + σ₋ = σₒ - σ + + if T + μ₋ = μₒ - μ + return -2*(σ₋'*V*∇σ + μ₋'*V_μ*∇μ)' + else + return -2*(σ₋'*V*∇σ)' + end + end +end + +function hessian!(semwls::SemWLS, par, model::AbstractSemSingle, has_meanstructure::Val{T}) where T + + let σ = Σ(imply(model)), σₒ = semwls.σₒ, V = semwls.V, + ∇σ = ∇Σ(imply(model)), + ∇²Σ_function! 
= ∇²Σ_function(imply(model)), ∇²Σ = ∇²Σ(imply(model)) + + σ₋ = σₒ - σ + + if T + throw(ArgumentError("hessian of WLS with meanstructure is not available")) + else + hessian = 2*∇σ'*V*∇σ if !semwls.approx_H - J = (-2*(σ_diff)'*semwls.V)' - ∇²Σ_function(imply(model))(∇²Σ(imply(model)), J, par) - hessian += ∇²Σ(imply(model)) + J = -2*(σ₋'*semwls.V)' + ∇²Σ_function!(∇²Σ, J, par) + hessian += ∇²Σ end - semwls.hessian .= hessian + return hessian end - if G && !H - gradient = (-2*(σ_diff)'*semwls.V*∇Σ(imply(model)))' - semwls.gradient .= gradient + end +end + +function objective_gradient!(semwls::SemWLS, par, model::AbstractSemSingle, has_meanstructure::Val{T}) where T + + let σ = Σ(imply(model)), μ = μ(imply(model)), σₒ = semwls.σₒ, μₒ = obs_mean(observed(model)), V = semwls.V, V_μ = semwls.V_μ, + ∇σ = ∇Σ(imply(model)), ∇μ = ∇μ(imply(model)) + + σ₋ = σₒ - σ + + if T + μ₋ = μₒ - μ + objective = dot(σ₋, V, σ₋) + dot(μ₋', V_μ, μ₋) + gradient = -2*(σ₋'*V*∇σ + μ₋'*V_μ*∇μ)' + return objective, gradient + else + objective = dot(σ₋, V, σ₋) + gradient = -2*(σ₋'*V*∇σ)' + return objective, gradient end - if F - semwls.objective[1] = dot(σ_diff, semwls.V, σ_diff) + end +end + +function objective_hessian!(semwls::SemWLS, par, model::AbstractSemSingle, has_meanstructure::Val{T}) where T + + let σ = Σ(imply(model)), σₒ = semwls.σₒ, V = semwls.V, + ∇σ = ∇Σ(imply(model)), + ∇²Σ_function! = ∇²Σ_function(imply(model)), ∇²Σ = ∇²Σ(imply(model)) + + σ₋ = σₒ - σ + + objective = dot(σ₋, V, σ₋) + + hessian = 2*∇σ'*V*∇σ + if !semwls.approx_H + J = -2*(σ₋'*semwls.V)' + ∇²Σ_function!(∇²Σ, J, par) + hessian += ∇²Σ end - else - # with meanstructure - μ_diff = obs_mean(observed(model)) - μ(imply(model)) - if H throw(DomainError(H, "hessian of WLS with meanstructure is not available")) end - if G - gradient = -2*(σ_diff'*semwls.V*∇Σ(imply(model)) + μ_diff'*semwls.V_μ*∇μ(imply(model)))' - semwls.gradient .= gradient + + return objective, hessian + end +end + +objective_hessian!(semwls::SemWLS, par, model::AbstractSemSingle, has_meanstructure::Val{true}) = + throw(ArgumentError("hessian of WLS with meanstructure is not available")) + +function gradient_hessian!(semwls::SemWLS, par, model::AbstractSemSingle, has_meanstructure::Val{false}) + + let σ = Σ(imply(model)), σₒ = semwls.σₒ, V = semwls.V, + ∇σ = ∇Σ(imply(model)), + ∇²Σ_function! = ∇²Σ_function(imply(model)), ∇²Σ = ∇²Σ(imply(model)) + + σ₋ = σₒ - σ + + gradient = -2*(σ₋'*V*∇σ)' + + hessian = 2*∇σ'*V*∇σ + if !semwls.approx_H + J = -2*(σ₋'*semwls.V)' + ∇²Σ_function!(∇²Σ, J, par) + hessian += ∇²Σ end - if F - semwls.objective[1] = σ_diff'*semwls.V*σ_diff + μ_diff'*semwls.V_μ*μ_diff + + return gradient, hessian + end +end + +gradient_hessian!(semwls::SemWLS, par, model::AbstractSemSingle, has_meanstructure::Val{true}) = + throw(ArgumentError("hessian of WLS with meanstructure is not available")) + +function objective_gradient_hessian!(semwls::SemWLS, par, model::AbstractSemSingle, has_meanstructure::Val{false}) + + let σ = Σ(imply(model)), σₒ = semwls.σₒ, V = semwls.V, + ∇σ = ∇Σ(imply(model)), + ∇²Σ_function! 
= ∇²Σ_function(imply(model)), ∇²Σ = ∇²Σ(imply(model)) + + σ₋ = σₒ - σ + + objective = dot(σ₋, V, σ₋) + gradient = -2*(σ₋'*V*∇σ)' + hessian = 2*∇σ'*V*∇σ + if !semwls.approx_H + J = -2*(σ₋'*semwls.V)' + ∇²Σ_function!(∇²Σ, J, par) + hessian += ∇²Σ end + return objective, gradient, hessian end end +objective_gradient_hessian!(semwls::SemWLS, par, model::AbstractSemSingle, has_meanstructure::Val{true}) = + throw(ArgumentError("hessian of WLS with meanstructure is not available")) + ############################################################################ ### Recommended methods ############################################################################ -objective(lossfun::SemWLS) = lossfun.objective -gradient(lossfun::SemWLS) = lossfun.gradient -hessian(lossfun::SemWLS) = lossfun.hessian - update_observed(lossfun::SemWLS, observed::SemObs; kwargs...) = SemWLS(;observed = observed, kwargs...) ############################################################################ diff --git a/src/loss/constant/constant.jl b/src/loss/constant/constant.jl index bfa062dbd..e40fc676e 100644 --- a/src/loss/constant/constant.jl +++ b/src/loss/constant/constant.jl @@ -4,28 +4,25 @@ ### Types ############################################################################ -struct SemConstant{FT, GT, HT} <: SemLossFunction - objective::FT - gradient::GT - hessian::HT +struct SemConstant{C} <: SemLossFunction + c::C end ############################################################################ ### Constructors ############################################################################ -function SemConstant(;constant_loss, n_par, parameter_type = Float64, kwargs...) - return SemConstant( - [constant_loss], - zeros(parameter_type, n_par), - zeros(parameter_type, n_par, n_par)) +function SemConstant(;constant_loss, kwargs...) + return SemConstant(constant_loss) end ############################################################################ -### functors +### methods ############################################################################ -function (constant::SemConstant)(par, F, G, H, model) end +objective!(constant::SemConstant, par, model) = constant.c +gradient!(constant::SemConstant, par, model) = zero(par) +hessian!(constant::SemConstant, par, model) = zeros(eltype(par), length(par), length(par)) ############################################################################ ### Recommended methods diff --git a/src/loss/regularization/ridge.jl b/src/loss/regularization/ridge.jl index ef08403e8..299ff471c 100644 --- a/src/loss/regularization/ridge.jl +++ b/src/loss/regularization/ridge.jl @@ -4,12 +4,11 @@ ### Types ############################################################################ -struct SemRidge{P, W1, W2, FT, GT, HT} <: SemLossFunction +struct SemRidge{P, W1, W2, GT, HT} <: SemLossFunction α::P which::W1 which_H::W2 - objective::FT gradient::GT hessian::HT end @@ -19,7 +18,7 @@ end ############################################################################ function SemRidge(;α_ridge, which_ridge, n_par, parameter_type = Float64, imply = nothing, kwargs...)
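+    # which_ridge may contain parameter labels (Symbols, translated to indices below) or plain indices;
+    # the penalty evaluated later is α_ridge * sum(par[which].^2)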
- if which_ridge isa Vector{Symbol} + if eltype(which_ridge) <: Symbol which_ridge = get_identifier_indices(which_ridge, imply) end which = [CartesianIndex(x) for x in which_ridge] @@ -29,29 +28,24 @@ which, which_H, - zeros(parameter_type, 1), zeros(parameter_type, n_par), zeros(parameter_type, n_par, n_par)) end ############################################################################ -### functors +### methods ############################################################################ -function (ridge::SemRidge)(par, F, G, H, model) +objective!(ridge::SemRidge, par, model) = @views ridge.α*sum(x -> x^2, par[ridge.which]) - if G - ridge.gradient[ridge.which] .= 2*ridge.α*par[ridge.which] - end - - if H - @views @. ridge.hessian[ridge.which_H] += ridge.α*2.0 - end +function gradient!(ridge::SemRidge, par, model) + @views ridge.gradient[ridge.which] .= 2*ridge.α*par[ridge.which] + return ridge.gradient +end - if F - ridge.objective[1] = ridge.α*sum(par[ridge.which].^2) - end - +function hessian!(ridge::SemRidge, par, model) + @views @. ridge.hessian[ridge.which_H] = ridge.α*2.0 + return ridge.hessian +end ############################################################################ diff --git a/src/objective_gradient_hessian.jl b/src/objective_gradient_hessian.jl new file mode 100644 index 000000000..203c67c68 --- /dev/null +++ b/src/objective_gradient_hessian.jl @@ -0,0 +1,362 @@ +##################################################################################################### +# methods for AbstractSem +##################################################################################################### + +function objective!(model::AbstractSemSingle, parameters) + objective!(imply(model), parameters, model) + return objective!(loss(model), parameters, model) +end + +##################################################################################################### +# methods for Sem +##################################################################################################### + +# pre-allocated gradient and hessian + +function gradient!(gradient, model::AbstractSemSingle, parameters) + fill!(gradient, zero(eltype(gradient))) + gradient!(imply(model), parameters, model) + gradient!(gradient, loss(model), parameters, model) +end + +function hessian!(hessian, model::AbstractSemSingle, parameters) + fill!(hessian, zero(eltype(hessian))) + hessian!(imply(model), parameters, model) + hessian!(hessian, loss(model), parameters, model) +end + +function objective_gradient!(gradient, model::AbstractSemSingle, parameters) + fill!(gradient, zero(eltype(gradient))) + objective_gradient!(imply(model), parameters, model) + objective_gradient!(gradient, loss(model), parameters, model) +end + +function objective_hessian!(hessian, model::AbstractSemSingle, parameters) + fill!(hessian, zero(eltype(hessian))) + objective_hessian!(imply(model), parameters, model) + objective_hessian!(hessian, loss(model), parameters, model) +end + +function gradient_hessian!(gradient, hessian, model::AbstractSemSingle, parameters) + fill!(gradient, zero(eltype(gradient))) + fill!(hessian, zero(eltype(hessian))) + gradient_hessian!(imply(model), parameters, model) + gradient_hessian!(gradient, hessian, loss(model), parameters, model) +end + +function objective_gradient_hessian!(gradient, hessian, model::AbstractSemSingle, parameters) + fill!(gradient, zero(eltype(gradient))) + fill!(hessian, zero(eltype(hessian))) + 
objective_gradient_hessian!(imply(model), parameters, model) + return objective_gradient_hessian!(gradient, hessian, loss(model), parameters, model) +end + +##################################################################################################### +# methods for SemFiniteDiff and SemForwardDiff +##################################################################################################### + +# gradient methods call themselves with the additional model.has_gradient argument + +gradient!(gradient, model::Union{SemFiniteDiff, SemForwardDiff}, par) = + gradient!(gradient, model, par, model.has_gradient) + +objective_gradient!(gradient, model::Union{SemFiniteDiff, SemForwardDiff}, par) = + objective_gradient!(gradient, model, par, model.has_gradient) + +# methods where autodiff takes place - these are specific to the method of automatic differentiation + +# FiniteDiff
gradient!(gradient, model::SemFiniteDiff, par, has_gradient::Val{false}) = + FiniteDiff.finite_difference_gradient!(gradient, x -> objective!(model, x), par) + +hessian!(hessian, model::SemFiniteDiff, par) = + FiniteDiff.finite_difference_hessian!(hessian, x -> objective!(model, x), par) + +# ForwardDiff +gradient!(gradient, model::SemForwardDiff, par, has_gradient::Val{false}) = + ForwardDiff.gradient!(gradient, x -> objective!(model, x), par) + +hessian!(hessian, model::SemForwardDiff, par) = + ForwardDiff.hessian!(hessian, x -> objective!(model, x), par) + +# gradient! +function gradient!(gradient, model::Union{SemFiniteDiff, SemForwardDiff}, par, has_gradient::Val{true}) + fill!(gradient, zero(eltype(gradient))) + gradient!(imply(model), par, model) + gradient!(gradient, loss(model), par, model) +end + +# objective_gradient! +function objective_gradient!(gradient, model::Union{SemFiniteDiff, SemForwardDiff}, par, has_gradient::Val{true}) + fill!(gradient, zero(eltype(gradient))) + objective_gradient!(imply(model), par, model) + return objective_gradient!(gradient, loss(model), par, model) +end + +function objective_gradient!(gradient, model::Union{SemFiniteDiff, SemForwardDiff}, par, has_gradient::Val{false}) + fill!(gradient, zero(eltype(gradient))) + gradient!(gradient, model, par) + return objective!(model, par) +end + +# other methods +function gradient_hessian!(gradient, hessian, model::Union{SemFiniteDiff, SemForwardDiff}, parameters) + fill!(gradient, zero(eltype(gradient))) + fill!(hessian, zero(eltype(hessian))) + gradient!(gradient, model, parameters) + hessian!(hessian, model, parameters) +end + +function objective_hessian!(hessian, model::Union{SemFiniteDiff, SemForwardDiff}, par) + fill!(hessian, zero(eltype(hessian))) + hessian!(hessian, model, par) + return objective!(model, par) +end + +function objective_gradient_hessian!(gradient, hessian, model::Union{SemFiniteDiff, SemForwardDiff}, par) + fill!(gradient, zero(eltype(gradient))) + fill!(hessian, zero(eltype(hessian))) + hessian!(hessian, model, par) + return objective_gradient!(gradient, model, par) +end + +##################################################################################################### +# methods for SemLoss +##################################################################################################### + +function objective!(loss::SemLoss, par, model) + return mapreduce( + fun_weight -> fun_weight[2]*objective!(fun_weight[1], par, model), + +, + zip(loss.functions, loss.weights) + ) +end + +function gradient!(gradient, loss::SemLoss, par, model) + for (lossfun, w) in 
zip(loss.functions, loss.weights) + new_gradient = gradient!(lossfun, par, model) + gradient .+= w*new_gradient + end +end + +function hessian!(hessian, loss::SemLoss, par, model) + for (lossfun, w) in zip(loss.functions, loss.weights) + hessian .+= w*hessian!(lossfun, par, model) + end +end + +function objective_gradient!(gradient, loss::SemLoss, par, model) + return mapreduce( + fun_weight -> objective_gradient_wrap_(gradient, fun_weight[1], par, model, fun_weight[2]), + +, + zip(loss.functions, loss.weights) + ) +end + +function objective_hessian!(hessian, loss::SemLoss, par, model) + return mapreduce( + fun_weight -> objective_hessian_wrap_(hessian, fun_weight[1], par, model, fun_weight[2]), + +, + zip(loss.functions, loss.weights) + ) +end + +function gradient_hessian!(gradient, hessian, loss::SemLoss, par, model) + for (lossfun, w) in zip(loss.functions, loss.weights) + new_gradient, new_hessian = gradient_hessian!(lossfun, par, model) + gradient .+= w*new_gradient + hessian .+= w*new_hessian + end +end + +function objective_gradient_hessian!(gradient, hessian, loss::SemLoss, par, model) + return mapreduce( + fun_weight -> objective_gradient_hessian_wrap_(gradient, hessian, fun_weight[1], par, model, fun_weight[2]), + +, + zip(loss.functions, loss.weights) + ) +end + +# wrapper to update gradient/hessian and return objective value +function objective_gradient_wrap_(gradient, lossfun, par, model, w) + new_objective, new_gradient = objective_gradient!(lossfun, par, model) + gradient .+= w*new_gradient + return w*new_objective +end + +function objective_hessian_wrap_(hessian, lossfun, par, model, w) + new_objective, new_hessian = objective_hessian!(lossfun, par, model) + hessian .+= w*new_hessian + return w*new_objective +end + +function objective_gradient_hessian_wrap_(gradient, hessian, lossfun, par, model, w) + new_objective, new_gradient, new_hessian = objective_gradient_hessian!(lossfun, par, model) + gradient .+= w*new_gradient + hessian .+= w*new_hessian + return w*new_objective +end + +##################################################################################################### +# methods for SemEnsemble +##################################################################################################### + +function objective!(ensemble::SemEnsemble, par) + return mapreduce( + model_weight -> model_weight[2]*objective!(model_weight[1], par), + +, + zip(ensemble.sems, ensemble.weights) + ) +end + +function gradient!(gradient, ensemble::SemEnsemble, par) + fill!(gradient, zero(eltype(gradient))) + for (model, w) in zip(ensemble.sems, ensemble.weights) + gradient_new = similar(gradient) + gradient!(gradient_new, model, par) + gradient .+= w*gradient_new + end +end + +function hessian!(hessian, ensemble::SemEnsemble, par) + fill!(hessian, zero(eltype(hessian))) + for (model, w) in zip(ensemble.sems, ensemble.weights) + hessian_new = similar(hessian) + hessian!(hessian_new, model, par) + hessian .+= w*hessian_new + end +end + +function objective_gradient!(gradient, ensemble::SemEnsemble, par) + fill!(gradient, zero(eltype(gradient))) + return mapreduce( + model_weight -> objective_gradient_wrap_(gradient, model_weight[1], par, model_weight[2]), + +, + zip(ensemble.sems, ensemble.weights) + ) +end + +function objective_hessian!(hessian, ensemble::SemEnsemble, par) + fill!(hessian, zero(eltype(hessian))) + return mapreduce( + model_weight -> objective_hessian_wrap_(hessian, model_weight[1], par, model_weight[2]), + +, + zip(ensemble.sems, ensemble.weights) + ) +end + +function 
gradient_hessian!(gradient, hessian, ensemble::SemEnsemble, par) + fill!(gradient, zero(eltype(gradient))) + fill!(hessian, zero(eltype(hessian))) + for (model, w) in zip(ensemble.sems, ensemble.weights) + + new_gradient = similar(gradient) + new_hessian = similar(hessian) + + gradient_hessian!(new_gradient, new_hessian, model, par) + + gradient .+= w*new_gradient + hessian .+= w*new_hessian + + end +end + +function objective_gradient_hessian!(gradient, hessian, ensemble::SemEnsemble, par) + fill!(gradient, zero(eltype(gradient))) + fill!(hessian, zero(eltype(hessian))) + return mapreduce( + model_weight -> objective_gradient_hessian_wrap_(gradient, hessian, model_weight[1], par, model_weight[2]), + +, + zip(ensemble.sems, ensemble.weights) + ) +end + +# wrappers to update the gradient/hessian buffers in place and return the weighted objective value +function objective_gradient_wrap_(gradient, model::AbstractSemSingle, par, w) + gradient_pre = similar(gradient) + new_objective = objective_gradient!(gradient_pre, model, par) + gradient .+= w*gradient_pre + return w*new_objective +end + +function objective_hessian_wrap_(hessian, model::AbstractSemSingle, par, w) + hessian_pre = similar(hessian) + new_objective = objective_hessian!(hessian_pre, model, par) + hessian .+= w*hessian_pre + return w*new_objective +end + +function objective_gradient_hessian_wrap_(gradient, hessian, model::AbstractSemSingle, par, w) + gradient_pre = similar(gradient) + hessian_pre = similar(hessian) + new_objective = objective_gradient_hessian!(gradient_pre, hessian_pre, model, par) + gradient .+= w*gradient_pre + hessian .+= w*hessian_pre + return w*new_objective +end + +##################################################################################################### +# generic methods for loss functions +##################################################################################################### + +function objective_gradient!(lossfun::SemLossFunction, par, model) + objective = objective!(lossfun, par, model) + gradient = gradient!(lossfun, par, model) + return objective, gradient +end + +function objective_hessian!(lossfun::SemLossFunction, par, model) + objective = objective!(lossfun, par, model) + hessian = hessian!(lossfun, par, model) + return objective, hessian +end + +function gradient_hessian!(lossfun::SemLossFunction, par, model) + gradient = gradient!(lossfun, par, model) + hessian = hessian!(lossfun, par, model) + return gradient, hessian +end + +function objective_gradient_hessian!(lossfun::SemLossFunction, par, model) + objective = objective!(lossfun, par, model) + gradient = gradient!(lossfun, par, model) + hessian = hessian!(lossfun, par, model) + return objective, gradient, hessian +end
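A note on the pattern above: each `*_wrap_` call mutates the shared gradient/Hessian buffer as a side effect and returns its weighted objective contribution, so the `mapreduce(+, zip(...))` yields the weighted total objective while the buffers fill up. A standalone sketch with toy functions (`fs`, `gs`, `ws`, and `wrap_` are illustrative names, not part of the package):

```julia
# Toy illustration of the accumulate-and-reduce pattern used above.
fs = (x -> sum(abs2, x), x -> sum(x))      # made-up loss functions
gs = (x -> 2 .* x,       x -> ones(length(x)))  # their gradients
ws = (1.0, 0.5)                            # loss weights

function wrap_(gradient, f, g, x, w)
    gradient .+= w .* g(x)   # side effect: accumulate the weighted gradient
    return w * f(x)          # return value: weighted objective contribution
end

x = [1.0, 2.0]
gradient = zeros(2)
total = mapreduce(fgw -> wrap_(gradient, fgw[1], fgw[2], x, fgw[3]), +, zip(fs, gs, ws))
# total == 6.5; gradient == [2.5, 4.5]
```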
+ +# throw an error by default if gradient! and hessian! are not implemented + +#= gradient!(lossfun::SemLossFunction, par, model) = + throw(ArgumentError("gradient for $(typeof(lossfun).name.wrapper) is not available")) + +hessian!(lossfun::SemLossFunction, par, model) = + throw(ArgumentError("hessian for $(typeof(lossfun).name.wrapper) is not available")) =# + +##################################################################################################### +# generic methods for imply +##################################################################################################### + +function objective_gradient!(semimp::SemImply, par, model) + objective!(semimp, par, model) + gradient!(semimp, par, model) + return nothing +end + +function objective_hessian!(semimp::SemImply, par, model) + objective!(semimp, par, model) + hessian!(semimp, par, model) + return nothing +end + +function gradient_hessian!(semimp::SemImply, par, model) + gradient!(semimp, par, model) + hessian!(semimp, par, model) + return nothing +end + +function objective_gradient_hessian!(semimp::SemImply, par, model) + objective!(semimp, par, model) + gradient!(semimp, par, model) + hessian!(semimp, par, model) + return nothing +end \ No newline at end of file diff --git a/src/optimizer/NLopt.jl b/src/optimizer/NLopt.jl index d1ba99b79..50aaf3a6b 100644 --- a/src/optimizer/NLopt.jl +++ b/src/optimizer/NLopt.jl @@ -3,11 +3,13 @@ ############################################################################ # wrapper to define the objective -function sem_wrap_nlopt(par, G, sem::AbstractSem) +function sem_wrap_nlopt(par, G, model::AbstractSem) need_gradient = length(G) != 0 - sem(par, true, need_gradient, false) - if need_gradient G .= gradient(sem) end - return objective(sem)[1] + if need_gradient + return objective_gradient!(G, model, par) + else + return objective!(model, par) + end end mutable struct NLoptResult diff --git a/src/optimizer/optim.jl b/src/optimizer/optim.jl index 32b505ecd..dfacacd6d 100644 --- a/src/optimizer/optim.jl +++ b/src/optimizer/optim.jl @@ -1,9 +1,29 @@ ## connect to Optim.jl as backend -function sem_wrap_optim(par, F, G, H, sem::AbstractSem) - sem(par, !isnothing(F), !isnothing(G), !isnothing(H)) - if !isnothing(G) G .= gradient(sem) end - if !isnothing(H) H .= hessian(sem) end - if !isnothing(F) return objective(sem)[1] end +function sem_wrap_optim(par, F, G, H, model::AbstractSem) + if !isnothing(F) + if !isnothing(G) + if !isnothing(H) + return objective_gradient_hessian!(G, H, model, par) + else + return objective_gradient!(G, model, par) + end + else + if !isnothing(H) + return objective_hessian!(H, model, par) + else + return objective!(model, par) + end + end + else + if !isnothing(G) + if !isnothing(H) + gradient_hessian!(G, H, model, par) + else + gradient!(G, model, par) + end + end + end + return nothing end function SemFit(optimization_result::Optim.MultivariateOptimizationResults, model::AbstractSem, start_val)
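The nested `isnothing` branches in `sem_wrap_optim` follow Optim.jl's `only_fgh!` calling convention: Optim passes `nothing` for the slots it does not need on a given iteration, and expects the objective as the return value whenever `F` is requested. A minimal self-contained sketch of that convention (a toy quadratic objective, not a SEM model):

```julia
using Optim

# fgh!(F, G, H, x): fill G and H in place when requested, return F when requested.
function fgh!(F, G, H, x)
    if !isnothing(G)
        G .= 2 .* x                  # gradient of sum(abs2, x)
    end
    if !isnothing(H)
        H .= 0.0
        for i in axes(H, 1)
            H[i, i] = 2.0            # constant Hessian of the quadratic
        end
    end
    isnothing(F) ? nothing : sum(abs2, x)
end

res = optimize(Optim.only_fgh!(fgh!), [1.0, 2.0], Newton())
```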
diff --git a/src/types.jl b/src/types.jl index a1d533e70..f1a29d189 100644 --- a/src/types.jl +++ b/src/types.jl @@ -5,7 +5,7 @@ abstract type AbstractSem end "Supertype for all single SEMs, e.g. SEMs that have the fields `observed`, `imply`, loss, diff" -abstract type AbstractSemSingle <: AbstractSem end +abstract type AbstractSemSingle{O, I, L, D} <: AbstractSem end "Supertype for all collections of multiple SEMs" abstract type AbstractSemCollection <: AbstractSem end @@ -29,32 +29,32 @@ my_ridge_loss = SemRidge(...) my_loss = SemLoss(SemML, SemRidge) ``` """ -mutable struct SemLoss{F <: Tuple, T, FT, GT, HT} +mutable struct SemLoss{F <: Tuple, T} functions::F weights::T - - objective::FT - gradient::GT - hessian::HT end -function SemLoss(functions...; loss_weights = nothing, parameter_type = Float64, kwargs...) +function SemLoss(functions...; loss_weights = nothing, kwargs...) - n_par = length(gradient(functions[1])) - !isnothing(loss_weights) || (loss_weights = Tuple(nothing for _ in 1:length(functions))) + if !isnothing(loss_weights) + loss_weights = SemWeight.(loss_weights) + else + loss_weights = Tuple(SemWeight(nothing) for _ in 1:length(functions)) + end return SemLoss( functions, - loss_weights, + loss_weights + ) +end - zeros(parameter_type, 1), - zeros(parameter_type, n_par), - zeros(parameter_type, n_par, n_par)) +# weights for loss functions or models. If the weight is nothing, multiplication returns the second argument +struct SemWeight{T} + w::T end -objective(loss::SemLoss) = loss.objective -gradient(loss::SemLoss) = loss.gradient -hessian(loss::SemLoss) = loss.hessian +Base.:*(x::SemWeight{Nothing}, y) = y +Base.:*(x::SemWeight, y) = x.w*y """ Supertype of all objects that can serve as the diff field of a SEM. @@ -99,57 +99,18 @@ Returns a struct of type Sem with fields - `loss::SemLoss`: Computes the objective and gradient of a sum of loss functions. See also [`SemLoss`](@ref). - `diff::SemDiff`: Connects the model to the optimizer. See also [`SemDiff`](@ref). """ -mutable struct Sem{O <: SemObs, I <: SemImply, L <: SemLoss, D <: SemDiff} <: AbstractSemSingle +mutable struct Sem{O <: SemObs, I <: SemImply, L <: SemLoss, D <: SemDiff} <: AbstractSemSingle{O, I, L, D} observed::O imply::I loss::L diff::D end -function (model::Sem)(par, F, G, H) - model.imply(par, F, G, H, model) - model.loss(par, F, G, H, model) -end - -function (loss::SemLoss)(par, F, G, H, model) - for lossfun in loss.functions lossfun(par, F, G, H, model) end - if H - loss.hessian .= 0.0 - for (lossfun, c) in zip(loss.functions, loss.weights) - if isnothing(c) - loss.hessian .+= hessian(lossfun) - else - loss.hessian .+= c*hessian(lossfun) - end - end - end - if G - loss.gradient .= 0.0 - for (lossfun, c) in zip(loss.functions, loss.weights) - if isnothing(c) - loss.gradient .+= gradient(lossfun) - else - loss.gradient .+= c*gradient(lossfun) - end - end - end - if F - loss.objective[1] = 0.0 - for (lossfun, c) in zip(loss.functions, loss.weights) - if isnothing(c) - loss.objective[1] += objective(lossfun)[1] - else - loss.objective[1] += c*objective(lossfun)[1] - end - end - end -end - ##################################################################################################### # automatic differentiation ##################################################################################################### -struct SemFiniteDiff{O <: SemObs, I <: SemImply, L <: SemLoss, D <: SemDiff, G} <: AbstractSemSingle +struct SemFiniteDiff{O <: SemObs, I <: SemImply, L <: SemLoss, D <: SemDiff, G} <: AbstractSemSingle{O, I, L, D} observed::O imply::I loss::L @@ -157,31 +118,7 @@ struct SemFiniteDiff{O <: SemObs, I <: SemImply, L <: SemLoss, D <: SemDiff, G} has_gradient::G end -function (model::SemFiniteDiff)(par, F, G, H) - - if H - model.loss.hessian .= FiniteDiff.finite_difference_hessian(x -> objective!(model, x), par) - end - - if model.has_gradient - model.imply(par, F, G, false, model) - model.loss(par, F, G, false, model) - else - - if G - model.loss.gradient .= FiniteDiff.finite_difference_gradient(x -> 
objective!(model, x), par) - end - - if F - model.imply(par, F, false, false, model) - model.loss(par, F, false, false, model) - end - - end - -end - -struct SemForwardDiff{O <: SemObs, I <: SemImply, L <: SemLoss, D <: SemDiff, G} <: AbstractSemSingle +struct SemForwardDiff{O <: SemObs, I <: SemImply, L <: SemLoss, D <: SemDiff, G} <: AbstractSemSingle{O, I, L, D} observed::O imply::I loss::L @@ -189,48 +126,21 @@ struct SemForwardDiff{O <: SemObs, I <: SemImply, L <: SemLoss, D <: SemDiff, G} has_gradient::G end -function (model::SemForwardDiff)(par, F, G, H) - - if H - model.loss.hessian .= ForwardDiff.hessian(x -> objective!(model, x), par) - end - - if model.has_gradient - model.imply(par, F, G, false, model) - model.loss(par, F, G, false, model) - else - - if G - model.loss.gradient .= ForwardDiff.gradient(x -> objective!(model, x), par) - end - - if F - model.imply(par, F, false, false, model) - model.loss(par, F, false, false, model) - end - - end - -end - ##################################################################################################### # ensemble models ##################################################################################################### -struct SemEnsemble{N, T <: Tuple, V <: AbstractVector, D, I, FT, GT, HT} <: AbstractSemCollection +struct SemEnsemble{N, T <: Tuple, V <: AbstractVector, D, I} <: AbstractSemCollection n::N sems::T weights::V diff::D identifier::I - - objective::FT - gradient::GT - hessian::HT end function SemEnsemble(models...; diff = SemDiffOptim, weights = nothing, parameter_type = Float64, kwargs...) n = length(models) + npar = n_par(models[1]) # default weights @@ -249,8 +159,6 @@ function SemEnsemble(models...; diff = SemDiffOptim, weights = nothing, paramete end end - npar = n_par(models[1]) - # diff if !isa(diff, SemDiff) diff = diff(;kwargs...) 
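The `SemWeight` wrapper introduced in types.jl above replaces the old `isnothing(c)` branches with dispatch: multiplying by a `SemWeight{Nothing}` simply returns the other operand. A standalone sketch of the idea (`Wgt` is a stand-in name so as not to shadow the package's type):

```julia
struct Wgt{T}
    w::T
end

Base.:*(x::Wgt{Nothing}, y) = y   # unweighted: multiplication is a no-op
Base.:*(x::Wgt, y) = x.w * y      # weighted: scale the other operand

Wgt(nothing) * [1.0, 2.0]         # == [1.0, 2.0]
Wgt(0.5) * [1.0, 2.0]             # == [0.5, 1.0]
```

This lets the accumulation loops write `gradient .+= w*new_gradient` uniformly, whether or not a weight was supplied.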
@@ -261,26 +169,8 @@ function SemEnsemble(models...; diff = SemDiffOptim, weights = nothing, paramete models, weights, diff, - id, - - zeros(parameter_type, 1), - zeros(parameter_type, npar), - zeros(parameter_type, npar, npar)) -end - -function (ensemble::SemEnsemble)(par, F, G, H) - - if H ensemble.hessian .= 0.0 end - if G ensemble.gradient .= 0.0 end - if F ensemble.objective .= 0.0 end - - for (model, weight) in zip(ensemble.sems, ensemble.weights) - model(par, F, G, H) - if H ensemble.hessian .+= weight*hessian(model) end - if G ensemble.gradient .+= weight*gradient(model) end - if F ensemble.objective .+= weight*objective(model) end - end - + id + ) end n_models(ensemble::SemEnsemble) = ensemble.n @@ -307,59 +197,4 @@ observed(model::SemFiniteDiff) = model.observed imply(model::SemFiniteDiff) = model.imply loss(model::SemFiniteDiff) = model.loss diff(model::SemFiniteDiff) = model.diff -has_gradient(model::SemFiniteDiff) = model.has_gradient - - -##################################################################################################### -# gradient, objective, hessian helpers -##################################################################################################### - -objective(lossfun::SemLossFunction) = lossfun.objective -gradient(lossfun::SemLossFunction) = lossfun.gradient -hessian(lossfun::SemLossFunction) = lossfun.hessian - -objective(model::AbstractSem) = model.loss.objective -gradient(model::AbstractSem) = model.loss.gradient -hessian(model::AbstractSem) = model.loss.hessian - -objective(model::SemEnsemble) = model.objective -gradient(model::SemEnsemble) = model.gradient -hessian(model::SemEnsemble) = model.hessian - -function objective!(model::AbstractSem, parameters) - model(parameters, true, false, false) - return objective(model)[1] -end - -function gradient!(model::AbstractSem, parameters) - model(parameters, false, true, false) - return gradient(model) -end - -function gradient!(grad, model::AbstractSem, parameters) - model(parameters, false, true, false) - copyto!(grad, gradient(model)) - return gradient(model) -end - -function hessian!(model::AbstractSem, parameters) - model(parameters, false, false, true) - return hessian(model) -end - -function hessian!(hessian, model::AbstractSem, parameters) - model(parameters, false, false, true) - copyto!(hessian, hessian(model)) - return hessian(model) -end - -function objective_gradient!(model::AbstractSem, parameters) - model(parameters, true, true, false) - return objective(model)[1], copy(gradient(model)) -end - -function objective_gradient!(grad, model::AbstractSem, parameters) - model(parameters, true, true, false) - copyto!(grad, gradient(model)) - return objective(model)[1] -end \ No newline at end of file +has_gradient(model::SemFiniteDiff) = model.has_gradient \ No newline at end of file diff --git a/test/examples/political_democracy/helper.jl b/test/examples/helper.jl similarity index 54% rename from test/examples/political_democracy/helper.jl rename to test/examples/helper.jl index 780636abd..d1089c561 100644 --- a/test/examples/political_democracy/helper.jl +++ b/test/examples/helper.jl @@ -1,38 +1,44 @@ function test_gradient(model, parameters; rtol = 1e-10, atol = 0) true_grad = FiniteDiff.finite_difference_gradient(x -> objective!(model, x)[1], parameters) + gradient = similar(parameters); gradient .= 1.0 # F and G - gradient(model) .= 0 - model(parameters, true, true, false) - correct1 = isapprox(gradient(model), true_grad; rtol = rtol, atol = atol) + gradient!(gradient, model, 
parameters) + correct1 = isapprox(gradient, true_grad; rtol = rtol, atol = atol) # only G - gradient(model) .= 0 - model(parameters, false, true, false) - correct2 = isapprox(gradient(model), true_grad; rtol = rtol, atol = atol) + gradient .= 1.0 + objective_gradient!(gradient, model, parameters) + correct2 = isapprox(gradient, true_grad; rtol = rtol, atol = atol) return correct1 & correct2 end function test_hessian(model, parameters; rtol = 1e-4, atol = 0) true_hessian = FiniteDiff.finite_difference_hessian(x -> objective!(model, x)[1], parameters) + hessian = zeros(size(true_hessian)); hessian .= 1.0 + gradient = similar(parameters) + + # H + hessian!(hessian, model, parameters) + correct1 = isapprox(hessian, true_hessian; rtol = rtol, atol = atol) # F and H - hessian(model) .= 0 - model(parameters, true, false, true) - correct1 = isapprox(hessian(model), true_hessian; rtol = rtol, atol = atol) + hessian .= 1.0 + objective_hessian!(hessian, model, parameters) + correct2 = isapprox(hessian, true_hessian; rtol = rtol, atol = atol) # G and H - hessian(model) .= 0 - model(parameters, false, true, true) - correct2 = isapprox(hessian(model), true_hessian; rtol = rtol, atol = atol) - - # only H - hessian(model) .= 0 - model(parameters, false, false, true) - correct3 = isapprox(hessian(model), true_hessian; rtol = rtol, atol = atol) + hessian .= 1.0 + gradient_hessian!(gradient, hessian, model, parameters) + correct3 = isapprox(hessian, true_hessian; rtol = rtol, atol = atol) + + # F, G and H + hessian .= 1.0 + objective_gradient_hessian!(gradient, hessian, model, parameters) + correct4 = isapprox(hessian, true_hessian; rtol = rtol, atol = atol) - return correct1 & correct2 & correct3 + return correct1 & correct2 & correct3 & correct4 end fitmeasure_names_ml = Dict( @@ -142,5 +148,76 @@ function compare_estimates(partable::ParameterTable, partable_lav; end + return all(correct) +end + +function compare_estimates(ens_partable::EnsembleParameterTable, partable_lav; + rtol = 1e-10, atol = 0, col = :estimate, lav_col = :est, + lav_groups) + + correct = [] + + for key in keys(ens_partable.tables) + + group = lav_groups[key] + partable = ens_partable.tables[key] + + for i in findall(partable.columns[:free]) + + from = partable.columns[:from][i] + to = partable.columns[:to][i] + type = partable.columns[:parameter_type][i] + estimate = partable.columns[col][i] + + if from == Symbol("1") + + lav_ind = findall( + (partable_lav.lhs .== String(to)) .& + (partable_lav.op .== "~1") .& + (partable_lav.group .== group)) + + if length(lav_ind) == 0 + throw(ErrorException("At least one parameter could not be found in the lavaan solution")) + elseif length(lav_ind) > 1 + throw(ErrorException("At least one parameter was found twice in the lavaan solution")) + else + is_correct = isapprox(estimate, partable_lav[:, lav_col][lav_ind[1]]; rtol = rtol, atol = atol) + push!(correct, is_correct) + end + + else + + if type == :↔ + type = "~~" + elseif type == :→ + if (from ∈ partable.variables[:latent_vars]) & (to ∈ partable.variables[:observed_vars]) + type = "=~" + else + type = "~" + from, to = to, from + end + end + + lav_ind = findall( + (partable_lav.lhs .== String(from)) .& + (partable_lav.rhs .== String(to)) .& + (partable_lav.op .== type).& + (partable_lav.group .== group)) + + if length(lav_ind) == 0 + throw(ErrorException("At least one parameter could not be found in the lavaan solution")) + elseif length(lav_ind) > 1 + throw(ErrorException("At least one parameter was found twice in the lavaan solution")) + else 
+ is_correct = isapprox(estimate, partable_lav[:, lav_col][lav_ind[1]]; rtol = rtol, atol = atol) + push!(correct, is_correct) + end + + end + + end + + end + return all(correct) end \ No newline at end of file diff --git a/test/examples/multigroup/build_models.jl b/test/examples/multigroup/build_models.jl index 56accc8da..3ebb4d0b8 100644 --- a/test/examples/multigroup/build_models.jl +++ b/test/examples/multigroup/build_models.jl @@ -53,38 +53,21 @@ end # ML estimation - user defined loss function #################################################################### -struct UserSemML <: SemLossFunction - objective - gradient - hessian -end - -############################################################################ -### constructor -############################################################################ - -UserSemML(;n_par, kwargs...) = UserSemML([1.0], zeros(n_par), zeros(n_par, n_par)) +struct UserSemML <: SemLossFunction end ############################################################################ ### functors ############################################################################ -import LinearAlgebra: Symmetric, cholesky, isposdef, logdet, tr -import LinearAlgebra - -function (semml::UserSemML)(par, F, G, H, model) - if G error("analytic gradient of ML is not implemented (yet)") end - if H error("analytic hessian of ML is not implemented (yet)") end - - a = cholesky(Symmetric(model.imply.Σ); check = false) - if !isposdef(a) - semml.objective[1] = Inf - else - ld = logdet(a) - Σ_inv = LinearAlgebra.inv(a) - if !isnothing(F) - prod = Σ_inv*model.observed.obs_cov - semml.objective[1] = ld + tr(prod) +import LinearAlgebra: Symmetric, cholesky, isposdef, logdet, tr, inv +import StructuralEquationModels: Σ, obs_cov, objective! + +function objective!(semml::UserSemML, parameters, model::AbstractSem) + let Σ = Σ(imply(model)), Σₒ = obs_cov(observed(model)) + if !isposdef(Σ) + return Inf + else + return logdet(Σ) + tr(inv(Σ)*Σₒ) end end end @@ -100,7 +83,7 @@ model_g2 = SemFiniteDiff( specification = specification_g2, data = dat_g2, imply = RAMSymbolic, - loss = UserSemML + loss = UserSemML() ) model_ml_multigroup = SemEnsemble(model_g1, model_g2; diff = semdiff) @@ -180,7 +163,8 @@ model_g1 = Sem( loss = SemFIML, data = dat_miss_g1, imply = RAM, - diff = SemDiffEmpty() + diff = SemDiffEmpty(), + meanstructure = true ) model_g2 = Sem( @@ -189,7 +173,8 @@ model_g2 = Sem( loss = SemFIML, data = dat_miss_g2, imply = RAM, - diff = SemDiffEmpty() + diff = SemDiffEmpty(), + meanstructure = true ) model_ml_multigroup = SemEnsemble(model_g1, model_g2; diff = semdiff) diff --git a/test/examples/multigroup/helper.jl b/test/examples/multigroup/helper.jl deleted file mode 100644 index 7f0d10049..000000000 --- a/test/examples/multigroup/helper.jl +++ /dev/null @@ -1,135 +0,0 @@ -function test_gradient(model, parameters; rtol = 1e-10, atol = 0) - true_grad = FiniteDiff.finite_difference_gradient(x -> objective!(model, x)[1], parameters) - - # F and G - gradient(model) .= 0 - model(parameters, true, true, false) - correct1 = isapprox(gradient(model), true_grad; rtol = rtol, atol = atol) - - # only G - gradient(model) .= 0 - model(parameters, false, true, false) - correct2 = isapprox(gradient(model), true_grad; rtol = rtol, atol = atol) - - return correct1 & correct2 -end - -function test_hessian(model, parameters; rtol = 1e-4, atol = 0) - true_hessian = FiniteDiff.finite_difference_hessian(x -> objective!(model, x)[1], parameters) - - # F and H - hessian(model) .= 0 - model(parameters, true, 
false, true) - correct1 = isapprox(hessian(model), true_hessian; rtol = rtol, atol = atol) - - # G and H - hessian(model) .= 0 - model(parameters, false, true, true) - correct2 = isapprox(hessian(model), true_hessian; rtol = rtol, atol = atol) - - # only H - hessian(model) .= 0 - model(parameters, false, false, true) - correct3 = isapprox(hessian(model), true_hessian; rtol = rtol, atol = atol) - - return correct1 & correct2 & correct3 -end - -fitmeasure_names_ml = Dict( - :AIC => "aic", - :BIC => "bic", - :df => "df", - :χ² => "chisq", - :p_value => "pvalue", - :n_par => "npar", - :RMSEA => "rmsea", -) - -fitmeasure_names_ls = Dict( - :df => "df", - :χ² => "chisq", - :p_value => "pvalue", - :n_par => "npar", - :RMSEA => "rmsea", -) - -function test_fitmeasures(measures, measures_lav; rtol = 1e-4, atol = 0, fitmeasure_names = fitmeasure_names_ml) - correct = [] - for key in keys(fitmeasure_names) - measure = measures[key] - measure_lav = measures_lav.x[measures_lav[:, 1] .== fitmeasure_names[key]][1] - push!(correct, isapprox(measure, measure_lav; rtol = rtol, atol = atol)) - end - return correct -end - -function compare_estimates(ens_partable::EnsembleParameterTable, partable_lav; - rtol = 1e-10, atol = 0, col = :estimate, lav_col = :est, - lav_groups) - - correct = [] - - for key in keys(ens_partable.tables) - - group = lav_groups[key] - partable = ens_partable.tables[key] - - for i in findall(partable.columns[:free]) - - from = partable.columns[:from][i] - to = partable.columns[:to][i] - type = partable.columns[:parameter_type][i] - estimate = partable.columns[col][i] - - if from == Symbol("1") - - lav_ind = findall( - (partable_lav.lhs .== String(to)) .& - (partable_lav.op .== "~1") .& - (partable_lav.group .== group)) - - if length(lav_ind) == 0 - throw(ErrorException("At least one parameter could not be found in the lavaan solution")) - elseif length(lav_ind) > 1 - throw(ErrorException("At least one parameter was found twice in the lavaan solution")) - else - is_correct = isapprox(estimate, partable_lav[:, lav_col][lav_ind[1]]; rtol = rtol, atol = atol) - push!(correct, is_correct) - end - - else - - if type == :↔ - type = "~~" - elseif type == :→ - if (from ∈ partable.variables[:latent_vars]) & (to ∈ partable.variables[:observed_vars]) - type = "=~" - else - type = "~" - from, to = to, from - end - end - - lav_ind = findall( - (partable_lav.lhs .== String(from)) .& - (partable_lav.rhs .== String(to)) .& - (partable_lav.op .== type).& - (partable_lav.group .== group)) - - if length(lav_ind) == 0 - throw(ErrorException("At least one parameter could not be found in the lavaan solution")) - elseif length(lav_ind) > 1 - throw(ErrorException("At least one parameter was found twice in the lavaan solution")) - else - is_correct = isapprox(estimate, partable_lav[:, lav_col][lav_ind[1]]; rtol = rtol, atol = atol) - push!(correct, is_correct) - end - - end - - end - - end - - return all(correct) -end \ No newline at end of file diff --git a/test/examples/multigroup/multigroup.jl b/test/examples/multigroup/multigroup.jl index 5aefcbe7e..3ab05b805 100644 --- a/test/examples/multigroup/multigroup.jl +++ b/test/examples/multigroup/multigroup.jl @@ -1,7 +1,7 @@ using StructuralEquationModels, Test, FiniteDiff import LinearAlgebra: diagind # import StructuralEquationModels as SEM -include("helper.jl") +include(joinpath(chop(dirname(pathof(StructuralEquationModels)), tail = 3), "test/examples/helper.jl")) dat = example_data("holzinger_swineford") dat_missing = 
example_data("holzinger_swineford_missing") diff --git a/test/examples/political_democracy/constructor.jl b/test/examples/political_democracy/constructor.jl index 85b612a3b..c00a34eea 100644 --- a/test/examples/political_democracy/constructor.jl +++ b/test/examples/political_democracy/constructor.jl @@ -93,7 +93,10 @@ end # test constant objective value @testset "constant_objective_and_gradient" begin @test (objective!(model_constant, start_test) - 3.465) ≈ objective!(model_ml, start_test) - @test gradient!(model_constant, start_test) ≈ gradient!(model_ml, start_test) + grad = similar(start_test); grad2 = similar(start_test) + gradient!(grad, model_constant, start_test) + gradient!(grad2, model_ml, start_test) + @test grad ≈ grad2 end @testset "ml_solution_weighted" begin @@ -268,7 +271,8 @@ model_ml = Sem( data = dat_missing, observed = SemObsMissing, loss = SemFIML, - diff = semdiff + diff = semdiff, + meanstructure = true ) model_ml_sym = Sem( @@ -278,7 +282,8 @@ model_ml_sym = Sem( imply = RAMSymbolic, loss = SemFIML, start_val = start_test_mean, - diff = semdiff + diff = semdiff, + meanstructure = true ) ############################################################################ diff --git a/test/examples/political_democracy/political_democracy.jl b/test/examples/political_democracy/political_democracy.jl index 405589bac..444360fa7 100644 --- a/test/examples/political_democracy/political_democracy.jl +++ b/test/examples/political_democracy/political_democracy.jl @@ -1,6 +1,6 @@ using StructuralEquationModels, Test, FiniteDiff # import StructuralEquationModels as SEM -include("helper.jl") +include(joinpath(chop(dirname(pathof(StructuralEquationModels)), tail = 3), "test/examples/helper.jl")) ############################################################################ ### data diff --git a/test/examples/recover_parameters/helper.jl b/test/examples/recover_parameters/helper.jl deleted file mode 100644 index 7f12aad91..000000000 --- a/test/examples/recover_parameters/helper.jl +++ /dev/null @@ -1,125 +0,0 @@ -function test_gradient(model, parameters; rtol = 1e-10, atol = 0) - true_grad = FiniteDiff.finite_difference_gradient(x -> objective!(model, x)[1], parameters) - - # F and G - gradient(model) .= 0 - model(parameters, true, true, false) - correct1 = isapprox(gradient(model), true_grad; rtol = rtol, atol = atol) - - # only G - gradient(model) .= 0 - model(parameters, false, true, false) - correct2 = isapprox(gradient(model), true_grad; rtol = rtol, atol = atol) - - return correct1 & correct2 -end - -function test_hessian(model, parameters; rtol = 1e-4, atol = 0) - true_hessian = FiniteDiff.finite_difference_hessian(x -> objective!(model, x)[1], parameters) - - # F and H - hessian(model) .= 0 - model(parameters, true, false, true) - correct1 = isapprox(hessian(model), true_hessian; rtol = rtol, atol = atol) - - # G and H - hessian(model) .= 0 - model(parameters, false, true, true) - correct2 = isapprox(hessian(model), true_hessian; rtol = rtol, atol = atol) - - # only H - hessian(model) .= 0 - model(parameters, false, false, true) - correct3 = isapprox(hessian(model), true_hessian; rtol = rtol, atol = atol) - - return correct1 & correct2 & correct3 -end - -fitmeasure_names_ml = Dict( - :AIC => "aic", - :BIC => "bic", - :df => "df", - :χ² => "chisq", - :p_value => "pvalue", - :n_par => "npar", - :RMSEA => "rmsea", -) - -fitmeasure_names_ls = Dict( - :df => "df", - :χ² => "chisq", - :p_value => "pvalue", - :n_par => "npar", - :RMSEA => "rmsea", -) - -function test_fitmeasures(measures, 
measures_lav; rtol = 1e-4, atol = 0, fitmeasure_names = fitmeasure_names_ml) - correct = [] - for key in keys(fitmeasure_names) - measure = measures[key] - measure_lav = measures_lav.x[measures_lav[:, 1] .== fitmeasure_names[key]][1] - push!(correct, isapprox(measure, measure_lav; rtol = rtol, atol = atol)) - end - return correct -end - -function compare_estimates(partable::ParameterTable, partable_lav; - rtol = 1e-10, atol = 0, col = :estimate, lav_col = :est) - - correct = [] - - for i in findall(partable.columns[:free]) - - from = partable.columns[:from][i] - to = partable.columns[:to][i] - type = partable.columns[:parameter_type][i] - estimate = partable.columns[col][i] - - if from == Symbol("1") - - lav_ind = findall( - (partable_lav.lhs .== String(to)) .& - (partable_lav.op .== "~1")) - - if length(lav_ind) == 0 - throw(ErrorException("At least one parameter could not be found in the lavaan solution")) - elseif length(lav_ind) > 1 - throw(ErrorException("At least one parameter was found twice in the lavaan solution")) - else - is_correct = isapprox(estimate, partable_lav[:, lav_col][lav_ind[1]]; rtol = rtol, atol = atol) - push!(correct, is_correct) - end - - else - - if type == :↔ - type = "~~" - elseif type == :→ - if (from ∈ partable.variables[:latent_vars]) & (to ∈ partable.variables[:observed_vars]) - type = "=~" - else - type = "~" - from, to = to, from - end - end - - lav_ind = findall( - (partable_lav.lhs .== String(from)) .& - (partable_lav.rhs .== String(to)) .& - (partable_lav.op .== type)) - - if length(lav_ind) == 0 - throw(ErrorException("At least one parameter could not be found in the lavaan solution")) - elseif length(lav_ind) > 1 - throw(ErrorException("At least one parameter was found twice in the lavaan solution")) - else - is_correct = isapprox(estimate, partable_lav[:, lav_col][lav_ind[1]]; rtol = rtol, atol = atol) - push!(correct, is_correct) - end - - end - - end - - return all(correct) -end \ No newline at end of file diff --git a/test/examples/recover_parameters/recover_parameters_twofact.jl b/test/examples/recover_parameters/recover_parameters_twofact.jl index a8a83a1a8..9cfbc065d 100644 --- a/test/examples/recover_parameters/recover_parameters_twofact.jl +++ b/test/examples/recover_parameters/recover_parameters_twofact.jl @@ -1,8 +1,8 @@ using StructuralEquationModels, Distributions, Random, Optim, LineSearches import StructuralEquationModels as SEM -include("helper.jl") +include(joinpath(chop(dirname(pathof(StructuralEquationModels)), tail = 3), "test/examples/helper.jl")) -x = Symbol.("x".*string.(1:13)) +x = Symbol.("x", 1:13) S = [:x1 0 0 0 0 0 0 0 0 :x2 0 0 0 0 0 0 @@ -35,10 +35,10 @@ true_val = [repeat([1], 8) 0.4 repeat([0.8], 4)] -start_val = [repeat([1], 9) +start = [repeat([1], 9) repeat([0.5], 4)] -imply_ml = RAMSymbolic(;specification = ram_matrices, start_val = start_val) +imply_ml = RAMSymbolic(;specification = ram_matrices, start_val = start) imply_ml.Σ_function(imply_ml.Σ, true_val) @@ -48,7 +48,7 @@ Random.seed!(1234) x = transpose(rand(true_dist, 100000)) semobserved = SemObsCommon(data = x) -loss_ml = SemLoss(SEM.SemML(;observed = semobserved, n_par = length(start_val))) +loss_ml = SemLoss(SEM.SemML(;observed = semobserved, n_par = length(start))) diff = SemDiffOptim( @@ -58,7 +58,7 @@ diff = x_tol = 1.5e-8)) model_ml = Sem(semobserved, imply_ml, loss_ml, diff) -model_ml(true_val, true, false, false) +objective!(model_ml, true_val) solution_ml = sem_fit(model_ml) @test isapprox(true_val, solution(solution_ml); atol = .05) \ No newline 
at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 4552c87a4..8f64152bd 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,5 @@ using Test, SafeTestsets +@test ENV["JULIA_EXTENDED_TESTS"] == "true" @time @safetestset "Unit Tests" begin include("unit_tests/unit_tests.jl") end @time @safetestset "Example Models" begin include("examples/examples.jl") end \ No newline at end of file
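A remark on the rewritten helpers in test/examples/helper.jl: the output buffers are seeded with 1.0 rather than 0.0 so that a method which accumulates into its output instead of overwriting it fails the comparison against the finite-difference reference. A self-contained sketch of that check, with a toy objective in place of a Sem model:

```julia
using FiniteDiff

f(x) = sum(abs2, x)
grad!(g, x) = (g .= 2 .* x; nothing)   # correct: overwrites g completely

x = randn(3)
g = similar(x); g .= 1.0               # nonzero seed exposes `g .+= ...` bugs
grad!(g, x)
true_g = FiniteDiff.finite_difference_gradient(f, x)
@assert isapprox(g, true_g; rtol = 1e-6)
```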
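Finally, the new `@test ENV["JULIA_EXTENDED_TESTS"] == "true"` line makes the suite throw a KeyError when the variable is unset, i.e. on any run outside the CI workflow that defines it. If local `Pkg.test()` runs should still pass, a softer variant (hypothetical, not part of this diff) reads the variable with a default:

```julia
# Hypothetical alternative for test/runtests.jl: tolerate an unset variable.
extended_tests = get(ENV, "JULIA_EXTENDED_TESTS", "false") == "true"
```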