diff --git a/Make.inc b/Make.inc
index 8b7f9c1c4ac32..dfc2594c406eb 100644
--- a/Make.inc
+++ b/Make.inc
@@ -452,8 +452,8 @@ JULIACODEGEN := LLVM
 ifeq ($(FORCE_ASSERTIONS), 1)
 # C++ code needs to include LLVM header with the same assertion flag as LLVM
 # Use this flag to re-enable assertion in our code after all the LLVM headers are included
-CXX_DISABLE_ASSERTION :=
-DISABLE_ASSERTIONS :=
+CXX_DISABLE_ASSERTION := -DJL_VERIFY_PASSES
+DISABLE_ASSERTIONS := -DJL_VERIFY_PASSES
 else
 CXX_DISABLE_ASSERTION := -DJL_NDEBUG
 DISABLE_ASSERTIONS := -DNDEBUG -DJL_NDEBUG
diff --git a/NEWS.md b/NEWS.md
index 92790c54e5b35..db30021099233 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -58,6 +58,10 @@ Multi-threading changes
   An interactive task desires low latency and implicitly agrees to be short duration or to
   yield frequently. Interactive tasks will run on interactive threads, if any are specified
   when Julia is started ([#42302]).
+* Threads started outside the Julia runtime (e.g. from C or Java) can now become able to
+  call into Julia code by calling `jl_adopt_thread`. This is done automatically when
+  entering Julia code via `cfunction` or a `@ccallable` entry point. As a consequence, the
+  number of threads can now change during execution ([#46609]).
 
 Build system changes
 --------------------
diff --git a/README.md b/README.md
index aaf05e6d01237..a8716de05699a 100644
--- a/README.md
+++ b/README.md
@@ -92,7 +92,7 @@ and then use the command prompt to change into the resulting julia directory. By
 Julia. However, most users should use the [most recent stable version](https://github.com/JuliaLang/julia/releases)
 of Julia. You can get this version by running:
 
-    git checkout v1.8.0
+    git checkout v1.8.2
 
 To build the `julia` executable, run `make` from within the julia directory.
diff --git a/base/Base.jl b/base/Base.jl
index 63728fdba3e4e..29a6f9ed4366d 100644
--- a/base/Base.jl
+++ b/base/Base.jl
@@ -53,11 +53,11 @@ setproperty!(x::Tuple, f::Int, v, order::Symbol) = setfield!(x, f, v, order) # t
 getproperty(x, f::Symbol, order::Symbol) = (@inline; getfield(x, f, order))
 setproperty!(x, f::Symbol, v, order::Symbol) = (@inline; setfield!(x, f, convert(fieldtype(typeof(x), f), v), order))
-swapproperty!(x, f::Symbol, v, order::Symbol=:notatomic) =
+swapproperty!(x, f::Symbol, v, order::Symbol=:not_atomic) =
     (@inline; Core.swapfield!(x, f, convert(fieldtype(typeof(x), f), v), order))
-modifyproperty!(x, f::Symbol, op, v, order::Symbol=:notatomic) =
+modifyproperty!(x, f::Symbol, op, v, order::Symbol=:not_atomic) =
     (@inline; Core.modifyfield!(x, f, op, v, order))
-replaceproperty!(x, f::Symbol, expected, desired, success_order::Symbol=:notatomic, fail_order::Symbol=success_order) =
+replaceproperty!(x, f::Symbol, expected, desired, success_order::Symbol=:not_atomic, fail_order::Symbol=success_order) =
     (@inline; Core.replacefield!(x, f, expected, convert(fieldtype(typeof(x), f), desired), success_order, fail_order))
 
 convert(::Type{Any}, Core.@nospecialize x) = x
@@ -103,6 +103,17 @@ include("generator.jl")
 include("reflection.jl")
 include("options.jl")
 
+# define invoke(f, T, args...; kwargs...), without kwargs wrapping
+# to forward to invoke
+function Core.kwcall(kwargs, ::typeof(invoke), f, T, args...)
+    @inline
+    # prepend kwargs and f to the invoked from the user
+    T = rewrap_unionall(Tuple{Any, Core.Typeof(f), (unwrap_unionall(T)::DataType).parameters...}, T)
+    return invoke(Core.kwcall, T, kwargs, f, args...)
+end
+# invoke does not have its own call cache, but kwcall for invoke does
+typeof(invoke).name.mt.max_args = 3 # invoke, f, T, args...
+
 # core operations & types
 include("promotion.jl")
 include("tuple.jl")
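With the `Core.kwcall` definition above, `invoke` accepts keyword arguments and forwards them through the new dispatch token. A minimal sketch of what this enables; the `area` methods below are hypothetical and exist only for illustration:

```julia
# Hypothetical methods, defined only to demonstrate `invoke` with keywords.
area(r::Real; scale=1) = scale * r^2
area(r::Integer; scale=1) = scale * r^2 + 1

# Force dispatch to the `Real` method; the keyword is routed via Core.kwcall:
invoke(area, Tuple{Real}, 3; scale=2)  # returns 18, bypassing the Integer method
```

Previously, passing keyword arguments to `invoke` raised an error.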
diff --git a/base/abstractarray.jl b/base/abstractarray.jl
index 28e6ca8f0cdbd..b42aee1f3a36a 100644
--- a/base/abstractarray.jl
+++ b/base/abstractarray.jl
@@ -138,13 +138,25 @@ axes1(iter) = oneto(length(iter))
 
 Return an efficient array describing all valid indices for `a` arranged in the shape of `a` itself.
 
-They keys of 1-dimensional arrays (vectors) are integers, whereas all other N-dimensional
+The keys of 1-dimensional arrays (vectors) are integers, whereas all other N-dimensional
 arrays use [`CartesianIndex`](@ref) to describe their locations. Often the special array
 types [`LinearIndices`](@ref) and [`CartesianIndices`](@ref) are used to efficiently
 represent these arrays of integers and `CartesianIndex`es, respectively.
 
 Note that the `keys` of an array might not be the most efficient index type; for maximum
 performance use [`eachindex`](@ref) instead.
+
+# Examples
+```jldoctest
+julia> keys([4, 5, 6])
+3-element LinearIndices{1, Tuple{Base.OneTo{Int64}}}:
+ 1
+ 2
+ 3
+
+julia> keys([4 5; 6 7])
+CartesianIndices((2, 2))
+```
 """
 keys(a::AbstractArray) = CartesianIndices(axes(a))
 keys(a::AbstractVector) = LinearIndices(a)
@@ -154,7 +166,7 @@ keys(a::AbstractVector) = LinearIndices(a)
     keytype(A::AbstractArray)
 
 Return the key type of an array. This is equal to the
-`eltype` of the result of `keys(...)`, and is provided
+[`eltype`](@ref) of the result of `keys(...)`, and is provided
 mainly for compatibility with the dictionary interface.
 
 # Examples
@@ -180,7 +192,7 @@ valtype(a::AbstractArray) = valtype(typeof(a))
     valtype(T::Type{<:AbstractArray})
     valtype(A::AbstractArray)
 
-Return the value type of an array. This is identical to `eltype` and is
+Return the value type of an array. This is identical to [`eltype`](@ref) and is
 provided mainly for compatibility with the dictionary interface.
 
 # Examples
@@ -226,7 +238,7 @@ eltype(::Type{<:AbstractArray{E}}) where {E} = @isdefined(E) ? E : Any
 """
     elsize(type)
 
-Compute the memory stride in bytes between consecutive elements of `eltype`
+Compute the memory stride in bytes between consecutive elements of [`eltype`](@ref)
 stored inside the given `type`, if the array elements are stored densely with a
 uniform linear stride.
@@ -2870,7 +2882,7 @@ end
 """
     isless(A::AbstractVector, B::AbstractVector)
 
-Returns true when `A` is less than `B` in lexicographic order.
+Return `true` when `A` is less than `B` in lexicographic order.
 """
 isless(A::AbstractVector, B::AbstractVector) = cmp(A, B) < 0
diff --git a/base/array.jl b/base/array.jl
index c27b2f6c7d524..64d0ac05fd507 100644
--- a/base/array.jl
+++ b/base/array.jl
@@ -36,7 +36,7 @@ const AbstractMatrix{T} = AbstractArray{T,2}
 Union type of [`AbstractVector{T}`](@ref) and [`AbstractMatrix{T}`](@ref).
 """
 const AbstractVecOrMat{T} = Union{AbstractVector{T}, AbstractMatrix{T}}
-const RangeIndex = Union{Int, AbstractRange{Int}, AbstractUnitRange{Int}}
+const RangeIndex = Union{<:BitInteger, AbstractRange{<:BitInteger}}
 const DimOrInd = Union{Integer, AbstractUnitRange}
 const IntOrInd = Union{Int, AbstractUnitRange}
 const DimsOrInds{N} = NTuple{N,DimOrInd}
@@ -152,7 +152,7 @@ size(a::Array{<:Any,N}) where {N} = (@inline; ntuple(M -> size(a, M), Val(N))::D
 
 asize_from(a::Array, n) = n > ndims(a) ? () : (arraysize(a,n), asize_from(a, n+1)...)
 
-allocatedinline(T::Type) = (@_total_meta; ccall(:jl_stored_inline, Cint, (Any,), T) != Cint(0))
+allocatedinline(@nospecialize T::Type) = (@_total_meta; ccall(:jl_stored_inline, Cint, (Any,), T) != Cint(0))
 
 """
     Base.isbitsunion(::Type{T})
@@ -951,6 +951,18 @@ end
 
 Store the given value at the given key or index within a collection. The syntax `a[i,j,...] =
 x` is converted by the compiler to `(setindex!(a, x, i, j, ...); x)`.
+
+# Examples
+```jldoctest
+julia> a = Dict("a"=>1)
+Dict{String, Int64} with 1 entry:
+  "a" => 1
+
+julia> setindex!(a, 2, "b")
+Dict{String, Int64} with 2 entries:
+  "b" => 2
+  "a" => 1
+```
 """
 function setindex! end
 
@@ -1247,7 +1259,12 @@ end
 """
     sizehint!(s, n) -> s
 
-Suggest that collection `s` reserve capacity for at least `n` elements. This can improve performance.
+Suggest that collection `s` reserve capacity for at least `n` elements. That is, if
+you expect that you're going to have to push a lot of values onto `s`, you can avoid
+the cost of incremental reallocation by doing it once up front; this can improve
+performance.
+
+See also [`resize!`](@ref).
 
 # Notes on the performance model
 
@@ -2653,8 +2670,8 @@ julia> map(filter(iseven), [1:3, 2:4, 3:5])
  [2, 4]
  [4]
 ```
-!!! compat "Julia 1.8"
-    This method requires at least Julia 1.8.
+!!! compat "Julia 1.9"
+    This method requires at least Julia 1.9.
 """
 function filter(f)
     Fix1(filter, f)
diff --git a/base/asyncevent.jl b/base/asyncevent.jl
index d3938bd66c842..183f38613a50f 100644
--- a/base/asyncevent.jl
+++ b/base/asyncevent.jl
@@ -306,7 +306,7 @@ Waits until `testcb()` returns `true` or `timeout` seconds have passed, whicheve
 The test function is polled every `pollint` seconds. The minimum value for `pollint`
 is 0.001 seconds, that is, 1 millisecond.
 
-Returns :ok or :timed_out
+Return `:ok` or `:timed_out`.
 """
 function timedwait(testcb, timeout::Real; pollint::Real=0.1)
     pollint >= 1e-3 || throw(ArgumentError("pollint must be ≥ 1 millisecond"))
diff --git a/base/bitarray.jl b/base/bitarray.jl
index 841509a90ba44..4662c4950b077 100644
--- a/base/bitarray.jl
+++ b/base/bitarray.jl
@@ -1779,26 +1779,42 @@ end
 # map across the chunks. Otherwise, fall-back to the AbstractArray method that
 # iterates bit-by-bit.
 function bit_map!(f::F, dest::BitArray, A::BitArray) where F
-    size(A) == size(dest) || throw(DimensionMismatch("sizes of dest and A must match"))
+    length(A) <= length(dest) || throw(DimensionMismatch("length of destination must be >= length of collection"))
     isempty(A) && return dest
     destc = dest.chunks
     Ac = A.chunks
-    for i = 1:(length(Ac)-1)
+    len_Ac = length(Ac)
+    for i = 1:(len_Ac-1)
         destc[i] = f(Ac[i])
     end
-    destc[end] = f(Ac[end]) & _msk_end(A)
+    # the last affected UInt64's original content
+    dest_last = destc[len_Ac]
+    _msk = _msk_end(A)
+    # first zero out the bits the mask is going to change
+    destc[len_Ac] = (dest_last & (~_msk))
+    # then update bits by `or`ing with a masked RHS
+    destc[len_Ac] |= f(Ac[len_Ac]) & _msk
     dest
 end
 function bit_map!(f::F, dest::BitArray, A::BitArray, B::BitArray) where F
-    size(A) == size(B) == size(dest) || throw(DimensionMismatch("sizes of dest, A, and B must all match"))
+    min_bitlen = min(length(A), length(B))
+    min_bitlen <= length(dest) || throw(DimensionMismatch("length of destination must be >= length of smallest input collection"))
     isempty(A) && return dest
+    isempty(B) && return dest
     destc = dest.chunks
     Ac = A.chunks
     Bc = B.chunks
-    for i = 1:(length(Ac)-1)
+    len_Ac = min(length(Ac), length(Bc))
+    for i = 1:len_Ac-1
         destc[i] = f(Ac[i], Bc[i])
     end
-    destc[end] = f(Ac[end], Bc[end]) & _msk_end(A)
+    # the last affected UInt64's original content
+    dest_last = destc[len_Ac]
+    _msk = _msk_end(min_bitlen)
+    # first zero out the bits the mask is going to change
+    destc[len_Ac] = (dest_last & ~(_msk))
+    # then update bits by `or`ing with a masked RHS
+    destc[len_Ac] |= f(Ac[len_Ac], Bc[len_Ac]) & _msk
     dest
 end
diff --git a/base/boot.jl b/base/boot.jl
index 5f3b99df1c716..80ef23cd0fd78 100644
--- a/base/boot.jl
+++ b/base/boot.jl
@@ -369,9 +369,11 @@ include(m::Module, fname::String) = ccall(:jl_load_, Any, (Any, Any), m, fname)
 
 eval(m::Module, @nospecialize(e)) = ccall(:jl_toplevel_eval_in, Any, (Any, Any), m, e)
 
-kwfunc(@nospecialize(f)) = ccall(:jl_get_keyword_sorter, Any, (Any,), f)
-
-kwftype(@nospecialize(t)) = typeof(ccall(:jl_get_kwsorter, Any, (Any,), t))
+# dispatch token indicating a kwarg (keyword sorter) call
+function kwcall end
+# deprecated internal functions:
+kwfunc(@nospecialize(f)) = kwcall
+kwftype(@nospecialize(t)) = typeof(kwcall)
 
 mutable struct Box
     contents::Any
@@ -615,7 +617,8 @@ end
 
 NamedTuple() = NamedTuple{(),Tuple{}}(())
 
-NamedTuple{names}(args::Tuple) where {names} = NamedTuple{names,typeof(args)}(args)
+eval(Core, :(NamedTuple{names}(args::Tuple) where {names} =
+             $(Expr(:splatnew, :(NamedTuple{names,typeof(args)}), :args))))
 
 using .Intrinsics: sle_int, add_int
diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl
index c5be04ad69f42..b8d654d3f9a00 100644
--- a/base/compiler/abstractinterpretation.jl
+++ b/base/compiler/abstractinterpretation.jl
@@ -73,7 +73,7 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
         # which is all that's required for :consistent-cy. Of course, we don't
         # know anything else about this statement.
         effects = Effects(; consistent=ALWAYS_TRUE, nonoverlayed)
-        return CallMeta(Any, effects, false)
+        return CallMeta(Any, effects, NoCallInfo())
     end
 
     argtypes = arginfo.argtypes
@@ -81,7 +81,7 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
                              InferenceParams(interp).MAX_UNION_SPLITTING, max_methods)
     if isa(matches, FailedMethodMatch)
        add_remark!(interp, sv, matches.reason)
-        return CallMeta(Any, Effects(), false)
+        return CallMeta(Any, Effects(), NoCallInfo())
     end
 
     (; valid_worlds, applicable, info) = matches
@@ -129,8 +129,8 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
             (; rt, edge, effects) = result
             this_argtypes = isa(matches, MethodMatches) ? argtypes : matches.applicable_argtypes[i]
             this_arginfo = ArgInfo(fargs, this_argtypes)
-            const_call_result = abstract_call_method_with_const_args(interp, result,
-                f, this_arginfo, si, match, sv)
+            const_call_result = abstract_call_method_with_const_args(interp,
+                result, f, this_arginfo, si, match, sv)
             const_result = nothing
             if const_call_result !== nothing
                 if const_call_result.rt ⊑ᵢ rt
@@ -158,8 +158,8 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
             # this is in preparation for inlining, or improving the return result
             this_argtypes = isa(matches, MethodMatches) ? argtypes : matches.applicable_argtypes[i]
             this_arginfo = ArgInfo(fargs, this_argtypes)
-            const_call_result = abstract_call_method_with_const_args(interp, result,
-                f, this_arginfo, si, match, sv)
+            const_call_result = abstract_call_method_with_const_args(interp,
+                result, f, this_arginfo, si, match, sv)
             const_result = nothing
             if const_call_result !== nothing
                 this_const_conditional = ignorelimited(const_call_result.rt)
@@ -220,7 +220,7 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
         method = match.method
         sig = match.spec_types
         mi = specialize_method(match; preexisting=true)
-        if mi !== nothing && !const_prop_methodinstance_heuristic(interp, match, mi::MethodInstance, arginfo, sv)
+        if mi !== nothing && !const_prop_methodinstance_heuristic(interp, match, mi, arginfo, sv)
             csig = get_compileable_sig(method, sig, match.sparams)
             if csig !== nothing && csig !== sig
                 abstract_call_method(interp, method, csig, match.sparams, multiple_matches, StmtInfo(false), sv)
@@ -841,13 +841,7 @@ function concrete_eval_call(interp::AbstractInterpreter,
             # The evaluation threw. By :consistent-cy, we're guaranteed this would have happened at runtime
             return ConstCallResults(Union{}, ConcreteResult(edge, result.effects), result.effects, edge)
         end
-        if is_inlineable_constant(value) || call_result_unused(si)
-            # If the constant is not inlineable, still do the const-prop, since the
-            # code that led to the creation of the Const may be inlineable in the same
-            # circumstance and may be optimizable.
-            return ConstCallResults(Const(value), ConcreteResult(edge, EFFECTS_TOTAL, value), EFFECTS_TOTAL, edge)
-        end
-        return false
+        return ConstCallResults(Const(value), ConcreteResult(edge, EFFECTS_TOTAL, value), EFFECTS_TOTAL, edge)
     else # eligible for semi-concrete evaluation
         return true
     end
@@ -886,6 +880,12 @@ function abstract_call_method_with_const_args(interp::AbstractInterpreter,
     if !const_prop_enabled(interp, sv, match)
         return nothing
     end
+    if is_removable_if_unused(result.effects)
+        if isa(result.rt, Const) || call_result_unused(si)
+            add_remark!(interp, sv, "[constprop] No more information to be gained")
+            return nothing
+        end
+    end
     res = concrete_eval_call(interp, f, result, arginfo, si, sv, invokecall)
     isa(res, ConstCallResults) && return res
     mi = maybe_get_const_prop_profitable(interp, result, f, arginfo, si, match, sv)
@@ -926,21 +926,30 @@ function abstract_call_method_with_const_args(interp::AbstractInterpreter,
            return nothing
        end
        frame = InferenceState(inf_result, #=cache=#:local, interp)
-        frame === nothing && return nothing # this is probably a bad generated function (unsound), but just ignore it
+        if frame === nothing
+            add_remark!(interp, sv, "[constprop] Could not retrieve the source")
+            return nothing # this is probably a bad generated function (unsound), but just ignore it
+        end
        frame.parent = sv
-        typeinf(interp, frame) || return nothing
+        if !typeinf(interp, frame)
+            add_remark!(interp, sv, "[constprop] Fresh constant inference hit a cycle")
+            return nothing
+        end
+        @assert !isa(inf_result.result, InferenceState)
+    else
+        if isa(inf_result.result, InferenceState)
+            add_remark!(interp, sv, "[constprop] Found cached constant inference in a cycle")
+            return nothing
+        end
    end
-    result = inf_result.result
-    # if constant inference hits a cycle, just bail out
-    isa(result, InferenceState) && return nothing
-    return ConstCallResults(result, ConstPropResult(inf_result), inf_result.ipo_effects, mi)
+    return ConstCallResults(inf_result.result, ConstPropResult(inf_result), inf_result.ipo_effects, mi)
 end
 
 # if there's a possibility we could get a better result with these constant arguments
 # (hopefully without doing too much work), returns `MethodInstance`, or nothing otherwise
-function maybe_get_const_prop_profitable(interp::AbstractInterpreter, result::MethodCallResult,
-                                         @nospecialize(f), arginfo::ArgInfo, si::StmtInfo, match::MethodMatch,
-                                         sv::InferenceState)
+function maybe_get_const_prop_profitable(interp::AbstractInterpreter,
+    result::MethodCallResult, @nospecialize(f), arginfo::ArgInfo, si::StmtInfo,
+    match::MethodMatch, sv::InferenceState)
     method = match.method
     force = force_const_prop(interp, f, method)
     force || const_prop_entry_heuristic(interp, result, si, sv) || return nothing
@@ -1127,9 +1136,8 @@ end
 # This is a heuristic to avoid trying to const prop through complicated functions
 # where we would spend a lot of time, but are probably unlikely to get an improved
 # result anyway.
-function const_prop_methodinstance_heuristic(
-    interp::AbstractInterpreter, match::MethodMatch, mi::MethodInstance,
-    (; argtypes)::ArgInfo, sv::InferenceState)
+function const_prop_methodinstance_heuristic(interp::AbstractInterpreter,
    match::MethodMatch, mi::MethodInstance, arginfo::ArgInfo, sv::InferenceState)
     method = match.method
     if method.is_for_opaque_closure
         # Not inlining an opaque closure can be very expensive, so be generous
@@ -1162,7 +1170,8 @@ function const_prop_methodinstance_heuristic(
         else
             inferred = code.inferred
         end
-        if inlining_policy(interp, inferred, IR_FLAG_NULL, mi, argtypes) !== nothing
+        # TODO propagate a specific `CallInfo` that conveys information about this call
+        if inlining_policy(interp, inferred, NoCallInfo(), IR_FLAG_NULL, mi, arginfo.argtypes) !== nothing
             return true
         end
     end
@@ -1243,7 +1252,7 @@ function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft)
 
     tti0 = widenconst(typ)
     tti = unwrap_unionall(tti0)
-    if isa(tti, DataType) && tti.name === NamedTuple_typename
+    if isa(tti, DataType) && tti.name === _NAMEDTUPLE_NAME
         # A NamedTuple iteration is the same as the iteration of its Tuple parameter:
         # compute a new `tti == unwrap_unionall(tti0)` based on that Tuple type
         tti = unwraptv(tti.parameters[2])
@@ -1326,13 +1335,13 @@ function abstract_iteration(interp::AbstractInterpreter, @nospecialize(itft), @n
         if !isa(stateordonet_widened, DataType) || !(stateordonet_widened <: Tuple) || isvatuple(stateordonet_widened) || length(stateordonet_widened.parameters) != 2
             break
         end
-        nstatetype = getfield_tfunc(stateordonet, Const(2))
+        nstatetype = getfield_tfunc(typeinf_lattice(interp), stateordonet, Const(2))
         # If there's no new information in this statetype, don't bother continuing,
         # the iterator won't be finite.
         if ⊑(typeinf_lattice(interp), nstatetype, statetype)
             return Any[Bottom], nothing
         end
-        valtype = getfield_tfunc(stateordonet, Const(1))
+        valtype = getfield_tfunc(typeinf_lattice(interp), stateordonet, Const(1))
         push!(ret, valtype)
         statetype = nstatetype
         call = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), StmtInfo(true), sv)
@@ -1385,7 +1394,7 @@ function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, si::
                         max_methods::Int = get_max_methods(sv.mod, interp))
     itft = argtype_by_index(argtypes, 2)
     aft = argtype_by_index(argtypes, 3)
-    (itft === Bottom || aft === Bottom) && return CallMeta(Bottom, EFFECTS_THROWS, false)
+    (itft === Bottom || aft === Bottom) && return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo())
     aargtypes = argtype_tail(argtypes, 4)
     aftw = widenconst(aft)
     if !isa(aft, Const) && !isa(aft, PartialOpaque) && (!isType(aftw) || has_free_typevars(aftw))
@@ -1393,7 +1402,7 @@ function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, si::
             add_remark!(interp, sv, "Core._apply_iterate called on a function of a non-concrete type")
             # bail now, since it seems unlikely that abstract_call will be able to do any better after splitting
             # this also ensures we don't call abstract_call_gf_by_type below on an IntrinsicFunction or Builtin
-            return CallMeta(Any, Effects(), false)
+            return CallMeta(Any, Effects(), NoCallInfo())
         end
     end
     res = Union{}
@@ -1468,7 +1477,7 @@ function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, si::
         if bail_out_apply(interp, res, sv)
             if i != length(ctypes)
                 # No point carrying forward the info, we're not gonna inline it anyway
-                retinfo = false
+                retinfo = NoCallInfo()
             end
             break
         end
@@ -1668,21 +1677,21 @@ end
 function abstract_invoke(interp::AbstractInterpreter, (; fargs, argtypes)::ArgInfo, si::StmtInfo, sv::InferenceState)
     ft′ = argtype_by_index(argtypes, 2)
     ft = widenconst(ft′)
-    ft === Bottom && return CallMeta(Bottom, EFFECTS_THROWS, false)
+    ft === Bottom && return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo())
     (types, isexact, isconcrete, istype) = instanceof_tfunc(argtype_by_index(argtypes, 3))
-    types === Bottom && return CallMeta(Bottom, EFFECTS_THROWS, false)
-    isexact || return CallMeta(Any, Effects(), false)
+    types === Bottom && return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo())
+    isexact || return CallMeta(Any, Effects(), NoCallInfo())
     argtype = argtypes_to_type(argtype_tail(argtypes, 4))
     nargtype = typeintersect(types, argtype)
-    nargtype === Bottom && return CallMeta(Bottom, EFFECTS_THROWS, false)
-    nargtype isa DataType || return CallMeta(Any, Effects(), false) # other cases are not implemented below
-    isdispatchelem(ft) || return CallMeta(Any, Effects(), false) # check that we might not have a subtype of `ft` at runtime, before doing supertype lookup below
+    nargtype === Bottom && return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo())
+    nargtype isa DataType || return CallMeta(Any, Effects(), NoCallInfo()) # other cases are not implemented below
+    isdispatchelem(ft) || return CallMeta(Any, Effects(), NoCallInfo()) # check that we might not have a subtype of `ft` at runtime, before doing supertype lookup below
     ft = ft::DataType
     lookupsig = rewrap_unionall(Tuple{ft, unwrap_unionall(types).parameters...}, types)::Type
     nargtype = Tuple{ft, nargtype.parameters...}
     argtype = Tuple{ft, argtype.parameters...}
     match, valid_worlds, overlayed = findsup(lookupsig, method_table(interp))
-    match === nothing && return CallMeta(Any, Effects(), false)
+    match === nothing && return CallMeta(Any, Effects(), NoCallInfo())
     update_valid_age!(sv, valid_worlds)
     method = match.method
     tienv = ccall(:jl_type_intersection_with_env, Any, (Any, Any), nargtype, method.sig)::SimpleVector
@@ -1700,19 +1709,22 @@ function abstract_invoke(interp::AbstractInterpreter, (; fargs, argtypes)::ArgIn
     #     t, a = ti.parameters[i], argtypes′[i]
     #     argtypes′[i] = t ⊑ a ? t : a
     # end
+    𝕃ₚ = ipo_lattice(interp)
     f = overlayed ? nothing : singleton_type(ft′)
     invokecall = InvokeCall(types, lookupsig)
     const_call_result = abstract_call_method_with_const_args(interp, result,
         f, arginfo, si, match, sv, invokecall)
     const_result = nothing
     if const_call_result !== nothing
-        if ⊑(typeinf_lattice(interp), const_call_result.rt, rt)
+        if ⊑(𝕃ₚ, const_call_result.rt, rt)
             (; rt, effects, const_result, edge) = const_call_result
         end
     end
+    rt = from_interprocedural!(𝕃ₚ, rt, sv, arginfo, sig)
     effects = Effects(effects; nonoverlayed=!overlayed)
+    info = InvokeCallInfo(match, const_result)
     edge !== nothing && add_invoke_backedge!(sv, lookupsig, edge)
-    return CallMeta(from_interprocedural!(ipo_lattice(interp), rt, sv, arginfo, sig), effects, InvokeCallInfo(match, const_result))
+    return CallMeta(rt, effects, info)
 end
 
 function invoke_rewrite(xs::Vector{Any})
@@ -1728,7 +1740,7 @@ function abstract_finalizer(interp::AbstractInterpreter, argtypes::Vector{Any},
         call = abstract_call(interp, ArgInfo(nothing, finalizer_argvec), StmtInfo(false), sv, 1)
         return CallMeta(Nothing, Effects(), FinalizerInfo(call.info, call.effects))
     end
-    return CallMeta(Nothing, Effects(), false)
+    return CallMeta(Nothing, Effects(), NoCallInfo())
 end
 
 # call where the function is known exactly
@@ -1750,25 +1762,14 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
         end
         rt = abstract_call_builtin(interp, f, arginfo, sv, max_methods)
         effects = builtin_effects(typeinf_lattice(interp), f, argtypes[2:end], rt)
-        return CallMeta(rt, effects, false)
+        return CallMeta(rt, effects, NoCallInfo())
     elseif isa(f, Core.OpaqueClosure)
         # calling an OpaqueClosure about which we have no information returns no information
-        return CallMeta(Any, Effects(), false)
-    elseif f === Core.kwfunc
-        if la == 2
-            aty = argtypes[2]
-            if !isvarargtype(aty)
-                ft = widenconst(aty)
-                if isa(ft, DataType) && isdefined(ft.name, :mt) && isdefined(ft.name.mt, :kwsorter)
-                    return CallMeta(Const(ft.name.mt.kwsorter), EFFECTS_TOTAL, MethodResultPure())
-                end
-            end
-        end
-        return CallMeta(Any, EFFECTS_UNKNOWN, false)
+        return CallMeta(Any, Effects(), NoCallInfo())
     elseif f === TypeVar
         # Manually look through the definition of TypeVar to
         # make sure to be able to get `PartialTypeVar`s out.
-        (la < 2 || la > 4) && return CallMeta(Union{}, EFFECTS_UNKNOWN, false)
+        (la < 2 || la > 4) && return CallMeta(Union{}, EFFECTS_UNKNOWN, NoCallInfo())
         n = argtypes[2]
         ub_var = Const(Any)
         lb_var = Const(Union{})
@@ -1778,14 +1779,14 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
         elseif la == 3
             ub_var = argtypes[3]
         end
-        return CallMeta(typevar_tfunc(n, lb_var, ub_var), EFFECTS_UNKNOWN, false)
+        return CallMeta(typevar_tfunc(n, lb_var, ub_var), EFFECTS_UNKNOWN, NoCallInfo())
     elseif f === UnionAll
-        return CallMeta(abstract_call_unionall(argtypes), EFFECTS_UNKNOWN, false)
+        return CallMeta(abstract_call_unionall(argtypes), EFFECTS_UNKNOWN, NoCallInfo())
     elseif f === Tuple && la == 2
         aty = argtypes[2]
         ty = isvarargtype(aty) ? unwrapva(aty) : widenconst(aty)
         if !isconcretetype(ty)
-            return CallMeta(Tuple, EFFECTS_UNKNOWN, false)
+            return CallMeta(Tuple, EFFECTS_UNKNOWN, NoCallInfo())
         end
     elseif is_return_type(f)
         return return_type_tfunc(interp, argtypes, si, sv)
@@ -1800,11 +1801,11 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
         # mark !== as exactly a negated call to ===
         rty = abstract_call_known(interp, (===), arginfo, si, sv, max_methods).rt
         if isa(rty, Conditional)
-            return CallMeta(Conditional(rty.slot, rty.elsetype, rty.thentype), EFFECTS_TOTAL, false) # swap if-else
+            return CallMeta(Conditional(rty.slot, rty.elsetype, rty.thentype), EFFECTS_TOTAL, NoCallInfo()) # swap if-else
         elseif isa(rty, Const)
             return CallMeta(Const(rty.val === false), EFFECTS_TOTAL, MethodResultPure())
         end
-        return CallMeta(rty, EFFECTS_TOTAL, false)
+        return CallMeta(rty, EFFECTS_TOTAL, NoCallInfo())
     elseif la == 3 && istopfunction(f, :(>:))
         # mark issupertype as a exact alias for issubtype
         # swap T1 and T2 arguments and call <:
@@ -1814,7 +1815,7 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
             fargs = nothing
         end
         argtypes = Any[typeof(<:), argtypes[3], argtypes[2]]
-        return CallMeta(abstract_call_known(interp, <:, ArgInfo(fargs, argtypes), si, sv, max_methods).rt, EFFECTS_TOTAL, false)
+        return CallMeta(abstract_call_known(interp, <:, ArgInfo(fargs, argtypes), si, sv, max_methods).rt, EFFECTS_TOTAL, NoCallInfo())
     elseif la == 2 &&
            (a2 = argtypes[2]; isa(a2, Const)) && (svecval = a2.val; isa(svecval, SimpleVector)) &&
           istopfunction(f, :length)
@@ -1848,28 +1849,28 @@ function abstract_call_opaque_closure(interp::AbstractInterpreter,
     tt = closure.typ
     sigT = (unwrap_unionall(tt)::DataType).parameters[1]
     match = MethodMatch(sig, Core.svec(), closure.source, sig <: rewrap_unionall(sigT, tt))
+    𝕃ₚ = ipo_lattice(interp)
+    ⊑ₚ = ⊑(𝕃ₚ)
     const_result = nothing
     if !result.edgecycle
         const_call_result = abstract_call_method_with_const_args(interp, result,
             nothing, arginfo, si, match, sv)
         if const_call_result !== nothing
-            if const_call_result.rt ⊑ rt
+            if const_call_result.rt ⊑ₚ rt
                 (; rt, effects, const_result, edge) = const_call_result
             end
         end
     end
-    info = OpaqueClosureCallInfo(match, const_result)
-    ipo = ipo_lattice(interp)
-    ⊑ₚ = ⊑(ipo)
     if check # analyze implicit type asserts on argument and return type
         ftt = closure.typ
         (aty, rty) = (unwrap_unionall(ftt)::DataType).parameters
         rty = rewrap_unionall(rty isa TypeVar ? rty.lb : rty, ftt)
-        if !(rt ⊑ₚ rty && tuple_tfunc(ipo, arginfo.argtypes[2:end]) ⊑ₚ rewrap_unionall(aty, ftt))
+        if !(rt ⊑ₚ rty && tuple_tfunc(𝕃ₚ, arginfo.argtypes[2:end]) ⊑ₚ rewrap_unionall(aty, ftt))
             effects = Effects(effects; nothrow=false)
         end
     end
-    rt = from_interprocedural!(ipo, rt, sv, arginfo, match.spec_types)
+    rt = from_interprocedural!(𝕃ₚ, rt, sv, arginfo, match.spec_types)
+    info = OpaqueClosureCallInfo(match, const_result)
     edge !== nothing && add_backedge!(sv, edge)
     return CallMeta(rt, effects, info)
 end
@@ -1896,13 +1897,13 @@ function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, si::StmtIn
         return abstract_call_opaque_closure(interp,
             ft, ArgInfo(arginfo.fargs, newargtypes), si, sv, #=check=#true)
     elseif (uft = unwrap_unionall(widenconst(ft)); isa(uft, DataType) && uft.name === typename(Core.OpaqueClosure))
-        return CallMeta(rewrap_unionall((uft::DataType).parameters[2], widenconst(ft)), Effects(), false)
+        return CallMeta(rewrap_unionall((uft::DataType).parameters[2], widenconst(ft)), Effects(), NoCallInfo())
     elseif f === nothing
         # non-constant function, but the number of arguments is known
         # and the ft is not a Builtin or IntrinsicFunction
         if hasintersect(widenconst(ft), Union{Builtin, Core.OpaqueClosure})
             add_remark!(interp, sv, "Could not identify method table for call")
-            return CallMeta(Any, Effects(), false)
+            return CallMeta(Any, Effects(), NoCallInfo())
         end
         max_methods = max_methods === nothing ? get_max_methods(sv.mod, interp) : max_methods
         return abstract_call_gf_by_type(interp, nothing, arginfo, si, argtypes_to_type(argtypes), sv, max_methods)
@@ -1983,7 +1984,11 @@ function abstract_eval_special_value(interp::AbstractInterpreter, @nospecialize(
     elseif isa(e, SSAValue)
         return abstract_eval_ssavalue(e, sv)
     elseif isa(e, SlotNumber)
-        return vtypes[slot_id(e)].typ
+        vtyp = vtypes[slot_id(e)]
+        if vtyp.undef
+            merge_effects!(interp, sv, Effects(EFFECTS_TOTAL; nothrow=false))
+        end
+        return vtyp.typ
     elseif isa(e, Argument)
         if !isa(vtypes, Nothing)
             return vtypes[slot_id(e)].typ
@@ -2109,16 +2114,16 @@ function abstract_eval_statement_expr(interp::AbstractInterpreter, e::Expr, vtyp
     elseif ehead === :splatnew
         t, isexact = instanceof_tfunc(abstract_eval_value(interp, e.args[1], vtypes, sv))
         nothrow = false # TODO: More precision
-        if length(e.args) == 2 && isconcretetype(t) && !ismutabletype(t)
+        if length(e.args) == 2 && isconcretedispatch(t) && !ismutabletype(t)
             at = abstract_eval_value(interp, e.args[2], vtypes, sv)
             n = fieldcount(t)
             if isa(at, Const) && isa(at.val, Tuple) && n == length(at.val::Tuple) &&
-                let t = t, at = at; _all(i->getfield(at.val::Tuple, i) isa fieldtype(t, i), 1:n); end
-                nothrow = isexact && isconcretedispatch(t)
+                let t = t, at = at; all(i::Int->getfield(at.val::Tuple, i) isa fieldtype(t, i), 1:n); end
+                nothrow = isexact
                 t = Const(ccall(:jl_new_structt, Any, (Any, Any), t, at.val))
             elseif isa(at, PartialStruct) && at ⊑ᵢ Tuple && n == length(at.fields::Vector{Any}) &&
-                let t = t, at = at; _all(i->(at.fields::Vector{Any})[i] ⊑ᵢ fieldtype(t, i), 1:n); end
-                nothrow = isexact && isconcretedispatch(t)
+                let t = t, at = at; all(i::Int->(at.fields::Vector{Any})[i] ⊑ᵢ fieldtype(t, i), 1:n); end
+                nothrow = isexact
                 t = PartialStruct(t, at.fields::Vector{Any})
             end
         end
diff --git a/base/compiler/abstractlattice.jl b/base/compiler/abstractlattice.jl
index 83e64cd4a042f..0bb02263493ad 100644
--- a/base/compiler/abstractlattice.jl
+++ b/base/compiler/abstractlattice.jl
@@ -9,7 +9,8 @@ extensions.
 """
 struct JLTypeLattice <: AbstractLattice; end
 widenlattice(::JLTypeLattice) = error("Type lattice is the least-precise lattice available")
-is_valid_lattice(::JLTypeLattice, @nospecialize(elem)) = isa(elem, Type)
+is_valid_lattice(lattice::JLTypeLattice, @nospecialize(elem)) = is_valid_lattice_norec(lattice, elem)
+is_valid_lattice_norec(::JLTypeLattice, @nospecialize(elem)) = isa(elem, Type)
 
 """
     struct ConstsLattice
@@ -18,8 +19,7 @@ A lattice extending `JLTypeLattice` and adjoining `Const` and `PartialTypeVar`.
 """
 struct ConstsLattice <: AbstractLattice; end
 widenlattice(::ConstsLattice) = JLTypeLattice()
-is_valid_lattice(lattice::ConstsLattice, @nospecialize(elem)) =
-    is_valid_lattice(widenlattice(lattice), elem) || isa(elem, Const) || isa(elem, PartialTypeVar)
+is_valid_lattice_norec(lattice::ConstsLattice, @nospecialize(elem)) = isa(elem, Const) || isa(elem, PartialTypeVar)
 
 """
     struct PartialsLattice{L}
@@ -30,9 +30,7 @@ struct PartialsLattice{L <: AbstractLattice} <: AbstractLattice
     parent::L
 end
 widenlattice(L::PartialsLattice) = L.parent
-is_valid_lattice(lattice::PartialsLattice, @nospecialize(elem)) =
-    is_valid_lattice(widenlattice(lattice), elem) ||
-    isa(elem, PartialStruct) || isa(elem, PartialOpaque)
+is_valid_lattice_norec(lattice::PartialsLattice, @nospecialize(elem)) = isa(elem, PartialStruct) || isa(elem, PartialOpaque)
 
 """
     struct ConditionalsLattice{L}
@@ -43,15 +41,13 @@ struct ConditionalsLattice{L <: AbstractLattice} <: AbstractLattice
     parent::L
 end
 widenlattice(L::ConditionalsLattice) = L.parent
-is_valid_lattice(lattice::ConditionalsLattice, @nospecialize(elem)) =
-    is_valid_lattice(widenlattice(lattice), elem) || isa(elem, Conditional)
+is_valid_lattice_norec(lattice::ConditionalsLattice, @nospecialize(elem)) = isa(elem, Conditional)
 
 struct InterConditionalsLattice{L <: AbstractLattice} <: AbstractLattice
     parent::L
 end
 widenlattice(L::InterConditionalsLattice) = L.parent
-is_valid_lattice(lattice::InterConditionalsLattice, @nospecialize(elem)) =
-    is_valid_lattice(widenlattice(lattice), elem) || isa(elem, InterConditional)
+is_valid_lattice_norec(lattice::InterConditionalsLattice, @nospecialize(elem)) = isa(elem, InterConditional)
 
 const AnyConditionalsLattice{L} = Union{ConditionalsLattice{L}, InterConditionalsLattice{L}}
 const BaseInferenceLattice = typeof(ConditionalsLattice(PartialsLattice(ConstsLattice())))
@@ -67,8 +63,7 @@ struct InferenceLattice{L} <: AbstractLattice
     parent::L
 end
 widenlattice(L::InferenceLattice) = L.parent
-is_valid_lattice(lattice::InferenceLattice, @nospecialize(elem)) =
-    is_valid_lattice(widenlattice(lattice), elem) || isa(elem, LimitedAccuracy)
+is_valid_lattice_norec(lattice::InferenceLattice, @nospecialize(elem)) = isa(elem, LimitedAccuracy)
 
 """
     struct OptimizerLattice
 
@@ -76,10 +71,12 @@ is_valid_lattice(lattice::InferenceLattice, @nospecialize(elem)) =
 The lattice used by the optimizer. Extends `BaseInferenceLattice` with `MaybeUndef`.
 """
-struct OptimizerLattice <: AbstractLattice; end
-widenlattice(L::OptimizerLattice) = BaseInferenceLattice.instance
-is_valid_lattice(lattice::OptimizerLattice, @nospecialize(elem)) =
-    is_valid_lattice(widenlattice(lattice), elem) || isa(elem, MaybeUndef)
+struct OptimizerLattice{L} <: AbstractLattice
+    parent::L
+end
+OptimizerLattice() = OptimizerLattice(BaseInferenceLattice.instance)
+widenlattice(L::OptimizerLattice) = L.parent
+is_valid_lattice_norec(lattice::OptimizerLattice, @nospecialize(elem)) = isa(elem, MaybeUndef)
 
 """
     tmeet(lattice, a, b::Type)
@@ -171,3 +168,13 @@ tmerge(@nospecialize(a), @nospecialize(b)) = tmerge(fallback_lattice, a, b)
 ⊏(@nospecialize(a), @nospecialize(b)) = ⊏(fallback_lattice, a, b)
 ⋤(@nospecialize(a), @nospecialize(b)) = ⋤(fallback_lattice, a, b)
 is_lattice_equal(@nospecialize(a), @nospecialize(b)) = is_lattice_equal(fallback_lattice, a, b)
+
+is_valid_lattice(lattice::AbstractLattice, @nospecialize(elem)) = is_valid_lattice_norec(lattice, elem) &&
+    is_valid_lattice(widenlattice(lattice), elem)
+
+# Widenlattice with argument
+widenlattice(::JLTypeLattice, @nospecialize(t)) = widenconst(t)
+function widenlattice(lattice::AbstractLattice, @nospecialize(t))
+    is_valid_lattice_norec(lattice, t) && return t
+    widenlattice(widenlattice(lattice), t)
+end
diff --git a/base/compiler/compiler.jl b/base/compiler/compiler.jl
index 3c41c353e86ad..db14dbb07f6e9 100644
--- a/base/compiler/compiler.jl
+++ b/base/compiler/compiler.jl
@@ -121,12 +121,9 @@ import Core.Compiler.CoreDocs
 Core.atdoc!(CoreDocs.docm)
 
 # sorting
-function sort! end
-function issorted end
 include("ordering.jl")
 using .Order
-include("sort.jl")
-using .Sort
+include("compiler/sort.jl")
 
 # We don't include some.jl, but this definition is still useful.
 something(x::Nothing, y...) = something(y...)
diff --git a/base/compiler/inferencestate.jl b/base/compiler/inferencestate.jl
index e1d20f01042c4..6f759475e144b 100644
--- a/base/compiler/inferencestate.jl
+++ b/base/compiler/inferencestate.jl
@@ -106,7 +106,7 @@ mutable struct InferenceState
     bb_vartables::Vector{Union{Nothing,VarTable}} # nothing if not analyzed yet
     ssavaluetypes::Vector{Any}
     stmt_edges::Vector{Union{Nothing,Vector{Any}}}
-    stmt_info::Vector{Any}
+    stmt_info::Vector{CallInfo}
 
     #= intermediate states for interprocedural abstract interpretation =#
     pclimitations::IdSet{InferenceState} # causes of precision restrictions (LimitedAccuracy) on currpc ssavalue
@@ -152,7 +152,7 @@ mutable struct InferenceState
         ssavalue_uses = find_ssavalue_uses(code, nssavalues)
         nstmts = length(code)
         stmt_edges = Union{Nothing, Vector{Any}}[ nothing for i = 1:nstmts ]
-        stmt_info = Any[ nothing for i = 1:nstmts ]
+        stmt_info = CallInfo[ NoCallInfo() for i = 1:nstmts ]
 
         nslots = length(src.slotflags)
         slottypes = Vector{Any}(undef, nslots)
diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl
index 2f45893d6e5e2..86e35fb3bbd7a 100644
--- a/base/compiler/optimize.jl
+++ b/base/compiler/optimize.jl
@@ -73,18 +73,12 @@ function add_invoke_backedge!(et::EdgeTracker, @nospecialize(invokesig), mi::Met
     return nothing
 end
 
-struct InliningState{S <: Union{EdgeTracker, Nothing}, MICache, I<:AbstractInterpreter}
-    params::OptimizationParams
-    et::S
-    mi_cache::MICache # TODO move this to `OptimizationState` (as used by EscapeAnalysis as well)
-    interp::I
-end
-
 is_source_inferred(@nospecialize(src::Union{CodeInfo, Vector{UInt8}})) =
     ccall(:jl_ir_flag_inferred, Bool, (Any,), src)
 
-function inlining_policy(interp::AbstractInterpreter, @nospecialize(src), stmt_flag::UInt8,
-    mi::MethodInstance, argtypes::Vector{Any})
+function inlining_policy(interp::AbstractInterpreter,
+    @nospecialize(src), @nospecialize(info::CallInfo), stmt_flag::UInt8, mi::MethodInstance,
+    argtypes::Vector{Any})
     if isa(src, CodeInfo) || isa(src, Vector{UInt8})
         src_inferred = is_source_inferred(src)
         src_inlineable = is_stmt_inline(stmt_flag) || is_inlineable(src)
@@ -103,64 +97,77 @@ function inlining_policy(interp::AbstractInterpreter, @nospecialize(src), stmt_f
         else
             return nothing
         end
+    elseif isa(src, IRCode)
+        return src
+    elseif isa(src, SemiConcreteResult)
+        # For NativeInterpreter, SemiConcreteResult are only produced if they're supposed
+        # to be inlined.
+        return src
     end
     return nothing
 end
 
+struct InliningState{Interp<:AbstractInterpreter}
+    params::OptimizationParams
+    et::Union{EdgeTracker,Nothing}
+    world::UInt
+    interp::Interp
+end
+function InliningState(frame::InferenceState, params::OptimizationParams, interp::AbstractInterpreter)
+    et = EdgeTracker(frame.stmt_edges[1]::Vector{Any}, frame.valid_worlds)
+    return InliningState(params, et, frame.world, interp)
+end
+function InliningState(params::OptimizationParams, interp::AbstractInterpreter)
+    return InliningState(params, nothing, get_world_counter(interp), interp)
+end
+
+# get `code_cache(::AbstractInterpreter)` from `state::InliningState`
+code_cache(state::InliningState) = WorldView(code_cache(state.interp), state.world)
+
 include("compiler/ssair/driver.jl")
 
-mutable struct OptimizationState
+mutable struct OptimizationState{Interp<:AbstractInterpreter}
     linfo::MethodInstance
     src::CodeInfo
     ir::Union{Nothing, IRCode}
-    stmt_info::Vector{Any}
+    stmt_info::Vector{CallInfo}
     mod::Module
-    sptypes::Vector{Any} # static parameters
+    sptypes::Vector{Any}
     slottypes::Vector{Any}
-    inlining::InliningState
+    inlining::InliningState{Interp}
     cfg::Union{Nothing,CFG}
-    function OptimizationState(frame::InferenceState, params::OptimizationParams,
-                               interp::AbstractInterpreter, recompute_cfg::Bool=true)
-        s_edges = frame.stmt_edges[1]::Vector{Any}
-        inlining = InliningState(params,
-            EdgeTracker(s_edges, frame.valid_worlds),
-            WorldView(code_cache(interp), frame.world),
-            interp)
-        cfg = recompute_cfg ? nothing : frame.cfg
-        return new(frame.linfo, frame.src, nothing, frame.stmt_info, frame.mod,
-                   frame.sptypes, frame.slottypes, inlining, cfg)
+end
+function OptimizationState(frame::InferenceState, params::OptimizationParams,
+                           interp::AbstractInterpreter, recompute_cfg::Bool=true)
+    inlining = InliningState(frame, params, interp)
+    cfg = recompute_cfg ? nothing : frame.cfg
+    return OptimizationState(frame.linfo, frame.src, nothing, frame.stmt_info, frame.mod,
+                             frame.sptypes, frame.slottypes, inlining, cfg)
+end
+function OptimizationState(linfo::MethodInstance, src::CodeInfo, params::OptimizationParams,
+                           interp::AbstractInterpreter)
+    # prepare src for running optimization passes if it isn't already
+    nssavalues = src.ssavaluetypes
+    if nssavalues isa Int
+        src.ssavaluetypes = Any[ Any for i = 1:nssavalues ]
+    else
+        nssavalues = length(src.ssavaluetypes::Vector{Any})
     end
-    function OptimizationState(linfo::MethodInstance, src::CodeInfo, params::OptimizationParams,
-                               interp::AbstractInterpreter)
-        # prepare src for running optimization passes
-        # if it isn't already
-        nssavalues = src.ssavaluetypes
-        if nssavalues isa Int
-            src.ssavaluetypes = Any[ Any for i = 1:nssavalues ]
-        else
-            nssavalues = length(src.ssavaluetypes::Vector{Any})
-        end
-        sptypes = sptypes_from_meth_instance(linfo)
-        nslots = length(src.slotflags)
-        slottypes = src.slottypes
-        if slottypes === nothing
-            slottypes = Any[ Any for i = 1:nslots ]
-        end
-        stmt_info = Any[nothing for i = 1:nssavalues]
-        # cache some useful state computations
-        def = linfo.def
-        mod = isa(def, Method) ? def.module : def
-        # Allow using the global MI cache, but don't track edges.
-        # This method is mostly used for unit testing the optimizer
-        inlining = InliningState(params,
-            nothing,
-            WorldView(code_cache(interp), get_world_counter(interp)),
-            interp)
-        return new(linfo, src, nothing, stmt_info, mod,
-                   sptypes, slottypes, inlining, nothing)
+    sptypes = sptypes_from_meth_instance(linfo)
+    nslots = length(src.slotflags)
+    slottypes = src.slottypes
+    if slottypes === nothing
+        slottypes = Any[ Any for i = 1:nslots ]
     end
+    stmt_info = CallInfo[ NoCallInfo() for i = 1:nssavalues ]
+    # cache some useful state computations
+    def = linfo.def
+    mod = isa(def, Method) ? def.module : def
+    # Allow using the global MI cache, but don't track edges.
+    # This method is mostly used for unit testing the optimizer
+    inlining = InliningState(params, interp)
+    return OptimizationState(linfo, src, nothing, stmt_info, mod, sptypes, slottypes, inlining, nothing)
 end
-
 function OptimizationState(linfo::MethodInstance, params::OptimizationParams, interp::AbstractInterpreter)
     src = retrieve_code_info(linfo)
     src === nothing && return nothing
@@ -243,6 +250,10 @@ function stmt_effect_flags(lattice::AbstractLattice, @nospecialize(stmt), @nospe
             nothrow = _builtin_nothrow(lattice, f, argtypes, rt)
             return (true, nothrow, nothrow)
         end
+        if f === Intrinsics.cglobal
+            # TODO: these are not yet linearized
+            return (false, false, false)
+        end
         isa(f, Builtin) || return (false, false, false)
         # Needs to be handled in inlining to look at the callee effects
         f === Core._apply_iterate && return (false, false, false)
@@ -598,7 +609,7 @@ function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
             insert!(code, idx, Expr(:code_coverage_effect))
             insert!(codelocs, idx, codeloc)
             insert!(ssavaluetypes, idx, Nothing)
-            insert!(stmtinfo, idx, nothing)
+            insert!(stmtinfo, idx, NoCallInfo())
             insert!(ssaflags, idx, IR_FLAG_NULL)
             if ssachangemap === nothing
                 ssachangemap = fill(0, nstmts)
@@ -619,7 +630,7 @@ function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
             insert!(code, idx + 1, ReturnNode())
             insert!(codelocs, idx + 1, codelocs[idx])
            insert!(ssavaluetypes, idx + 1, Union{})
-            insert!(stmtinfo, idx + 1, nothing)
+            insert!(stmtinfo, idx + 1, NoCallInfo())
            insert!(ssaflags, idx + 1, ssaflags[idx])
             if ssachangemap === nothing
                 ssachangemap = fill(0, nstmts)
diff --git a/base/compiler/sort.jl b/base/compiler/sort.jl
new file mode 100644
index 0000000000000..71d2f8a51cd59
--- /dev/null
+++ b/base/compiler/sort.jl
@@ -0,0 +1,100 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# reference on sorted binary search:
+#   http://www.tbray.org/ongoing/When/200x/2003/03/22/Binary
+
+# index of the first value of vector a that is greater than or equal to x;
+# returns lastindex(v)+1 if x is greater than all values in v.
+function searchsortedfirst(v::AbstractVector, x, lo::T, hi::T, o::Ordering)::keytype(v) where T<:Integer
+    hi = hi + T(1)
+    len = hi - lo
+    @inbounds while len != 0
+        half_len = len >>> 0x01
+        m = lo + half_len
+        if lt(o, v[m], x)
+            lo = m + 1
+            len -= half_len + 1
+        else
+            hi = m
+            len = half_len
+        end
+    end
+    return lo
+end
+
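+# Assumed helper (matching base/sort.jl): overflow-safe midpoint of two
+# integer indices; it is used by `searchsortedlast` and `searchsorted` below
+# but is not defined elsewhere in this file.
+midpoint(lo::T, hi::T) where T<:Integer = lo + ((hi - lo) >>> 0x01)
+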
+function searchsortedlast(v::AbstractVector, x, lo::T, hi::T, o::Ordering)::keytype(v) where T<:Integer + u = T(1) + lo = lo - u + hi = hi + u + @inbounds while lo < hi - u + m = midpoint(lo, hi) + if lt(o, x, v[m]) + hi = m + else + lo = m + end + end + return lo +end + +# returns the range of indices of v equal to x +# if v does not contain x, returns a 0-length range +# indicating the insertion point of x +function searchsorted(v::AbstractVector, x, ilo::T, ihi::T, o::Ordering)::UnitRange{keytype(v)} where T<:Integer + u = T(1) + lo = ilo - u + hi = ihi + u + @inbounds while lo < hi - u + m = midpoint(lo, hi) + if lt(o, v[m], x) + lo = m + elseif lt(o, x, v[m]) + hi = m + else + a = searchsortedfirst(v, x, max(lo,ilo), m, o) + b = searchsortedlast(v, x, m, min(hi,ihi), o) + return a : b + end + end + return (lo + 1) : (hi - 1) +end + +for s in [:searchsortedfirst, :searchsortedlast, :searchsorted] + @eval begin + $s(v::AbstractVector, x, o::Ordering) = $s(v,x,firstindex(v),lastindex(v),o) + $s(v::AbstractVector, x; + lt=isless, by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward) = + $s(v,x,ord(lt,by,rev,order)) + end +end + +# An unstable sorting algorithm for internal use +function sort!(v::Vector; by::Function=identity, (<)::Function=<) + isempty(v) && return v # This branch is hit 95% of the time + + # Of the remaining 5%, this branch is hit less than 1% of the time + if length(v) > 200 # Heap sort prevents quadratic runtime + o = ord(<, by, true) + heapify!(v, o) + for i in lastindex(v):-1:2 + y = v[i] + v[i] = v[1] + percolate_down!(v, 1, y, o, i-1) + end + return v + end + + @inbounds for i in 2:length(v) # Insertion sort + x = v[i] + y = by(x) + while i > 1 && y < by(v[i-1]) + v[i] = v[i-1] + i -= 1 + end + v[i] = x + end + + v +end diff --git a/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl b/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl index a60cfde597f4c..2fe364d640732 100644 --- a/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl +++ b/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl @@ -27,7 +27,7 @@ import ._TOP_MOD: # Base definitions pop!, push!, pushfirst!, empty!, delete!, max, min, enumerate, unwrap_unionall, ismutabletype import Core.Compiler: # Core.Compiler specific definitions - Bottom, InferenceResult, IRCode, IR_FLAG_NOTHROW, + Bottom, OptimizerLattice, InferenceResult, IRCode, IR_FLAG_NOTHROW, isbitstype, isexpr, is_meta_expr_head, println, widenconst, argextype, singleton_type, fieldcount_noerror, try_compute_field, try_compute_fieldidx, hasintersect, ⊑, intrinsic_nothrow, array_builtin_common_typecheck, arrayset_typecheck, @@ -1290,7 +1290,7 @@ function escape_call!(astate::AnalysisState, pc::Int, args::Vector{Any}, callinf # now cascade to the builtin handling escape_call!(astate, pc, args) return - elseif isa(info, CallInfo) + elseif isa(info, EACallInfo) for linfo in info.linfos escape_invoke!(astate, pc, args, linfo, 1) end @@ -1596,12 +1596,16 @@ function escape_builtin!(::typeof(setfield!), astate::AnalysisState, pc::Int, ar add_escape_change!(astate, val, ssainfo) # compute the throwness of this setfield! 
call here since builtin_nothrow doesn't account for that @label add_thrown_escapes - argtypes = Any[] - for i = 2:length(args) - push!(argtypes, argextype(args[i], ir)) + if length(args) == 4 && setfield!_nothrow(OptimizerLattice(), + argextype(args[2], ir), argextype(args[3], ir), argextype(args[4], ir)) + return true + elseif length(args) == 3 && setfield!_nothrow(OptimizerLattice(), + argextype(args[2], ir), argextype(args[3], ir)) + return true + else + add_thrown_escapes!(astate, pc, args, 2) + return true end - setfield!_nothrow(argtypes) || add_thrown_escapes!(astate, pc, args, 2) - return true end function escape_builtin!(::typeof(arrayref), astate::AnalysisState, pc::Int, args::Vector{Any}) diff --git a/base/compiler/ssair/EscapeAnalysis/interprocedural.jl b/base/compiler/ssair/EscapeAnalysis/interprocedural.jl index dcbc37df84635..74a43e9b9ec8e 100644 --- a/base/compiler/ssair/EscapeAnalysis/interprocedural.jl +++ b/base/compiler/ssair/EscapeAnalysis/interprocedural.jl @@ -1,18 +1,19 @@ # TODO this file contains many duplications with the inlining analysis code, factor them out import Core.Compiler: - MethodInstance, InferenceResult, Signature, ConstPropResult, ConcreteResult, SemiConcreteResult, - MethodResultPure, MethodMatchInfo, UnionSplitInfo, ConstCallInfo, InvokeCallInfo, - call_sig, argtypes_to_type, is_builtin, is_return_type, istopfunction, validate_sparams, - specialize_method, invoke_rewrite + MethodInstance, InferenceResult, Signature, ConstPropResult, ConcreteResult, + SemiConcreteResult, CallInfo, NoCallInfo, MethodResultPure, MethodMatchInfo, + UnionSplitInfo, ConstCallInfo, InvokeCallInfo, + call_sig, argtypes_to_type, is_builtin, is_return_type, istopfunction, + validate_sparams, specialize_method, invoke_rewrite const Linfo = Union{MethodInstance,InferenceResult} -struct CallInfo +struct EACallInfo linfos::Vector{Linfo} nothrow::Bool end -function resolve_call(ir::IRCode, stmt::Expr, @nospecialize(info)) +function resolve_call(ir::IRCode, stmt::Expr, @nospecialize(info::CallInfo)) sig = call_sig(ir, stmt) if sig === nothing return missing @@ -36,7 +37,7 @@ function resolve_call(ir::IRCode, stmt::Expr, @nospecialize(info)) end if info isa MethodResultPure return true - elseif info === false + elseif info === NoCallInfo return missing end # TODO handle OpaqueClosureCallInfo @@ -63,16 +64,16 @@ function analyze_invoke_call(sig::Signature, info::InvokeCallInfo) end result = info.result if isa(result, ConstPropResult) - return CallInfo(Linfo[result.result], true) + return EACallInfo(Linfo[result.result], true) elseif isa(result, ConcreteResult) - return CallInfo(Linfo[result.mi], true) + return EACallInfo(Linfo[result.mi], true) elseif isa(result, SemiConcreteResult) - return CallInfo(Linfo[result.mi], true) + return EACallInfo(Linfo[result.mi], true) else argtypes = invoke_rewrite(sig.argtypes) mi = analyze_match(match, length(argtypes)) mi === nothing && return missing - return CallInfo(Linfo[mi], true) + return EACallInfo(Linfo[mi], true) end end @@ -110,7 +111,7 @@ function analyze_const_call(sig::Signature, cinfo::ConstCallInfo) nothrow &= match.fully_covers end end - return CallInfo(linfos, nothrow) + return EACallInfo(linfos, nothrow) end function analyze_call(sig::Signature, infos::Vector{MethodMatchInfo}) @@ -133,7 +134,7 @@ function analyze_call(sig::Signature, infos::Vector{MethodMatchInfo}) nothrow &= match.fully_covers end end - return CallInfo(linfos, nothrow) + return EACallInfo(linfos, nothrow) end function analyze_match(match::MethodMatch, 
npassedargs::Int) diff --git a/base/compiler/ssair/domtree.jl b/base/compiler/ssair/domtree.jl index eaa21b52aa811..1edb8d2d5c6d4 100644 --- a/base/compiler/ssair/domtree.jl +++ b/base/compiler/ssair/domtree.jl @@ -162,6 +162,12 @@ function DFS!(D::DFSTree, blocks::Vector{BasicBlock}, is_post_dominator::Bool) # Push children to the stack for succ_bb in edges + if succ_bb == 0 + # Edge 0 indicates an error entry, but shouldn't affect + # the post-dominator tree. + @assert is_post_dominator + continue + end push!(to_visit, (succ_bb, pre_num, false)) end diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl index 246dc861c6b02..5829c96c9d0ee 100644 --- a/base/compiler/ssair/inlining.jl +++ b/base/compiler/ssair/inlining.jl @@ -1,7 +1,5 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -@nospecialize - struct Signature f::Any ft::Any @@ -9,7 +7,9 @@ struct Signature Signature(@nospecialize(f), @nospecialize(ft), argtypes::Vector{Any}) = new(f, ft, argtypes) end -struct ResolvedInliningSpec +struct InliningTodo + # The MethodInstance to be inlined + mi::MethodInstance # The IR of the inlinee ir::IRCode # If the function being inlined is a single basic block we can use a @@ -18,46 +18,24 @@ struct ResolvedInliningSpec # Effects of the call statement effects::Effects end -ResolvedInliningSpec(ir::IRCode, effects::Effects) = - ResolvedInliningSpec(ir, linear_inline_eligible(ir), effects) - -""" -Represents a callsite that our analysis has determined is legal to inline, -but did not resolve during the analysis step to allow the outer inlining -pass to apply its own inlining policy decisions. -""" -struct DelayedInliningSpec - match::Union{MethodMatch, InferenceResult} - argtypes::Vector{Any} - invokesig # either nothing or a signature (signature is for an `invoke` call) -end -DelayedInliningSpec(match, argtypes) = DelayedInliningSpec(match, argtypes, nothing) - -struct InliningTodo - # The MethodInstance to be inlined - mi::MethodInstance - spec::Union{ResolvedInliningSpec, DelayedInliningSpec} +function InliningTodo(mi::MethodInstance, ir::IRCode, effects::Effects) + return InliningTodo(mi, ir, linear_inline_eligible(ir), effects) end -InliningTodo(mi::MethodInstance, match::MethodMatch, argtypes::Vector{Any}, invokesig=nothing) = - InliningTodo(mi, DelayedInliningSpec(match, argtypes, invokesig)) - -InliningTodo(result::InferenceResult, argtypes::Vector{Any}, invokesig=nothing) = - InliningTodo(result.linfo, DelayedInliningSpec(result, argtypes, invokesig)) - struct ConstantCase val::Any - ConstantCase(val) = new(val) + ConstantCase(@nospecialize val) = new(val) end struct SomeCase val::Any - SomeCase(val) = new(val) + SomeCase(@nospecialize val) = new(val) end struct InvokeCase invoke::MethodInstance effects::Effects + info::CallInfo end struct InliningCase @@ -80,18 +58,16 @@ end struct InliningEdgeTracker et::Union{Nothing,EdgeTracker} - invokesig # ::Union{Nothing,Type} - InliningEdgeTracker(et::Union{Nothing,EdgeTracker}, @nospecialize(invokesig=nothing)) = new(et, invokesig) + invokesig::Union{Nothing,Vector{Any}} end - -@specialize +InliningEdgeTracker(et::Union{Nothing,EdgeTracker}) = InliningEdgeTracker(et, nothing) function add_inlining_backedge!((; et, invokesig)::InliningEdgeTracker, mi::MethodInstance) if et !== nothing if invokesig === nothing add_backedge!(et, mi) else - add_invoke_backedge!(et, invokesig, mi) + add_invoke_backedge!(et, invoke_signature(invokesig), mi) end end return nothing @@ -103,7 +79,7 @@ function 
ssa_inlining_pass!(ir::IRCode, state::InliningState, propagate_inbounds @timeit "analysis" todo = assemble_inline_todo!(ir, state) isempty(todo) && return ir # Do the actual inlining for every call we identified - @timeit "execution" ir = batch_inline!(todo, ir, propagate_inbounds, state.params) + @timeit "execution" ir = batch_inline!(ir, todo, propagate_inbounds, state.params) return ir end @@ -147,8 +123,8 @@ function inline_into_block!(state::CFGInliningState, block::Int) return end -function cfg_inline_item!(ir::IRCode, idx::Int, spec::ResolvedInliningSpec, state::CFGInliningState, from_unionsplit::Bool=false) - inlinee_cfg = spec.ir.cfg +function cfg_inline_item!(ir::IRCode, idx::Int, todo::InliningTodo, state::CFGInliningState, from_unionsplit::Bool=false) + inlinee_cfg = todo.ir.cfg # Figure out if we need to split the BB need_split_before = false need_split = true @@ -223,7 +199,7 @@ function cfg_inline_item!(ir::IRCode, idx::Int, spec::ResolvedInliningSpec, stat for (old_block, new_block) in enumerate(bb_rename_range) if (length(state.new_cfg_blocks[new_block].succs) == 0) terminator_idx = last(inlinee_cfg.blocks[old_block].stmts) - terminator = spec.ir[SSAValue(terminator_idx)][:inst] + terminator = todo.ir[SSAValue(terminator_idx)][:inst] if isa(terminator, ReturnNode) && isdefined(terminator, :val) any_edges = true push!(state.new_cfg_blocks[new_block].succs, post_bb_id) @@ -256,9 +232,8 @@ function cfg_inline_unionsplit!(ir::IRCode, idx::Int, push!(state.new_cfg_blocks[cond_bb].succs, cond_bb+1) case = cases[i].item if isa(case, InliningTodo) - spec = case.spec::ResolvedInliningSpec - if !spec.linear_inline_eligible - cfg_inline_item!(ir, idx, spec, state, true) + if !case.linear_inline_eligible + cfg_inline_item!(ir, idx, case, state, true) end end push!(from_bbs, length(state.new_cfg_blocks)) @@ -339,7 +314,7 @@ function ir_inline_linetable!(linetable::Vector{LineInfoNode}, inlinee_ir::IRCod coverage_by_path = JLOptions().code_coverage == 3 push!(linetable, LineInfoNode(inlinee.module, inlinee.name, inlinee.file, inlinee.line, inlined_at)) oldlinetable = inlinee_ir.linetable - extra_coverage_line = 0 + extra_coverage_line = zero(Int32) for oldline in 1:length(oldlinetable) entry = oldlinetable[oldline] if !coverage && coverage_by_path && is_file_tracked(entry.file) @@ -395,15 +370,15 @@ end function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector{Any}, linetable::Vector{LineInfoNode}, item::InliningTodo, - boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}}) + boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}}, + extra_flags::UInt8 = inlined_flags_for_effects(item.effects)) # Ok, do the inlining here - spec = item.spec::ResolvedInliningSpec sparam_vals = item.mi.sparam_vals def = item.mi.def::Method inlined_at = compact.result[idx][:line] - ((sp_ssa, argexprs), linetable_offset) = ir_prepare_inlining!(InsertHere(compact), compact, linetable, - item.spec.ir, sparam_vals, def, inlined_at, argexprs) + ((sp_ssa, argexprs), linetable_offset) = ir_prepare_inlining!(InsertHere(compact), + compact, linetable, item.ir, sparam_vals, def, inlined_at, argexprs) if boundscheck === :default || boundscheck === :propagate if (compact.result[idx][:flag] & IR_FLAG_INBOUNDS) != 0 @@ -417,9 +392,9 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector # Special case inlining that maintains the current basic block if there's only one BB in the target new_new_offset = length(compact.new_new_nodes) late_fixup_offset = 
length(compact.late_fixup) - if spec.linear_inline_eligible + if item.linear_inline_eligible #compact[idx] = nothing - inline_compact = IncrementalCompact(compact, spec.ir, compact.result_idx) + inline_compact = IncrementalCompact(compact, item.ir, compact.result_idx) for ((_, idx′), stmt′) in inline_compact # This dance is done to maintain accurate usage counts in the # face of rename_arguments! mutating in place - should figure out @@ -437,19 +412,20 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector break end inline_compact[idx′] = stmt′ + inline_compact[SSAValue(idx′)][:flag] |= extra_flags end just_fixup!(inline_compact, new_new_offset, late_fixup_offset) compact.result_idx = inline_compact.result_idx else bb_offset, post_bb_id = popfirst!(todo_bbs) # This implements the need_split_before flag above - need_split_before = !isempty(spec.ir.cfg.blocks[1].preds) + need_split_before = !isempty(item.ir.cfg.blocks[1].preds) if need_split_before finish_current_bb!(compact, 0) end pn = PhiNode() #compact[idx] = nothing - inline_compact = IncrementalCompact(compact, spec.ir, compact.result_idx) + inline_compact = IncrementalCompact(compact, item.ir, compact.result_idx) for ((_, idx′), stmt′) in inline_compact inline_compact[idx′] = nothing stmt′ = ssa_substitute!(InsertBefore(inline_compact, SSAValue(idx′)), inline_compact[SSAValue(idx′)], stmt′, argexprs, sig, sparam_vals, sp_ssa, linetable_offset, boundscheck) @@ -471,6 +447,14 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector stmt′ = PhiNode(Int32[edge+bb_offset for edge in stmt′.edges], stmt′.values) end inline_compact[idx′] = stmt′ + if extra_flags != 0 && !isa(stmt′, Union{GotoNode, GotoIfNot}) + if (extra_flags & IR_FLAG_NOTHROW) != 0 && inline_compact[SSAValue(idx′)][:type] === Union{} + # Shown nothrow, but also guaranteed to throw => unreachable + inline_compact[idx′] = ReturnNode() + else + inline_compact[SSAValue(idx′)][:flag] |= extra_flags + end + end end just_fixup!(inline_compact, new_new_offset, late_fixup_offset) compact.result_idx = inline_compact.result_idx @@ -619,7 +603,7 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, elseif isa(case, InvokeCase) inst = Expr(:invoke, case.invoke, argexprs′...) 
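The `extra_flags` handling above encodes a subtle step worth spelling out: a statement whose inferred type is `Union{}` never returns normally, so once the enclosing inlined call has been proven nothrow, such a statement can never execute and may be rewritten to an unreachable `ReturnNode()`. A minimal standalone sketch of that check, where the flag constant is hypothetical and merely stands in for the compiler's `IR_FLAG_NOTHROW` bit:

```julia
# Illustrative sketch, not the compiler's API: a statement of type Union{}
# can only terminate by throwing, so nothrow + Union{} implies unreachable.
const FLAG_NOTHROW = 0x20  # hypothetical bit value, for illustration only

is_unreachable(flag::UInt8, @nospecialize(T)) =
    (flag & FLAG_NOTHROW) != 0 && T === Union{}

@assert is_unreachable(FLAG_NOTHROW, Union{})  # nothrow + bottom type
@assert !is_unreachable(0x00, Union{})         # may still throw: keep it
@assert !is_unreachable(FLAG_NOTHROW, Int)     # returns normally: keep it
```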
flag = flags_for_effects(case.effects) - val = insert_node_here!(compact, NewInstruction(inst, typ, nothing, line, flag, true)) + val = insert_node_here!(compact, NewInstruction(inst, typ, case.info, line, flag)) else case = case::ConstantCase val = case.val @@ -656,7 +640,7 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, return insert_node_here!(compact, NewInstruction(pn, typ, line)) end -function batch_inline!(todo::Vector{Pair{Int, Any}}, ir::IRCode, propagate_inbounds::Bool, params::OptimizationParams) +function batch_inline!(ir::IRCode, todo::Vector{Pair{Int,Any}}, propagate_inbounds::Bool, params::OptimizationParams) # Compute the new CFG first (modulo statement ranges, which will be computed below) state = CFGInliningState(ir) for (idx, item) in todo @@ -664,10 +648,9 @@ function batch_inline!(todo::Vector{Pair{Int, Any}}, ir::IRCode, propagate_inbou cfg_inline_unionsplit!(ir, idx, item, state, params) else item = item::InliningTodo - spec = item.spec::ResolvedInliningSpec # A linear inline does not modify the CFG - spec.linear_inline_eligible && continue - cfg_inline_item!(ir, idx, spec, state, false) + item.linear_inline_eligible && continue + cfg_inline_item!(ir, idx, item, state, false) end end finish_cfg_inline!(state) @@ -683,8 +666,7 @@ function batch_inline!(todo::Vector{Pair{Int, Any}}, ir::IRCode, propagate_inbou nn = 0 for (_, item) in todo if isa(item, InliningTodo) - spec = item.spec::ResolvedInliningSpec - nn += (length(spec.ir.stmts) + length(spec.ir.new_nodes)) + nn += (length(item.ir.stmts) + length(item.ir.new_nodes)) end end nnewnodes = length(compact.result) + nn @@ -745,9 +727,9 @@ function batch_inline!(todo::Vector{Pair{Int, Any}}, ir::IRCode, propagate_inbou end # This assumes the caller has verified that all arguments to the _apply_iterate call are Tuples. -function rewrite_apply_exprargs!( +function rewrite_apply_exprargs!(todo::Vector{Pair{Int,Any}}, ir::IRCode, idx::Int, stmt::Expr, argtypes::Vector{Any}, - arginfos::Vector{MaybeAbstractIterationInfo}, arg_start::Int, istate::InliningState, todo::Vector{Pair{Int, Any}}) + arginfos::Vector{MaybeAbstractIterationInfo}, arg_start::Int, istate::InliningState) flag = ir.stmts[idx][:flag] argexprs = stmt.args new_argexprs = Any[argexprs[arg_start]] @@ -769,7 +751,7 @@ function rewrite_apply_exprargs!( end else ti = widenconst(def_type)::DataType # checked by `is_valid_type_for_apply_rewrite` - if ti.name === NamedTuple_typename + if ti.name === _NAMEDTUPLE_NAME ti = ti.parameters[2]::DataType # checked by `is_valid_type_for_apply_rewrite` end for p in ti.parameters @@ -803,17 +785,8 @@ function rewrite_apply_exprargs!( state1 = insert_node!(ir, idx, NewInstruction(new_stmt, call.rt)) new_sig = call_sig(ir, new_stmt)::Signature new_info = call.info - if isa(new_info, ConstCallInfo) - handle_const_call!( - ir, state1.id, new_stmt, new_info, flag, - new_sig, istate, todo) - elseif isa(new_info, MethodMatchInfo) || isa(new_info, UnionSplitInfo) - new_infos = isa(new_info, MethodMatchInfo) ? 
MethodMatchInfo[new_info] : new_info.matches - # See if we can inline this call to `iterate` - handle_call!( - ir, state1.id, new_stmt, new_infos, flag, - new_sig, istate, todo) - end + # See if we can inline this call to `iterate` + handle_call!(todo, ir, state1.id, new_stmt, new_info, flag, new_sig, istate) if i != length(thisarginfo.each) valT = getfield_tfunc(call.rt, Const(1)) val_extracted = insert_node!(ir, idx, NewInstruction( @@ -834,19 +807,19 @@ function rewrite_apply_exprargs!( end function compileable_specialization(match::MethodMatch, effects::Effects, - et::InliningEdgeTracker; compilesig_invokes::Bool=true) + et::InliningEdgeTracker, @nospecialize(info::CallInfo); compilesig_invokes::Bool=true) mi = specialize_method(match; compilesig=compilesig_invokes) mi === nothing && return nothing add_inlining_backedge!(et, mi) - return InvokeCase(mi, effects) + return InvokeCase(mi, effects, info) end function compileable_specialization(linfo::MethodInstance, effects::Effects, - et::InliningEdgeTracker; compilesig_invokes::Bool=true) + et::InliningEdgeTracker, @nospecialize(info::CallInfo); compilesig_invokes::Bool=true) mi = specialize_method(linfo.def::Method, linfo.specTypes, linfo.sparam_vals; compilesig=compilesig_invokes) mi === nothing && return nothing add_inlining_backedge!(et, mi) - return InvokeCase(mi, effects) + return InvokeCase(mi, effects, info) end compileable_specialization(result::InferenceResult, args...; kwargs...) = (@nospecialize; @@ -858,7 +831,7 @@ struct CachedResult CachedResult(@nospecialize(src), effects::Effects) = new(src, effects) end @inline function get_cached_result(state::InliningState, mi::MethodInstance) - code = get(state.mi_cache, mi, nothing) + code = get(code_cache(state), mi, nothing) if code isa CodeInstance if use_const_api(code) # in this case function can be inlined to a constant @@ -873,22 +846,22 @@ end end end -function resolve_todo(todo::InliningTodo, state::InliningState, flag::UInt8) - mi = todo.mi - (; match, argtypes, invokesig) = todo.spec::DelayedInliningSpec +# the general resolver for usual and const-prop'ed calls +function resolve_todo(mi::MethodInstance, result::Union{MethodMatch,InferenceResult}, + argtypes::Vector{Any}, @nospecialize(info::CallInfo), flag::UInt8, + state::InliningState; invokesig::Union{Nothing,Vector{Any}}=nothing, + override_effects::Effects = EFFECTS_UNKNOWN′) et = InliningEdgeTracker(state.et, invokesig) #XXX: update_valid_age!(min_valid[1], max_valid[1], sv) - if isa(match, InferenceResult) - inferred_src = match.src - if isa(inferred_src, ConstAPI) + if isa(result, InferenceResult) + src = result.src + if isa(src, ConstAPI) # use constant calling convention add_inlining_backedge!(et, mi) - return ConstantCase(quoted(inferred_src.val)) - else - src = inferred_src # ::Union{Nothing,CodeInfo} for NativeInterpreter + return ConstantCase(quoted(src.val)) end - effects = match.ipo_effects + effects = result.ipo_effects else cached_result = get_cached_result(state, mi) if cached_result isa ConstantCase @@ -898,23 +871,38 @@ function resolve_todo(todo::InliningTodo, state::InliningState, flag::UInt8) (; src, effects) = cached_result end + if override_effects !== EFFECTS_UNKNOWN′ + effects = override_effects + end + # the duplicated check might have been done already within `analyze_method!`, but still # we need it here too since we may come here directly using a constant-prop' result if !state.params.inlining || is_stmt_noinline(flag) - return compileable_specialization(match, effects, et; + return 
compileable_specialization(result, effects, et, info; compilesig_invokes=state.params.compilesig_invokes) end - src = inlining_policy(state.interp, src, flag, mi, argtypes) + src = inlining_policy(state.interp, src, info, flag, mi, argtypes) - src === nothing && return compileable_specialization(match, effects, et; + if isa(src, ConstAPI) + # duplicates the check above in case inlining_policy has a better idea. + # We still keep the check above to make sure we can inline to ConstAPI + # even if is_stmt_noinline. This doesn't currently happen in Base, but + # can happen with external AbstractInterpreter. + add_inlining_backedge!(et, mi) + return ConstantCase(quoted(src.val)) + end + + src === nothing && return compileable_specialization(result, effects, et, info; compilesig_invokes=state.params.compilesig_invokes) add_inlining_backedge!(et, mi) return InliningTodo(mi, retrieve_ir_for_inlining(mi, src), effects) end -function resolve_todo(mi::MethodInstance, argtypes::Vector{Any}, state::InliningState, flag::UInt8) +# the special resolver for :invoke-d call +function resolve_todo(mi::MethodInstance, argtypes::Vector{Any}, + @nospecialize(info::CallInfo), flag::UInt8, state::InliningState) if !state.params.inlining || is_stmt_noinline(flag) return nothing end @@ -928,7 +916,7 @@ function resolve_todo(mi::MethodInstance, argtypes::Vector{Any}, state::Inlining end (; src, effects) = cached_result - src = inlining_policy(state.interp, src, flag, mi, argtypes) + src = inlining_policy(state.interp, src, info, flag, mi, argtypes) src === nothing && return nothing @@ -936,20 +924,10 @@ function resolve_todo(mi::MethodInstance, argtypes::Vector{Any}, state::Inlining return InliningTodo(mi, retrieve_ir_for_inlining(mi, src), effects) end -function resolve_todo((; fully_covered, atype, cases, #=bbs=#)::UnionSplit, state::InliningState, flag::UInt8) - ncases = length(cases) - newcases = Vector{InliningCase}(undef, ncases) - for i in 1:ncases - (; sig, item) = cases[i] - newitem = resolve_todo(item, state, flag) - push!(newcases, InliningCase(sig, newitem)) - end - return UnionSplit(fully_covered, atype, newcases) -end - function validate_sparams(sparams::SimpleVector) for i = 1:length(sparams) - (isa(sparams[i], TypeVar) || isvarargtype(sparams[i])) && return false + spᵢ = sparams[i] + (isa(spᵢ, TypeVar) || isvarargtype(spᵢ)) && return false end return true end @@ -972,8 +950,10 @@ function can_inline_typevars(method::Method, argtypes::Vector{Any}) end can_inline_typevars(m::MethodMatch, argtypes::Vector{Any}) = can_inline_typevars(m.method, argtypes) -function analyze_method!(match::MethodMatch, argtypes::Vector{Any}, @nospecialize(invokesig), - flag::UInt8, state::InliningState, allow_typevars::Bool = false) +function analyze_method!(match::MethodMatch, argtypes::Vector{Any}, + @nospecialize(info::CallInfo), flag::UInt8, state::InliningState; + allow_typevars::Bool, invokesig::Union{Nothing,Vector{Any}}=nothing, + override_effects::Effects=EFFECTS_UNKNOWN′) method = match.method spec_types = match.spec_types @@ -1003,19 +983,13 @@ function analyze_method!(match::MethodMatch, argtypes::Vector{Any}, @nospecializ mi = specialize_method(match; preexisting=true) # Union{Nothing, MethodInstance} if mi === nothing et = InliningEdgeTracker(state.et, invokesig) - return compileable_specialization(match, Effects(), et; + effects = override_effects + effects === EFFECTS_UNKNOWN′ && (effects = info_effects(nothing, match, state)) + return compileable_specialization(match, effects, et, info; 
compilesig_invokes=state.params.compilesig_invokes) end - todo = InliningTodo(mi, match, argtypes, invokesig) - # If we don't have caches here, delay resolving this MethodInstance - # until the batch inlining step (or an external post-processing pass) - state.mi_cache === nothing && return todo - return resolve_todo(todo, state, flag) -end - -function InliningTodo(mi::MethodInstance, ir::IRCode, effects::Effects) - return InliningTodo(mi, ResolvedInliningSpec(ir, effects)) + return resolve_todo(mi, match, argtypes, info, flag, state; invokesig, override_effects) end function retrieve_ir_for_inlining(mi::MethodInstance, src::Array{UInt8, 1}) @@ -1038,9 +1012,40 @@ function flags_for_effects(effects::Effects) return flags end -function handle_single_case!( - ir::IRCode, idx::Int, stmt::Expr, - @nospecialize(case), todo::Vector{Pair{Int, Any}}, params::OptimizationParams, isinvoke::Bool = false) +""" + inlined_flags_for_effects(effects::Effects) + +This function answers the query: + + Given a call site annotated as `effects`, what can we say about each inlined + statement after the inlining? + +Note that this is different from `flags_for_effects`, which just talks about +the call site itself. Consider for example: + +``` + function foo() + V = Any[] + push!(V, 1) + tuple(V...) + end +``` + +This function is properly inferred effect_free, because it has no global effects. +However, we may not inline each statement with an :effect_free flag, because +that would incorrectly lose the `push!`. +""" +function inlined_flags_for_effects(effects::Effects) + flags::UInt8 = 0 + if is_nothrow(effects) + flags |= IR_FLAG_NOTHROW + end + return flags +end + +function handle_single_case!(todo::Vector{Pair{Int,Any}}, + ir::IRCode, idx::Int, stmt::Expr, @nospecialize(case), params::OptimizationParams, + isinvoke::Bool = false) if isa(case, ConstantCase) ir[SSAValue(idx)][:inst] = case.val elseif isa(case, InvokeCase) @@ -1069,7 +1074,7 @@ function is_valid_type_for_apply_rewrite(@nospecialize(typ), params::Optimizatio return true end typ = widenconst(typ) - if isa(typ, DataType) && typ.name === NamedTuple_typename + if isa(typ, DataType) && typ.name === _NAMEDTUPLE_NAME typ = typ.parameters[2] typ = unwraptv(typ) end @@ -1130,22 +1135,21 @@ function call_sig(ir::IRCode, stmt::Expr) return Signature(f, ft, argtypes) end -function inline_apply!( - ir::IRCode, idx::Int, stmt::Expr, sig::Signature, - state::InliningState, todo::Vector{Pair{Int, Any}}) +function inline_apply!(todo::Vector{Pair{Int,Any}}, + ir::IRCode, idx::Int, stmt::Expr, sig::Signature, state::InliningState) while sig.f === Core._apply_iterate info = ir.stmts[idx][:info] if isa(info, UnionSplitApplyCallInfo) if length(info.infos) != 1 # TODO: Handle union split applies?
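To make the docstring's distinction concrete, here is its `foo` example as runnable code: the function as a whole is effect-free because the mutation never escapes the freshly allocated array, yet the `push!` statement is locally effectful and cannot be dropped on its own.

```julia
# Runnable restatement of the docstring example above: no observable global
# effects overall, but deleting the push! would change the return value.
function foo()
    V = Any[]
    push!(V, 1)     # mutates only the fresh, non-escaping V
    tuple(V...)
end

@assert foo() === (1,)  # the correct result depends on push! executing
```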
- new_info = info = false + new_info = info = NoCallInfo() else info = info.infos[1] new_info = info.call end else - @assert info === nothing || info === false - new_info = info = false + @assert info === NoCallInfo() + new_info = info = NoCallInfo() end arg_start = 3 argtypes = sig.argtypes @@ -1188,9 +1192,8 @@ function inline_apply!( end # Independent of whether we can inline, the above analysis allows us to rewrite # this apply call to a regular call - argtypes = rewrite_apply_exprargs!( - ir, idx, stmt, argtypes, - arginfos, arg_start, state, todo) + argtypes = rewrite_apply_exprargs!(todo, + ir, idx, stmt, argtypes, arginfos, arg_start, state) ir.stmts[idx][:info] = new_info has_free_typevars(ft) && return nothing f = singleton_type(ft) @@ -1206,32 +1209,37 @@ is_builtin(s::Signature) = isa(s.f, Builtin) || s.ft ⊑ₒ Builtin -function inline_invoke!( +function handle_invoke_call!(todo::Vector{Pair{Int,Any}}, ir::IRCode, idx::Int, stmt::Expr, info::InvokeCallInfo, flag::UInt8, - sig::Signature, state::InliningState, todo::Vector{Pair{Int, Any}}) + sig::Signature, state::InliningState) match = info.match if !match.fully_covers # TODO: We could union split out the signature check and continue on return nothing end result = info.result - invokesig = invoke_signature(sig.argtypes) + invokesig = sig.argtypes + override_effects = EFFECTS_UNKNOWN′ if isa(result, ConcreteResult) - item = concrete_result_item(result, state, invokesig) - else - argtypes = invoke_rewrite(sig.argtypes) - if isa(result, ConstPropResult) - (; mi) = item = InliningTodo(result.result, argtypes, invokesig) - validate_sparams(mi.sparam_vals) || return nothing - if argtypes_to_type(argtypes) <: mi.def.sig - state.mi_cache !== nothing && (item = resolve_todo(item, state, flag)) - handle_single_case!(ir, idx, stmt, item, todo, state.params, true) - return nothing - end + if may_inline_concrete_result(result) + item = concrete_result_item(result, state; invokesig) + handle_single_case!(todo, ir, idx, stmt, item, state.params, true) + return nothing + end + override_effects = result.effects + end + argtypes = invoke_rewrite(sig.argtypes) + if isa(result, ConstPropResult) + mi = result.result.linfo + validate_sparams(mi.sparam_vals) || return nothing + if argtypes_to_type(argtypes) <: mi.def.sig + item = resolve_todo(mi, result.result, argtypes, info, flag, state; invokesig, override_effects) + handle_single_case!(todo, ir, idx, stmt, item, state.params, true) + return nothing end - item = analyze_method!(match, argtypes, invokesig, flag, state) end - handle_single_case!(ir, idx, stmt, item, todo, state.params, true) + item = analyze_method!(match, argtypes, info, flag, state; allow_typevars=false, invokesig, override_effects) + handle_single_case!(todo, ir, idx, stmt, item, state.params, true) return nothing end @@ -1240,7 +1248,7 @@ function invoke_signature(argtypes::Vector{Any}) return rewrap_unionall(Tuple{ft, unwrap_unionall(argtyps).parameters...}, argtyps) end -function narrow_opaque_closure!(ir::IRCode, stmt::Expr, @nospecialize(info), state::InliningState) +function narrow_opaque_closure!(ir::IRCode, stmt::Expr, @nospecialize(info::CallInfo), state::InliningState) if isa(info, OpaqueClosureCreateInfo) lbt = argextype(stmt.args[2], ir) lb, exact = instanceof_tfunc(lbt) @@ -1278,7 +1286,7 @@ end # Handles all analysis and inlining of intrinsics and builtins. In particular, # this method does not access the method table or otherwise process generic # functions. 
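The `NoCallInfo()` comparisons introduced above lean on a standard Julia idiom: a fieldless struct has exactly one instance, so an `===` test against a freshly constructed sentinel is free and unambiguous, unlike the old `false`/`nothing` sentinels. A sketch with stand-in types (these are not the compiler's actual definitions):

```julia
# Sketch of the singleton-sentinel pattern; CallInfo/NoCallInfo here are
# stand-in definitions for illustration only.
abstract type CallInfo end
struct NoCallInfo <: CallInfo end   # fieldless => exactly one instance

analyzable(info::CallInfo) = info !== NoCallInfo()

@assert NoCallInfo() === NoCallInfo()  # every construction is egal
@assert !analyzable(NoCallInfo())
```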
-function process_simple!(ir::IRCode, idx::Int, state::InliningState, todo::Vector{Pair{Int, Any}}) +function process_simple!(todo::Vector{Pair{Int,Any}}, ir::IRCode, idx::Int, state::InliningState) stmt = ir.stmts[idx][:inst] rt = ir.stmts[idx][:type] if !(stmt isa Expr) @@ -1304,7 +1312,7 @@ function process_simple!(ir::IRCode, idx::Int, state::InliningState, todo::Vecto sig === nothing && return nothing # Handle _apply_iterate - sig = inline_apply!(ir, idx, stmt, sig, state, todo) + sig = inline_apply!(todo, ir, idx, stmt, sig, state) sig === nothing && return nothing # Check if we match any of the early inliners @@ -1338,18 +1346,30 @@ function process_simple!(ir::IRCode, idx::Int, state::InliningState, todo::Vecto return stmt, sig end -function handle_any_const_result!(cases::Vector{InliningCase}, @nospecialize(result), match::MethodMatch, argtypes::Vector{Any}, flag::UInt8, state::InliningState, allow_typevars::Bool=false) +function handle_any_const_result!(cases::Vector{InliningCase}, + @nospecialize(result), match::MethodMatch, argtypes::Vector{Any}, + @nospecialize(info::CallInfo), flag::UInt8, state::InliningState; + allow_abstract::Bool, allow_typevars::Bool) + override_effects = EFFECTS_UNKNOWN′ if isa(result, ConcreteResult) - case = concrete_result_item(result, state) - push!(cases, InliningCase(result.mi.specTypes, case)) - return true - elseif isa(result, ConstPropResult) - return handle_const_prop_result!(result, argtypes, flag, state, cases, #=allow_abstract=#true, allow_typevars) - elseif isa(result, SemiConcreteResult) - return handle_semi_concrete_result!(result, cases, #=allow_abstract=#true) + if may_inline_concrete_result(result) + return handle_concrete_result!(cases, result, state) + else + override_effects = result.effects + result = nothing + end + end + if isa(result, SemiConcreteResult) + result = inlining_policy(state.interp, result, info, flag, result.mi, argtypes) + if isa(result, SemiConcreteResult) + return handle_semi_concrete_result!(cases, result; allow_abstract) + end + end + if isa(result, ConstPropResult) + return handle_const_prop_result!(cases, result, argtypes, info, flag, state; allow_abstract, allow_typevars) else @assert result === nothing - return handle_match!(match, argtypes, flag, state, cases, #=allow_abstract=#true, allow_typevars) + return handle_match!(cases, match, argtypes, info, flag, state; allow_abstract, allow_typevars, override_effects) end end @@ -1363,7 +1383,7 @@ function info_effects(@nospecialize(result), match::MethodMatch, state::Inlining else mi = specialize_method(match; preexisting=true) if isa(mi, MethodInstance) - code = get(state.mi_cache, mi, nothing) + code = get(code_cache(state), mi, nothing) if code isa CodeInstance return decode_effects(code.ipo_purity_bits) end @@ -1372,17 +1392,12 @@ function info_effects(@nospecialize(result), match::MethodMatch, state::Inlining end end -function compute_inlining_cases(info::Union{ConstCallInfo, Vector{MethodMatchInfo}}, - flag::UInt8, sig::Signature, state::InliningState) - argtypes = sig.argtypes - if isa(info, ConstCallInfo) - (; call, results) = info - infos = isa(call, MethodMatchInfo) ? 
MethodMatchInfo[call] : call.matches - else - results = nothing - infos = info - end +function compute_inlining_cases(@nospecialize(info::CallInfo), flag::UInt8, sig::Signature, + state::InliningState) + nunion = nsplit(info) + nunion === nothing && return nothing cases = InliningCase[] + argtypes = sig.argtypes local any_fully_covered = false local handled_all_cases::Bool = true local revisit_idx = nothing @@ -1391,8 +1406,8 @@ function compute_inlining_cases(info::Union{ConstCallInfo, Vector{MethodMatchInf local all_result_count = 0 local joint_effects::Effects = EFFECTS_TOTAL local nothrow::Bool = true - for i in 1:length(infos) - meth = infos[i].results + for i = 1:nunion + meth = getsplit(info, i) if meth.ambig # Too many applicable methods # Or there is a (partial?) ambiguity @@ -1414,7 +1429,7 @@ function compute_inlining_cases(info::Union{ConstCallInfo, Vector{MethodMatchInf end for (j, match) in enumerate(meth) all_result_count += 1 - result = results === nothing ? nothing : results[all_result_count] + result = getresult(info, all_result_count) joint_effects = merge_effects(joint_effects, info_effects(result, match, state)) nothrow &= match.fully_covers any_fully_covered |= match.fully_covers @@ -1430,7 +1445,8 @@ function compute_inlining_cases(info::Union{ConstCallInfo, Vector{MethodMatchInf revisit_idx = nothing end else - handled_all_cases &= handle_any_const_result!(cases, result, match, argtypes, flag, state, false) + handled_all_cases &= handle_any_const_result!(cases, + result, match, argtypes, info, flag, state; allow_abstract=true, allow_typevars=false) end end end @@ -1441,15 +1457,16 @@ function compute_inlining_cases(info::Union{ConstCallInfo, Vector{MethodMatchInf # we handled everything except one match with unmatched sparams, # so try to handle it by bypassing validate_sparams (i, j, k) = revisit_idx - match = infos[i].results[j] - result = results === nothing ? nothing : results[k] - handled_all_cases &= handle_any_const_result!(cases, result, match, argtypes, flag, state, true) + match = getsplit(info, i)[j] + result = getresult(info, k) + handled_all_cases &= handle_any_const_result!(cases, + result, match, argtypes, info, flag, state; allow_abstract=true, allow_typevars=true) elseif length(cases) == 0 && only_method isa Method # if the signature is fully covered and there is only one applicable method, # we can try to inline it even in the presence of unmatched sparams # -- But don't try it if we already tried to handle the match in the revisit_idx # case, because that'll (necessarily) be the same method. - if length(infos) > 1 + if nsplit(info)::Int > 1 atype = argtypes_to_type(argtypes) (metharg, methsp) = ccall(:jl_type_intersection_with_env, Any, (Any, Any), atype, only_method.sig)::SimpleVector match = MethodMatch(metharg, methsp::SimpleVector, only_method, true) @@ -1457,9 +1474,10 @@ function compute_inlining_cases(info::Union{ConstCallInfo, Vector{MethodMatchInf else @assert length(meth) == 1 match = meth[1] - result = results === nothing ? 
nothing : results[1] + result = getresult(info, 1) end - handle_any_const_result!(cases, result, match, argtypes, flag, state, true) + handle_any_const_result!(cases, + result, match, argtypes, info, flag, state; allow_abstract=true, allow_typevars=true) any_fully_covered = handled_all_cases = match.fully_covers elseif !handled_all_cases # if we've not seen all candidates, union split is valid only for dispatch tuples @@ -1469,57 +1487,49 @@ function compute_inlining_cases(info::Union{ConstCallInfo, Vector{MethodMatchInf return cases, (handled_all_cases & any_fully_covered), joint_effects end -function handle_call!( - ir::IRCode, idx::Int, stmt::Expr, infos::Vector{MethodMatchInfo}, flag::UInt8, - sig::Signature, state::InliningState, todo::Vector{Pair{Int, Any}}) - cases = compute_inlining_cases(infos, flag, sig, state) - cases === nothing && return nothing - cases, all_covered, joint_effects = cases - handle_cases!(ir, idx, stmt, argtypes_to_type(sig.argtypes), cases, - all_covered, todo, state.params, joint_effects) -end - -function handle_const_call!( - ir::IRCode, idx::Int, stmt::Expr, info::ConstCallInfo, flag::UInt8, - sig::Signature, state::InliningState, todo::Vector{Pair{Int, Any}}) +function handle_call!(todo::Vector{Pair{Int,Any}}, + ir::IRCode, idx::Int, stmt::Expr, @nospecialize(info::CallInfo), flag::UInt8, sig::Signature, + state::InliningState) cases = compute_inlining_cases(info, flag, sig, state) cases === nothing && return nothing cases, all_covered, joint_effects = cases - handle_cases!(ir, idx, stmt, argtypes_to_type(sig.argtypes), cases, - all_covered, todo, state.params, joint_effects) + handle_cases!(todo, ir, idx, stmt, argtypes_to_type(sig.argtypes), cases, + all_covered, joint_effects, state.params) end -function handle_match!( - match::MethodMatch, argtypes::Vector{Any}, flag::UInt8, state::InliningState, - cases::Vector{InliningCase}, allow_abstract::Bool, allow_typevars::Bool) +function handle_match!(cases::Vector{InliningCase}, + match::MethodMatch, argtypes::Vector{Any}, @nospecialize(info::CallInfo), flag::UInt8, + state::InliningState; + allow_abstract::Bool, allow_typevars::Bool, override_effects::Effects) spec_types = match.spec_types allow_abstract || isdispatchtuple(spec_types) || return false # We may see duplicated dispatch signatures here when a signature gets widened # during abstract interpretation: for the purpose of inlining, we can just skip # processing this dispatch candidate (unless unmatched type parameters are present) !allow_typevars && _any(case->case.sig === spec_types, cases) && return true - item = analyze_method!(match, argtypes, nothing, flag, state, allow_typevars) + item = analyze_method!(match, argtypes, info, flag, state; allow_typevars, override_effects) item === nothing && return false push!(cases, InliningCase(spec_types, item)) return true end -function handle_const_prop_result!( - result::ConstPropResult, argtypes::Vector{Any}, flag::UInt8, state::InliningState, - cases::Vector{InliningCase}, allow_abstract::Bool, allow_typevars::Bool = false) - (; mi) = item = InliningTodo(result.result, argtypes) +function handle_const_prop_result!(cases::Vector{InliningCase}, + result::ConstPropResult, argtypes::Vector{Any}, @nospecialize(info::CallInfo), + flag::UInt8, state::InliningState; + allow_abstract::Bool, allow_typevars::Bool) + mi = result.result.linfo spec_types = mi.specTypes allow_abstract || isdispatchtuple(spec_types) || return false if !validate_sparams(mi.sparam_vals) (allow_typevars && can_inline_typevars(mi.def, 
argtypes)) || return false end - state.mi_cache !== nothing && (item = resolve_todo(item, state, flag)) + item = resolve_todo(mi, result.result, argtypes, info, flag, state) item === nothing && return false push!(cases, InliningCase(spec_types, item)) return true end -function handle_semi_concrete_result!(result::SemiConcreteResult, cases::Vector{InliningCase}, allow_abstract::Bool = false) +function handle_semi_concrete_result!(cases::Vector{InliningCase}, result::SemiConcreteResult; allow_abstract::Bool) mi = result.mi spec_types = mi.specTypes allow_abstract || isdispatchtuple(spec_types) || return false @@ -1528,26 +1538,30 @@ function handle_semi_concrete_result!(result::SemiConcreteResult, cases::Vector{ return true end -function concrete_result_item(result::ConcreteResult, state::InliningState, @nospecialize(invokesig=nothing)) - if !isdefined(result, :result) || !is_inlineable_constant(result.result) - et = InliningEdgeTracker(state.et, invokesig) - case = compileable_specialization(result.mi, result.effects, et; - compilesig_invokes=state.params.compilesig_invokes) - @assert case !== nothing "concrete evaluation should never happen for uncompileable callsite" - return case - end +function handle_concrete_result!(cases::Vector{InliningCase}, result::ConcreteResult, state::InliningState) + case = concrete_result_item(result, state) + push!(cases, InliningCase(result.mi.specTypes, case)) + return true +end + +may_inline_concrete_result(result::ConcreteResult) = + isdefined(result, :result) && is_inlineable_constant(result.result) + +function concrete_result_item(result::ConcreteResult, state::InliningState; + invokesig::Union{Nothing,Vector{Any}}=nothing) + @assert may_inline_concrete_result(result) @assert result.effects === EFFECTS_TOTAL return ConstantCase(quoted(result.result)) end -function handle_cases!(ir::IRCode, idx::Int, stmt::Expr, @nospecialize(atype), - cases::Vector{InliningCase}, fully_covered::Bool, todo::Vector{Pair{Int, Any}}, - params::OptimizationParams, joint_effects::Effects) +function handle_cases!(todo::Vector{Pair{Int,Any}}, ir::IRCode, idx::Int, stmt::Expr, + @nospecialize(atype), cases::Vector{InliningCase}, fully_covered::Bool, + joint_effects::Effects, params::OptimizationParams) # If we only have one case and that case is fully covered, we may either # be able to do the inlining now (for constant cases), or push it directly # onto the todo list if fully_covered && length(cases) == 1 - handle_single_case!(ir, idx, stmt, cases[1].item, todo, params) + handle_single_case!(todo, ir, idx, stmt, cases[1].item, params) elseif length(cases) > 0 isa(atype, DataType) || return nothing for case in cases @@ -1560,13 +1574,25 @@ function handle_cases!(ir::IRCode, idx::Int, stmt::Expr, @nospecialize(atype), return nothing end -function handle_const_opaque_closure_call!( - ir::IRCode, idx::Int, stmt::Expr, result::ConstPropResult, flag::UInt8, - sig::Signature, state::InliningState, todo::Vector{Pair{Int, Any}}) - item = InliningTodo(result.result, sig.argtypes) - validate_sparams(item.mi.sparam_vals) || return nothing - state.mi_cache !== nothing && (item = resolve_todo(item, state, flag)) - handle_single_case!(ir, idx, stmt, item, todo, state.params) +function handle_opaque_closure_call!(todo::Vector{Pair{Int,Any}}, + ir::IRCode, idx::Int, stmt::Expr, info::OpaqueClosureCallInfo, + flag::UInt8, sig::Signature, state::InliningState) + result = info.result + if isa(result, ConstPropResult) + mi = result.result.linfo + validate_sparams(mi.sparam_vals) || return nothing 
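`handle_cases!` above implements the union-split strategy: a single fully covering case is inlined directly, while several dispatch-tuple cases become an explicit type-test cascade. A hand-written sketch of the shape the pass produces (emitted as IR in reality, shown here as source under that simplification):

```julia
# Hand-written approximation of a two-case union split; the real pass
# emits this control flow as IR statements, not Julia source.
g(x::Int)     = x + 1
g(x::Float64) = x - 1.0

function union_split_g(x::Union{Int,Float64})
    if isa(x, Int)
        return x + 1        # inlined body of g(::Int)
    else
        return x - 1.0      # inlined body of g(::Float64)
    end
end

@assert union_split_g(3)   === g(3)
@assert union_split_g(3.0) === g(3.0)
```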
+ item = resolve_todo(mi, result.result, sig.argtypes, info, flag, state) + elseif isa(result, ConcreteResult) + if may_inline_concrete_result(result) + item = concrete_result_item(result, state) + else + override_effects = result.effects + item = analyze_method!(info.match, sig.argtypes, info, flag, state; allow_typevars=false, override_effects) + end + else + item = analyze_method!(info.match, sig.argtypes, info, flag, state; allow_typevars=false) + end + handle_single_case!(todo, ir, idx, stmt, item, state.params) return nothing end @@ -1578,7 +1604,7 @@ function handle_modifyfield!_call!(ir::IRCode, idx::Int, stmt::Expr, info::Modif length(info.results) == 1 || return nothing match = info.results[1]::MethodMatch match.fully_covers || return nothing - case = compileable_specialization(match, Effects(), InliningEdgeTracker(state.et); + case = compileable_specialization(match, Effects(), InliningEdgeTracker(state.et), info; compilesig_invokes=state.params.compilesig_invokes) case === nothing && return nothing stmt.head = :invoke_modify @@ -1587,8 +1613,8 @@ function handle_modifyfield!_call!(ir::IRCode, idx::Int, stmt::Expr, info::Modif return nothing end -function handle_finalizer_call!( - ir::IRCode, idx::Int, stmt::Expr, info::FinalizerInfo, state::InliningState) +function handle_finalizer_call!(ir::IRCode, idx::Int, stmt::Expr, info::FinalizerInfo, + state::InliningState) # Finalizers don't return values, so if their execution is not observable, # we can just not register them @@ -1601,20 +1627,6 @@ function handle_finalizer_call!( # This avoids having to set up state for finalizer isolation is_finalizer_inlineable(info.effects) || return nothing - info = info.info - if isa(info, ConstCallInfo) - # NOTE currently mutable objects are not represented as `Const` - # but `finalizer` function can be - info = info.call - end - if isa(info, MethodMatchInfo) - infos = MethodMatchInfo[info] - elseif isa(info, UnionSplitInfo) - infos = info.matches - else - return nothing - end - ft = argextype(stmt.args[2], ir) has_free_typevars(ft) && return nothing f = singleton_type(ft) @@ -1623,7 +1635,7 @@ function handle_finalizer_call!( argtypes[2] = argextype(stmt.args[3], ir) sig = Signature(f, ft, argtypes) - cases = compute_inlining_cases(infos, #=flag=#UInt8(0), sig, state) + cases = compute_inlining_cases(info.info, #=flag=#UInt8(0), sig, state) cases === nothing && return nothing cases, all_covered, _ = cases if all_covered && length(cases) == 1 @@ -1644,10 +1656,10 @@ function handle_finalizer_call!( return nothing end -function handle_invoke!(todo::Vector{Pair{Int,Any}}, - idx::Int, stmt::Expr, flag::UInt8, sig::Signature, state::InliningState) +function handle_invoke_expr!(todo::Vector{Pair{Int,Any}}, + idx::Int, stmt::Expr, @nospecialize(info::CallInfo), flag::UInt8, sig::Signature, state::InliningState) mi = stmt.args[1]::MethodInstance - case = resolve_todo(mi, sig.argtypes, state, flag) + case = resolve_todo(mi, sig.argtypes, info, flag, state) if case !== nothing push!(todo, idx=>(case::InliningTodo)) end @@ -1668,78 +1680,43 @@ function assemble_inline_todo!(ir::IRCode, state::InliningState) todo = Pair{Int, Any}[] for idx in 1:length(ir.stmts) - simpleres = process_simple!(ir, idx, state, todo) + simpleres = process_simple!(todo, ir, idx, state) simpleres === nothing && continue stmt, sig = simpleres flag = ir.stmts[idx][:flag] + info = ir.stmts[idx][:info] # `NativeInterpreter` won't need this, but provide support for `:invoke` exprs here # for external `AbstractInterpreter`s that
may run the inlining pass multiple times if isexpr(stmt, :invoke) - handle_invoke!(todo, idx, stmt, flag, sig, state) + handle_invoke_expr!(todo, idx, stmt, info, flag, sig, state) continue end - info = ir.stmts[idx][:info] - # Check whether this call was @pure and evaluates to a constant if info isa MethodResultPure inline_const_if_inlineable!(ir[SSAValue(idx)]) && continue info = info.info end - if info === false + if info === NoCallInfo() # Inference determined this couldn't be analyzed. Don't question it. continue end - if isa(info, OpaqueClosureCallInfo) - result = info.result - if isa(result, ConstPropResult) - handle_const_opaque_closure_call!( - ir, idx, stmt, result, flag, - sig, state, todo) - else - if isa(result, ConcreteResult) - item = concrete_result_item(result, state) - else - item = analyze_method!(info.match, sig.argtypes, nothing, flag, state) - end - handle_single_case!(ir, idx, stmt, item, todo, state.params) - end - continue - end - # handle special cased builtins - if isa(info, ModifyFieldInfo) + if isa(info, OpaqueClosureCallInfo) + handle_opaque_closure_call!(todo, ir, idx, stmt, info, flag, sig, state) + elseif isa(info, ModifyFieldInfo) handle_modifyfield!_call!(ir, idx, stmt, info, state) - continue elseif isa(info, InvokeCallInfo) - inline_invoke!(ir, idx, stmt, info, flag, sig, state, todo) - continue + handle_invoke_call!(todo, ir, idx, stmt, info, flag, sig, state) elseif isa(info, FinalizerInfo) handle_finalizer_call!(ir, idx, stmt, info, state) - end - - # if inference arrived here with constant-prop'ed result(s), - # we can perform a specialized analysis for just this case - if isa(info, ConstCallInfo) - handle_const_call!( - ir, idx, stmt, info, flag, - sig, state, todo) - continue - end - - # Ok, now figure out what method to call - if isa(info, MethodMatchInfo) - infos = MethodMatchInfo[info] - elseif isa(info, UnionSplitInfo) - infos = info.matches else - continue # isa(info, ReturnTypeCallInfo), etc. 
+ # cascade to the generic (and extendable) handler + handle_call!(todo, ir, idx, stmt, info, flag, sig, state) end - - handle_call!(ir, idx, stmt, infos, flag, sig, state, todo) end return todo @@ -1798,6 +1775,15 @@ function early_inline_special_case( setting === :const || setting === :conditional || setting === :type || return nothing # barriered successfully already, eliminate it return SomeCase(stmt.args[3]) + elseif f === Core.ifelse && length(argtypes) == 4 + cond = argtypes[2] + if isa(cond, Const) + if cond.val === true + return SomeCase(stmt.args[3]) + elseif cond.val === false + return SomeCase(stmt.args[4]) + end + end end return nothing end @@ -1890,7 +1876,7 @@ function ssa_substitute_op!(insert_node!::Inserter, subst_inst::Instruction, else flag = subst_inst[:flag] maybe_undef = (flag & IR_FLAG_NOTHROW) == 0 && isa(val, TypeVar) - (ret, tcheck_not) = insert_spval!(insert_node!, spvals_ssa, spidx, maybe_undef) + (ret, tcheck_not) = insert_spval!(insert_node!, spvals_ssa::SSAValue, spidx, maybe_undef) if maybe_undef insert_node!( non_effect_free(NewInstruction(Expr(:throw_undef_if_not, val.name, tcheck_not), Nothing))) @@ -1903,7 +1889,7 @@ function ssa_substitute_op!(insert_node!::Inserter, subst_inst::Instruction, if !isa(val, TypeVar) return true else - (_, tcheck_not) = insert_spval!(insert_node!, spvals_ssa, spidx, true) + (_, tcheck_not) = insert_spval!(insert_node!, spvals_ssa::SSAValue, spidx, true) return tcheck_not end elseif head === :cfunction && spvals_ssa === nothing diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index aad00edebaaca..6496b8eca41a1 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -144,7 +144,7 @@ function compute_basic_blocks(stmts::Vector{Any}) end # this function assumes insert position exists -function first_insert_for_bb(code, cfg::CFG, block::Int) +function first_insert_for_bb(code::Vector{Any}, cfg::CFG, block::Int) for idx in cfg.blocks[block].stmts stmt = code[idx] if !isa(stmt, PhiNode) @@ -188,15 +188,15 @@ const AnySSAValue = Union{SSAValue, OldSSAValue, NewSSAValue} struct InstructionStream inst::Vector{Any} type::Vector{Any} - info::Vector{Any} + info::Vector{CallInfo} line::Vector{Int32} flag::Vector{UInt8} end function InstructionStream(len::Int) - insts = Array{Any}(undef, len) - types = Array{Any}(undef, len) - info = Array{Any}(undef, len) - fill!(info, nothing) + insts = Vector{Any}(undef, len) + types = Vector{Any}(undef, len) + info = Vector{CallInfo}(undef, len) + fill!(info, NoCallInfo()) lines = fill(Int32(0), len) flags = fill(IR_FLAG_NULL, len) return InstructionStream(insts, types, info, lines, flags) @@ -204,7 +204,7 @@ end InstructionStream() = InstructionStream(0) length(is::InstructionStream) = length(is.inst) isempty(is::InstructionStream) = isempty(is.inst) -function add!(is::InstructionStream) +function add_new_idx!(is::InstructionStream) ninst = length(is) + 1 resize!(is, ninst) return ninst @@ -227,7 +227,7 @@ function resize!(stmts::InstructionStream, len) for i in (old_length + 1):len stmts.line[i] = 0 stmts.flag[i] = IR_FLAG_NULL - stmts.info[i] = nothing + stmts.info[i] = NoCallInfo() end return stmts end @@ -236,7 +236,7 @@ struct Instruction data::InstructionStream idx::Int end -Instruction(is::InstructionStream) = Instruction(is, add!(is)) +Instruction(is::InstructionStream) = Instruction(is, add_new_idx!(is)) @inline function getindex(node::Instruction, fld::Symbol) isdefined(node, fld) && return getfield(node, fld) @@ -278,7 +278,7 @@ end 
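Returning to the `early_inline_special_case` hunk above: the new branch folds `Core.ifelse` when its condition is a compile-time `Const`, substituting the selected argument before any later pass sees the call. A small demonstration of the semantics being folded:

```julia
# The runtime semantics that the new early special case exploits: with a
# constant condition, Core.ifelse(true, a, b) is simply a (and false, b),
# so the inliner may replace the whole call with the chosen argument.
pick(c::Bool) = Core.ifelse(c, "then", "else")

@assert pick(true)  == "then"
@assert pick(false) == "else"
```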
NewNodeStream(len::Int=0) = NewNodeStream(InstructionStream(len), fill(NewNodeInfo(0, false), len)) length(new::NewNodeStream) = length(new.stmts) isempty(new::NewNodeStream) = isempty(new.stmts) -function add!(new::NewNodeStream, pos::Int, attach_after::Bool) +function add_inst!(new::NewNodeStream, pos::Int, attach_after::Bool) push!(new.info, NewNodeInfo(pos, attach_after)) return Instruction(new.stmts) end @@ -287,35 +287,49 @@ copy(nns::NewNodeStream) = NewNodeStream(copy(nns.stmts), copy(nns.info)) struct NewInstruction stmt::Any type::Any - info::Any - # If nothing, copy the line from previous statement - # in the insertion location - line::Union{Int32, Nothing} - flag::UInt8 - - ## Insertion options - - # The IR_FLAG_EFFECT_FREE flag has already been computed (or forced). - # Don't bother redoing so on insertion. - effect_free_computed::Bool - NewInstruction(@nospecialize(stmt), @nospecialize(type), @nospecialize(info), - line::Union{Int32, Nothing}, flag::UInt8, effect_free_computed::Bool) = - new(stmt, type, info, line, flag, effect_free_computed) -end -NewInstruction(@nospecialize(stmt), @nospecialize(type)) = - NewInstruction(stmt, type, nothing) -NewInstruction(@nospecialize(stmt), @nospecialize(type), line::Union{Nothing, Int32}) = - NewInstruction(stmt, type, nothing, line, IR_FLAG_NULL, false) -NewInstruction(@nospecialize(stmt), meta::Instruction; line::Union{Int32, Nothing}=nothing) = - NewInstruction(stmt, meta[:type], meta[:info], line === nothing ? meta[:line] : line, meta[:flag], true) - -effect_free(inst::NewInstruction) = - NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag | IR_FLAG_EFFECT_FREE, true) -non_effect_free(inst::NewInstruction) = - NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag & ~IR_FLAG_EFFECT_FREE, true) -with_flags(inst::NewInstruction, flags::UInt8) = - NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag | flags, true) - + info::CallInfo + line::Union{Int32,Nothing} # if nothing, copy the line from previous statement in the insertion location + flag::Union{UInt8,Nothing} # if nothing, IR flags will be recomputed on insertion + function NewInstruction(@nospecialize(stmt), @nospecialize(type), @nospecialize(info::CallInfo), + line::Union{Int32,Nothing}, flag::Union{UInt8,Nothing}) + return new(stmt, type, info, line, flag) + end +end +function NewInstruction(@nospecialize(stmt), @nospecialize(type), line::Union{Int32,Nothing}=nothing) + return NewInstruction(stmt, type, NoCallInfo(), line, nothing) +end +@nospecialize +function NewInstruction(newinst::NewInstruction; + stmt::Any=newinst.stmt, + type::Any=newinst.type, + info::CallInfo=newinst.info, + line::Union{Int32,Nothing}=newinst.line, + flag::Union{UInt8,Nothing}=newinst.flag) + return NewInstruction(stmt, type, info, line, flag) +end +function NewInstruction(inst::Instruction; + stmt::Any=inst[:inst], + type::Any=inst[:type], + info::CallInfo=inst[:info], + line::Union{Int32,Nothing}=inst[:line], + flag::Union{UInt8,Nothing}=inst[:flag]) + return NewInstruction(stmt, type, info, line, flag) +end +@specialize +effect_free(newinst::NewInstruction) = NewInstruction(newinst; flag=add_flag(newinst, IR_FLAG_EFFECT_FREE)) +non_effect_free(newinst::NewInstruction) = NewInstruction(newinst; flag=sub_flag(newinst, IR_FLAG_EFFECT_FREE)) +with_flags(newinst::NewInstruction, flags::UInt8) = NewInstruction(newinst; flag=add_flag(newinst, flags)) +without_flags(newinst::NewInstruction, flags::UInt8) = NewInstruction(newinst; flag=sub_flag(newinst, 
flags)) +function add_flag(newinst::NewInstruction, newflag::UInt8) + flag = newinst.flag + flag === nothing && return newflag + return flag | newflag +end +function sub_flag(newinst::NewInstruction, newflag::UInt8) + flag = newinst.flag + flag === nothing && return IR_FLAG_NULL + return flag & ~newflag +end struct IRCode stmts::InstructionStream @@ -332,11 +346,26 @@ struct IRCode function IRCode(ir::IRCode, stmts::InstructionStream, cfg::CFG, new_nodes::NewNodeStream) return new(stmts, ir.argtypes, ir.sptypes, ir.linetable, cfg, new_nodes, ir.meta) end - global copy - copy(ir::IRCode) = new(copy(ir.stmts), copy(ir.argtypes), copy(ir.sptypes), + global copy(ir::IRCode) = new(copy(ir.stmts), copy(ir.argtypes), copy(ir.sptypes), copy(ir.linetable), copy(ir.cfg), copy(ir.new_nodes), copy(ir.meta)) end +""" + IRCode() + +Create an empty IRCode object with a single `return nothing` statement. This method is mostly intended +for debugging and unit testing of IRCode APIs. The compiler itself should generally obtain an IRCode +from the frontend or one of the caches. +""" +function IRCode() + ir = IRCode(InstructionStream(1), CFG([BasicBlock(1:1, Int[], Int[])], Int[1]), LineInfoNode[], Any[], Expr[], Any[]) + ir[SSAValue(1)][:inst] = ReturnNode(nothing) + ir[SSAValue(1)][:type] = Nothing + ir[SSAValue(1)][:flag] = 0x00 + ir[SSAValue(1)][:line] = Int32(0) + return ir +end + function block_for_inst(ir::IRCode, inst::Int) if inst > length(ir.stmts) inst = ir.new_nodes.info[inst - length(ir.stmts)].pos @@ -513,36 +542,26 @@ scan_ssa_use!(@specialize(push!), used, @nospecialize(stmt)) = foreachssa(ssa::S # Manually specialized copy of the above with push! === Compiler.push! scan_ssa_use!(used::IdSet, @nospecialize(stmt)) = foreachssa(ssa::SSAValue -> push!(used, ssa.id), stmt) -function insert_node!(ir::IRCode, pos::SSAValue, inst::NewInstruction, attach_after::Bool=false) - node = add!(ir.new_nodes, pos.id, attach_after) - node[:line] = something(inst.line, ir[pos][:line]) - flag = inst.flag - if !inst.effect_free_computed - (consistent, effect_free_and_nothrow, nothrow) = stmt_effect_flags(fallback_lattice, inst.stmt, inst.type, ir) - if consistent - flag |= IR_FLAG_CONSISTENT - end - if effect_free_and_nothrow - flag |= IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW - elseif nothrow - flag |= IR_FLAG_NOTHROW +function insert_node!(ir::IRCode, pos::SSAValue, newinst::NewInstruction, attach_after::Bool=false) + posid = pos.id + if pos.id > length(ir.stmts) + if attach_after + info = ir.new_nodes.info[pos.id-length(ir.stmts)]; + posid = info.pos + attach_after = info.attach_after + else + error("Cannot attach before a pending node.") end end - node[:inst], node[:type], node[:flag] = inst.stmt, inst.type, flag + node = add_inst!(ir.new_nodes, posid, attach_after) + newline = something(newinst.line, ir[pos][:line]) + newflag = recompute_inst_flag(newinst, ir) + node = inst_from_newinst!(node, newinst, newline, newflag) return SSAValue(length(ir.stmts) + node.idx) end -insert_node!(ir::IRCode, pos::Int, inst::NewInstruction, attach_after::Bool=false) = - insert_node!(ir, SSAValue(pos), inst, attach_after) +insert_node!(ir::IRCode, pos::Int, newinst::NewInstruction, attach_after::Bool=false) = + insert_node!(ir, SSAValue(pos), newinst, attach_after) -# For bootstrapping -function my_sortperm(v) - p = Vector{Int}(undef, length(v)) - for i = 1:length(v) - p[i] = i - end - sort!(p, Sort.DEFAULT_UNSTABLE, Order.Perm(Sort.Forward,v)) - p -end mutable struct IncrementalCompact ir::IRCode @@ -560,13 +579,13 @@ mutable 
struct IncrementalCompact # This supports insertion while compacting new_new_nodes::NewNodeStream # New nodes that were before the compaction point at insertion time new_new_used_ssas::Vector{Int} - # TODO: Switch these two to a min-heap of some sort pending_nodes::NewNodeStream # New nodes that were after the compaction point at insertion time - pending_perm::Vector{Int} + pending_perm::Vector{Int} # pending_nodes.info[pending_perm] is in min-heap order by pos # State idx::Int result_idx::Int + active_bb::Int active_result_bb::Int renamed_new_nodes::Bool cfg_transforms_enabled::Bool @@ -574,10 +593,9 @@ mutable struct IncrementalCompact function IncrementalCompact(code::IRCode, allow_cfg_transforms::Bool=false) # Sort by position with attach after nodes after regular ones - perm = my_sortperm(Int[let new_node = code.new_nodes.info[i] - (new_node.pos * 2 + Int(new_node.attach_after)) - end for i in 1:length(code.new_nodes)]) - new_len = length(code.stmts) + length(code.new_nodes) + info = code.new_nodes.info + perm = sort!(collect(eachindex(info)); by=i->(2info[i].pos+info[i].attach_after, i)) + new_len = length(code.stmts) + length(info) result = InstructionStream(new_len) used_ssas = fill(0, new_len) new_new_used_ssas = Vector{Int}() @@ -624,13 +642,14 @@ mutable struct IncrementalCompact pending_perm = Int[] return new(code, result, result_bbs, ssa_rename, bb_rename, bb_rename, used_ssas, late_fixup, perm, 1, new_new_nodes, new_new_used_ssas, pending_nodes, pending_perm, - 1, 1, 1, false, allow_cfg_transforms, allow_cfg_transforms) + 1, 1, 1, 1, false, allow_cfg_transforms, allow_cfg_transforms) end # For inlining function IncrementalCompact(parent::IncrementalCompact, code::IRCode, result_offset) - perm = my_sortperm(Int[code.new_nodes.info[i].pos for i in 1:length(code.new_nodes)]) - new_len = length(code.stmts) + length(code.new_nodes) + info = code.new_nodes.info + perm = sort!(collect(eachindex(info)); by=i->(info[i].pos, i)) + new_len = length(code.stmts) + length(info) ssa_rename = Any[SSAValue(i) for i = 1:new_len] bb_rename = Vector{Int}() pending_nodes = NewNodeStream() @@ -639,7 +658,7 @@ mutable struct IncrementalCompact parent.result_bbs, ssa_rename, bb_rename, bb_rename, parent.used_ssas, parent.late_fixup, perm, 1, parent.new_new_nodes, parent.new_new_used_ssas, pending_nodes, pending_perm, - 1, result_offset, parent.active_result_bb, false, false, false) + 1, result_offset, 1, parent.active_result_bb, false, false, false) end end @@ -752,43 +771,73 @@ function dominates_ssa(compact::IncrementalCompact, domtree::DomTree, x::AnySSAV return dominates(domtree, xb, yb) end +function _count_added_node!(compact, @nospecialize(val)) + if isa(val, SSAValue) + compact.used_ssas[val.id] += 1 + return false + elseif isa(val, NewSSAValue) + @assert val.id < 0 # Newly added nodes should be canonicalized + compact.new_new_used_ssas[-val.id] += 1 + return true + end + return false +end + function count_added_node!(compact::IncrementalCompact, @nospecialize(v)) needs_late_fixup = false for ops in userefs(v) - val = ops[] - if isa(val, SSAValue) - compact.used_ssas[val.id] += 1 - elseif isa(val, NewSSAValue) - @assert val.id < 0 # Newly added nodes should be canonicalized - compact.new_new_used_ssas[-val.id] += 1 - needs_late_fixup = true - end + needs_late_fixup |= _count_added_node!(compact, ops[]) end return needs_late_fixup end function add_pending!(compact::IncrementalCompact, pos::Int, attach_after::Bool) - node = add!(compact.pending_nodes, pos, attach_after) - # TODO: switch this 
to `l = length(pending_nodes); splice!(pending_perm, searchsorted(pending_perm, l), l)` - push!(compact.pending_perm, length(compact.pending_nodes)) - sort!(compact.pending_perm, DEFAULT_STABLE, Order.By(x->compact.pending_nodes.info[x].pos, Order.Forward)) + node = add_inst!(compact.pending_nodes, pos, attach_after) + heappush!(compact.pending_perm, length(compact.pending_nodes), By(x -> compact.pending_nodes.info[x].pos)) return node end -function insert_node!(compact::IncrementalCompact, before, inst::NewInstruction, attach_after::Bool=false) - @assert inst.effect_free_computed +function inst_from_newinst!(node::Instruction, newinst::NewInstruction, + newline::Int32=newinst.line::Int32, newflag::UInt8=newinst.flag::UInt8) + node[:inst] = newinst.stmt + node[:type] = newinst.type + node[:info] = newinst.info + node[:line] = newline + node[:flag] = newflag + return node +end + +function recompute_inst_flag(newinst::NewInstruction, src::Union{IRCode,IncrementalCompact}) + flag = newinst.flag + flag !== nothing && return flag + flag = IR_FLAG_NULL + (consistent, effect_free_and_nothrow, nothrow) = stmt_effect_flags( + fallback_lattice, newinst.stmt, newinst.type, src) + if consistent + flag |= IR_FLAG_CONSISTENT + end + if effect_free_and_nothrow + flag |= IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW + elseif nothrow + flag |= IR_FLAG_NOTHROW + end + return flag +end + +function insert_node!(compact::IncrementalCompact, @nospecialize(before), newinst::NewInstruction, attach_after::Bool=false) + newflag = newinst.flag::UInt8 if isa(before, SSAValue) if before.id < compact.result_idx - count_added_node!(compact, inst.stmt) - line = something(inst.line, compact.result[before.id][:line]) - node = add!(compact.new_new_nodes, before.id, attach_after) + count_added_node!(compact, newinst.stmt) + newline = something(newinst.line, compact.result[before.id][:line]) + node = add_inst!(compact.new_new_nodes, before.id, attach_after) + node = inst_from_newinst!(node, newinst, newline, newflag) push!(compact.new_new_used_ssas, 0) - node[:inst], node[:type], node[:line], node[:flag] = inst.stmt, inst.type, line, inst.flag return NewSSAValue(-node.idx) else - line = something(inst.line, compact.ir.stmts[before.id][:line]) + newline = something(newinst.line, compact.ir.stmts[before.id][:line]) node = add_pending!(compact, before.id, attach_after) - node[:inst], node[:type], node[:line], node[:flag] = inst.stmt, inst.type, line, inst.flag + node = inst_from_newinst!(node, newinst, newline, newflag) os = OldSSAValue(length(compact.ir.stmts) + length(compact.ir.new_nodes) + length(compact.pending_nodes)) push!(compact.ssa_rename, os) push!(compact.used_ssas, 0) @@ -798,11 +847,11 @@ function insert_node!(compact::IncrementalCompact, before, inst::NewInstruction, pos = before.id if pos < compact.idx renamed = compact.ssa_rename[pos]::AnySSAValue - count_added_node!(compact, inst.stmt) - line = something(inst.line, compact.result[renamed.id][:line]) - node = add!(compact.new_new_nodes, renamed.id, attach_after) + count_added_node!(compact, newinst.stmt) + newline = something(newinst.line, compact.result[renamed.id][:line]) + node = add_inst!(compact.new_new_nodes, renamed.id, attach_after) + node = inst_from_newinst!(node, newinst, newline, newflag) push!(compact.new_new_used_ssas, 0) - node[:inst], node[:type], node[:line], node[:flag] = inst.stmt, inst.type, line, inst.flag return NewSSAValue(-node.idx) else if pos > length(compact.ir.stmts) @@ -810,9 +859,9 @@ function insert_node!(compact::IncrementalCompact, 
before, inst::NewInstruction, info = compact.pending_nodes.info[pos - length(compact.ir.stmts) - length(compact.ir.new_nodes)] pos, attach_after = info.pos, info.attach_after end - line = something(inst.line, compact.ir.stmts[pos][:line]) + newline = something(newinst.line, compact.ir.stmts[pos][:line]) node = add_pending!(compact, pos, attach_after) - node[:inst], node[:type], node[:line], node[:flag] = inst.stmt, inst.type, line, inst.flag + node = inst_from_newinst!(node, newinst, newline, newflag) os = OldSSAValue(length(compact.ir.stmts) + length(compact.ir.new_nodes) + length(compact.pending_nodes)) push!(compact.ssa_rename, os) push!(compact.used_ssas, 0) @@ -821,9 +870,9 @@ function insert_node!(compact::IncrementalCompact, before, inst::NewInstruction, elseif isa(before, NewSSAValue) # TODO: This is incorrect and does not maintain ordering among the new nodes before_entry = compact.new_new_nodes.info[-before.id] - line = something(inst.line, compact.new_new_nodes.stmts[-before.id][:line]) - new_entry = add!(compact.new_new_nodes, before_entry.pos, attach_after) - new_entry[:inst], new_entry[:type], new_entry[:line], new_entry[:flag] = inst.stmt, inst.type, line, inst.flag + newline = something(newinst.line, compact.new_new_nodes.stmts[-before.id][:line]) + new_entry = add_inst!(compact.new_new_nodes, before_entry.pos, attach_after) + new_entry = inst_from_newinst!(new_entry, newinst, newline, newflag) push!(compact.new_new_used_ssas, 0) return NewSSAValue(-new_entry.idx) else @@ -831,8 +880,8 @@ function insert_node!(compact::IncrementalCompact, before, inst::NewInstruction, end end -function insert_node_here!(compact::IncrementalCompact, inst::NewInstruction, reverse_affinity::Bool=false) - @assert inst.line !== nothing +function insert_node_here!(compact::IncrementalCompact, newinst::NewInstruction, reverse_affinity::Bool=false) + newline = newinst.line::Int32 refinish = false result_idx = compact.result_idx if reverse_affinity && @@ -845,21 +894,9 @@ function insert_node_here!(compact::IncrementalCompact, inst::NewInstruction, re @assert result_idx == length(compact.result) + 1 resize!(compact, result_idx) end - flag = inst.flag - if !inst.effect_free_computed - (consistent, effect_free_and_nothrow, nothrow) = stmt_effect_flags(fallback_lattice, inst.stmt, inst.type, compact) - if consistent - flag |= IR_FLAG_CONSISTENT - end - if effect_free_and_nothrow - flag |= IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW - elseif nothrow - flag |= IR_FLAG_NOTHROW - end - end - node = compact.result[result_idx] - node[:inst], node[:type], node[:line], node[:flag] = inst.stmt, inst.type, inst.line, flag - count_added_node!(compact, inst.stmt) && push!(compact.late_fixup, result_idx) + newflag = recompute_inst_flag(newinst, compact) + node = inst_from_newinst!(compact.result[result_idx], newinst, newline, newflag) + count_added_node!(compact, newinst.stmt) && push!(compact.late_fixup, result_idx) compact.result_idx = result_idx + 1 inst = SSAValue(result_idx) refinish && finish_current_bb!(compact, 0) @@ -881,17 +918,20 @@ function getindex(view::TypesView, v::OldSSAValue) return view.ir.pending_nodes.stmts[id][:type] end -function kill_current_uses(compact::IncrementalCompact, @nospecialize(stmt)) +function kill_current_use!(compact::IncrementalCompact, @nospecialize(val)) + if isa(val, SSAValue) + @assert compact.used_ssas[val.id] >= 1 + compact.used_ssas[val.id] -= 1 + elseif isa(val, NewSSAValue) + @assert val.id < 0 + @assert compact.new_new_used_ssas[-val.id] >= 1 + 
compact.new_new_used_ssas[-val.id] -= 1 + end +end + +function kill_current_uses!(compact::IncrementalCompact, @nospecialize(stmt)) for ops in userefs(stmt) - val = ops[] - if isa(val, SSAValue) - @assert compact.used_ssas[val.id] >= 1 - compact.used_ssas[val.id] -= 1 - elseif isa(val, NewSSAValue) - @assert val.id < 0 - @assert compact.new_new_used_ssas[-val.id] >= 1 - compact.new_new_used_ssas[-val.id] -= 1 - end + kill_current_use!(compact, ops[]) end end @@ -899,7 +939,7 @@ function setindex!(compact::IncrementalCompact, @nospecialize(v), idx::SSAValue) @assert idx.id < compact.result_idx (compact.result[idx.id][:inst] === v) && return # Kill count for current uses - kill_current_uses(compact, compact.result[idx.id][:inst]) + kill_current_uses!(compact, compact.result[idx.id][:inst]) compact.result[idx.id][:inst] = v # Add count for new use count_added_node!(compact, v) && push!(compact.late_fixup, idx.id) @@ -911,7 +951,7 @@ function setindex!(compact::IncrementalCompact, @nospecialize(v), idx::OldSSAVal if id < compact.idx new_idx = compact.ssa_rename[id] (compact.result[new_idx][:inst] === v) && return - kill_current_uses(compact, compact.result[new_idx][:inst]) + kill_current_uses!(compact, compact.result[new_idx][:inst]) compact.result[new_idx][:inst] = v count_added_node!(compact, v) && push!(compact.late_fixup, new_idx) return compact @@ -941,21 +981,63 @@ end __set_check_ssa_counts(onoff::Bool) = __check_ssa_counts__[] = onoff const __check_ssa_counts__ = fill(false) +should_check_ssa_counts() = __check_ssa_counts__[] + +# specifically meant to be used with body1 = compact.result and body2 = compact.new_new_nodes, with nvals == length(compact.used_ssas) +function find_ssavalue_uses1(compact) + body1, body2 = compact.result.inst, compact.new_new_nodes.stmts.inst + nvals = length(compact.used_ssas) + nvalsnew = length(compact.new_new_used_ssas) + nbody1 = compact.result_idx + nbody2 = length(body2) + + uses = zeros(Int, nvals) + usesnew = zeros(Int, nvalsnew) + function increment_uses(ssa::AnySSAValue) + if isa(ssa, NewSSAValue) + usesnew[-ssa.id] += 1 + elseif isa(ssa, SSAValue) + uses[ssa.id] += 1 + end + end + + for line in 1:(nbody1 + nbody2) + # index into the right body + if line <= nbody1 + isassigned(body1, line) || continue + e = body1[line] + else + line -= nbody1 + isassigned(body2, line) || continue + e = body2[line] + end + + foreach_anyssa(increment_uses, e) + end + + return (uses, usesnew) +end + function _oracle_check(compact::IncrementalCompact) - observed_used_ssas = Core.Compiler.find_ssavalue_uses1(compact) + (observed_used_ssas, observed_used_newssas) = Core.Compiler.find_ssavalue_uses1(compact) for i = 1:length(observed_used_ssas) if observed_used_ssas[i] != compact.used_ssas[i] - return observed_used_ssas + return (observed_used_ssas, observed_used_newssas, SSAValue(i)) end end - return nothing + for i = 1:length(observed_used_newssas) + if observed_used_newssas[i] != compact.new_new_used_ssas[i] + return (observed_used_ssas, observed_used_newssas, NewSSAValue(i)) + end + end + return (nothing, nothing, 0) end function oracle_check(compact::IncrementalCompact) - maybe_oracle_used_ssas = _oracle_check(compact) + (maybe_oracle_used_ssas, observed_used_newssas, oracle_error_ssa) = _oracle_check(compact) if maybe_oracle_used_ssas !== nothing - @eval Main (compact = $compact; oracle_used_ssas = $maybe_oracle_used_ssas) - error("Oracle check failed, inspect Main.compact and Main.oracle_used_ssas") + @eval Main (compact = $compact; oracle_used_ssas = 
$maybe_oracle_used_ssas; observed_used_newssas = $observed_used_newssas; oracle_error_ssa = $(QuoteNode(oracle_error_ssa))) + error("Oracle check failed, inspect Main.{compact, oracle_used_ssas, observed_used_newssas, oracle_error_ssa}") end end @@ -1178,6 +1260,7 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr if !isa(cond, Bool) condT = widenconditional(argextype(cond, compact)) isa(condT, Const) || @goto bail + kill_current_use!(compact, cond) cond = condT.val isa(cond, Bool) || @goto bail end @@ -1205,6 +1288,13 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr label = compact.bb_rename_succ[stmt.args[1]::Int] @assert label > 0 stmt.args[1] = label + elseif isexpr(stmt, :throw_undef_if_not) + cond = stmt.args[2] + if isa(cond, Bool) && cond === true + # cond was folded to true - this statement + # is dead. + return result_idx + end end result[result_idx][:inst] = stmt result_idx += 1 @@ -1257,7 +1347,7 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr values = Vector{Any}(undef, length(stmt.values)) new_index = 1 for old_index in 1:length(stmt.edges) - if stmt.edges[old_index] != -1 + if stmt.edges[old_index] > 0 edges[new_index] = stmt.edges[old_index] if isassigned(stmt.values, old_index) values[new_index] = stmt.values[old_index] @@ -1377,7 +1467,7 @@ function process_newnode!(compact::IncrementalCompact, new_idx::Int, new_node_en active_bb += 1 finish_current_bb!(compact, active_bb, old_result_idx) end - return (new_idx, old_result_idx, result_idx, idx, active_bb) + return (old_result_idx, result_idx, active_bb) end struct CompactPeekIterator @@ -1420,18 +1510,22 @@ function iterate(it::CompactPeekIterator, (idx, aidx, bidx)::NTuple{3, Int}=(it. return (compact.ir.stmts[idx][:inst], (idx + 1, aidx, bidx)) end -# This Union{Nothing, Pair{Pair{Int,Int},Any}} cannot be stack allocated, so we inline it -@inline function iterate(compact::IncrementalCompact, st::Tuple{Int, Int}=(compact.idx, 1)) - st = iterate_compact(compact, st) - st === nothing && return nothing - old_result_idx = st[1][2] - return Pair{Pair{Int,Int},Any}(st[1], compact.result[old_result_idx][:inst]), st[2] +# the returned Union{Nothing, Pair{Pair{Int,Int},Any}} cannot be stack allocated, +# so we inline this function into the caller +@inline function iterate(compact::IncrementalCompact, state=nothing) + idxs = iterate_compact(compact) + idxs === nothing && return nothing + old_result_idx = idxs[2] + return Pair{Pair{Int,Int},Any}(idxs, compact.result[old_result_idx][:inst]), nothing end -function iterate_compact(compact::IncrementalCompact, (idx, active_bb)::Tuple{Int, Int}) +function iterate_compact(compact::IncrementalCompact) # Create label to dodge recursion so that we don't stack overflow @label restart + idx = compact.idx + active_bb = compact.active_bb + old_result_idx = compact.result_idx if idx > length(compact.ir.stmts) && (compact.new_nodes_idx > length(compact.perm)) return nothing @@ -1456,14 +1550,15 @@ function iterate_compact(compact::IncrementalCompact, (idx, active_bb)::Tuple{In if !(info.attach_after ? 
info.pos <= compact.idx - 1 : info.pos <= compact.idx) break end - popfirst!(compact.pending_perm) + heappop!(compact.pending_perm, By(x -> compact.pending_nodes.info[x].pos)) end # Move to next block compact.idx += 1 + compact.active_bb += 1 if finish_current_bb!(compact, active_bb, old_result_idx, true) - return iterate_compact(compact, (compact.idx, active_bb + 1)) + return iterate_compact(compact) else - return Pair{Int,Int}(compact.idx-1, old_result_idx), (compact.idx, active_bb + 1) + return Pair{Int,Int}(compact.idx-1, old_result_idx) end end if compact.new_nodes_idx <= length(compact.perm) && @@ -1474,21 +1569,23 @@ function iterate_compact(compact::IncrementalCompact, (idx, active_bb)::Tuple{In new_node_entry = compact.ir.new_nodes.stmts[new_idx] new_node_info = compact.ir.new_nodes.info[new_idx] new_idx += length(compact.ir.stmts) - (new_idx, old_result_idx, result_idx, idx, active_bb) = + (old_result_idx, result_idx, active_bb) = process_newnode!(compact, new_idx, new_node_entry, new_node_info, idx, active_bb, true) + compact.active_bb = active_bb old_result_idx == result_idx && @goto restart - return Pair{Int,Int}(new_idx, old_result_idx), (idx, active_bb) + return Pair{Int,Int}(new_idx, old_result_idx) elseif !isempty(compact.pending_perm) && (info = compact.pending_nodes.info[compact.pending_perm[1]]; info.attach_after ? info.pos == idx - 1 : info.pos == idx) - new_idx = popfirst!(compact.pending_perm) + new_idx = heappop!(compact.pending_perm, By(x -> compact.pending_nodes.info[x].pos)) new_node_entry = compact.pending_nodes.stmts[new_idx] new_node_info = compact.pending_nodes.info[new_idx] new_idx += length(compact.ir.stmts) + length(compact.ir.new_nodes) - (new_idx, old_result_idx, result_idx, idx, active_bb) = + (old_result_idx, result_idx, active_bb) = process_newnode!(compact, new_idx, new_node_entry, new_node_info, idx, active_bb, false) + compact.active_bb = active_bb old_result_idx == result_idx && @goto restart - return Pair{Int,Int}(new_idx, old_result_idx), (idx, active_bb) + return Pair{Int,Int}(new_idx, old_result_idx) end # This will get overwritten in future iterations if # result_idx is not incremented, but that's ok and expected @@ -1500,12 +1597,13 @@ function iterate_compact(compact::IncrementalCompact, (idx, active_bb)::Tuple{In active_bb += 1 end compact.idx = idx + 1 + compact.active_bb = active_bb if old_result_idx == compact.result_idx idx += 1 @goto restart end @assert isassigned(compact.result.inst, old_result_idx) - return Pair{Int,Int}(compact.idx-1, old_result_idx), (compact.idx, active_bb) + return Pair{Int,Int}(compact.idx-1, old_result_idx) end function maybe_erase_unused!( @@ -1556,7 +1654,6 @@ function fixup_phinode_values!(compact::IncrementalCompact, old_values::Vector{A return (values, fixup) end - function fixup_node(compact::IncrementalCompact, @nospecialize(stmt), reify_new_nodes::Bool) if isa(stmt, PhiNode) (node, needs_fixup) = fixup_phinode_values!(compact, stmt.values, reify_new_nodes) @@ -1660,7 +1757,7 @@ function complete(compact::IncrementalCompact) result_bbs = resize!(compact.result_bbs, compact.active_result_bb-1) cfg = CFG(result_bbs, Int[first(result_bbs[i].stmts) for i in 2:length(result_bbs)]) - if __check_ssa_counts__[] + if should_check_ssa_counts() oracle_check(compact) end @@ -1709,10 +1806,10 @@ abstract type Inserter; end struct InsertHere <: Inserter compact::IncrementalCompact end -(i::InsertHere)(new_inst::NewInstruction) = insert_node_here!(i.compact, new_inst) +(i::InsertHere)(newinst::NewInstruction) =
insert_node_here!(i.compact, newinst) struct InsertBefore{T<:Union{IRCode, IncrementalCompact}} <: Inserter src::T pos::SSAValue end -(i::InsertBefore)(new_inst::NewInstruction) = insert_node!(i.src, i.pos, new_inst) +(i::InsertBefore)(newinst::NewInstruction) = insert_node!(i.src, i.pos, newinst) diff --git a/base/compiler/ssair/irinterp.jl b/base/compiler/ssair/irinterp.jl index bef82684f735a..e5aeec35f37f2 100644 --- a/base/compiler/ssair/irinterp.jl +++ b/base/compiler/ssair/irinterp.jl @@ -119,7 +119,7 @@ end function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f), arginfo::ArgInfo, si::StmtInfo, @nospecialize(atype), sv::IRCode, max_methods::Int) - return CallMeta(Any, Effects(), false) + return CallMeta(Any, Effects(), NoCallInfo()) end function collect_limitations!(@nospecialize(typ), ::IRCode) @@ -133,6 +133,7 @@ function concrete_eval_invoke(interp::AbstractInterpreter, code = get(mi_cache, mi, nothing) code === nothing && return nothing argtypes = collect_argtypes(interp, inst.args[2:end], nothing, irsv.ir) + argtypes === nothing && return Union{} effects = decode_effects(code.ipo_purity_bits) if is_foldable(effects) && is_all_const_arg(argtypes, #=start=#1) args = collect_const_args(argtypes, #=start=#1) @@ -164,15 +165,16 @@ function reprocess_instruction!(interp::AbstractInterpreter, irsv::IRInterpretationState) ir = irsv.ir if isa(inst, GotoIfNot) - cond = argextype(inst.cond, ir) - if isa(cond, Const) + cond = inst.cond + condval = maybe_extract_const_bool(argextype(cond, ir)) + if condval isa Bool function update_phi!(from::Int, to::Int) if length(ir.cfg.blocks[to].preds) == 0 return end for idx in ir.cfg.blocks[to].stmts stmt = ir.stmts[idx][:inst] - isa(stmt, Nothing) && continue + isa(stmt, Nothing) && continue # allowed between `PhiNode`s isa(stmt, PhiNode) || break for (i, edge) in enumerate(stmt.edges) if edge == from @@ -184,14 +186,15 @@ function reprocess_instruction!(interp::AbstractInterpreter, end end end - if isa(inst.cond, SSAValue) - kill_def_use!(irsv.tpdum, inst.cond::SSAValue, idx) + if isa(cond, SSAValue) + kill_def_use!(irsv.tpdum, cond, idx) end if bb === nothing bb = block_for_inst(ir, idx) end - if (cond.val)::Bool + if condval ir.stmts[idx][:inst] = nothing + ir.stmts[idx][:type] = Any kill_edge!(ir, bb, inst.dest, update_phi!) else ir.stmts[idx][:inst] = GotoNode(inst.dest) @@ -204,7 +207,8 @@ function reprocess_instruction!(interp::AbstractInterpreter, rt = nothing if isa(inst, Expr) - if inst.head === :call || inst.head === :foreigncall || inst.head === :new + head = inst.head + if head === :call || head === :foreigncall || head === :new (; rt, effects) = abstract_eval_statement_expr(interp, inst, nothing, ir, irsv.mi) # All other effects already guaranteed effect free by construction if is_nothrow(effects) @@ -214,14 +218,18 @@ function reprocess_instruction!(interp::AbstractInterpreter, ir.stmts[idx][:flag] |= IR_FLAG_EFFECT_FREE end end - elseif inst.head === :invoke + elseif head === :invoke mi′ = inst.args[1]::MethodInstance if mi′ !== irsv.mi # prevent infinite loop rt = concrete_eval_invoke(interp, inst, mi′, irsv) end + elseif head === :throw_undef_if_not || # TODO: Terminate interpretation early if known false? 
+ head === :gc_preserve_begin || + head === :gc_preserve_end + return false else ccall(:jl_, Cvoid, (Any,), inst) - error() + error("reprocess_instruction!: unhandled expression found") end elseif isa(inst, PhiNode) rt = abstract_eval_phi_stmt(interp, inst, idx, irsv) @@ -229,7 +237,7 @@ function reprocess_instruction!(interp::AbstractInterpreter, # Handled at the very end return false elseif isa(inst, PiNode) - rt = tmeet(argextype(inst.val, ir), inst.typ) + rt = tmeet(typeinf_lattice(interp), argextype(inst.val, ir), inst.typ) else ccall(:jl_, Cvoid, (Any,), inst) error() @@ -241,6 +249,36 @@ function reprocess_instruction!(interp::AbstractInterpreter, return false end +# Process the terminator and add the successor to `ip`. Returns whether a backedge was seen. +function process_terminator!(ir::IRCode, idx::Int, bb::Int, + all_rets::Vector{Int}, ip::BitSetBoundedMinPrioritySet) + inst = ir.stmts[idx][:inst] + if isa(inst, ReturnNode) + if isdefined(inst, :val) + push!(all_rets, idx) + end + return false + elseif isa(inst, GotoNode) + backedge = inst.label < bb + !backedge && push!(ip, inst.label) + return backedge + elseif isa(inst, GotoIfNot) + backedge = inst.dest < bb + !backedge && push!(ip, inst.dest) + push!(ip, bb + 1) + return backedge + elseif isexpr(inst, :enter) + dest = inst.args[1]::Int + @assert dest > bb + push!(ip, dest) + push!(ip, bb + 1) + return false + else + push!(ip, bb + 1) + return false + end +end + function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IRInterpretationState; extra_reprocess::Union{Nothing,BitSet} = nothing) (; ir, tpdum, ssa_refined) = irsv @@ -250,40 +288,6 @@ function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IR push!(ip, 1) all_rets = Int[] - """ - process_terminator! - - Process the terminator and add the successor to `ip`. Returns whether a - backedge was seen. - """ - function process_terminator!(ip::BitSetBoundedMinPrioritySet, bb::Int, idx::Int) - inst = ir.stmts[idx][:inst] - if isa(inst, ReturnNode) - if isdefined(inst, :val) - push!(all_rets, idx) - end - return false - elseif isa(inst, GotoNode) - backedge = inst.label < bb - !backedge && push!(ip, inst.label) - return backedge - elseif isa(inst, GotoIfNot) - backedge = inst.dest < bb - !backedge && push!(ip, inst.dest) - push!(ip, bb + 1) - return backedge - elseif isexpr(inst, :enter) - dest = inst.args[1]::Int - @assert dest > bb - push!(ip, dest) - push!(ip, bb + 1) - return false - else - push!(ip, bb + 1) - return false - end - end - # Fast path: Scan both use counts and refinement in one single pass # of the instructions. In the absence of backedges, this will # converge.
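The hoisted `process_terminator!` above encodes one invariant worth calling out: an edge is a backedge exactly when it targets a lower-numbered block, and only forward targets are pushed onto the worklist. A minimal, self-contained sketch of that classification; `Goto`, `GotoIf`, and `Return` are hypothetical stand-ins, not the actual `Core.Compiler` node types:

```julia
# Hypothetical stand-ins for Core.Compiler's GotoNode / GotoIfNot / ReturnNode,
# used only to illustrate the backedge test performed by `process_terminator!`.
struct Goto; label::Int; end
struct GotoIf; dest::Int; end
struct Return; end

# A branch is a backedge when it jumps to a block with a smaller index;
# the fast path above bails out to the residual scan when it sees one.
is_backedge(t::Goto, bb::Int) = t.label < bb
is_backedge(t::GotoIf, bb::Int) = t.dest < bb
is_backedge(::Return, bb::Int) = false

@assert is_backedge(Goto(2), 5)      # jump from bb 5 back to bb 2: loop backedge
@assert !is_backedge(GotoIf(7), 5)   # forward branch: pushed onto the worklist
@assert !is_backedge(Return(), 5)    # returns terminate; their values go to all_rets
```

Hoisting the helper to the top level also turns the previously captured `ir` and `all_rets` into explicit arguments, which tends to be friendlier to the compiler than a capturing closure.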
@@ -312,7 +316,7 @@ function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IR idx, bb, inst, typ, irsv) push!(ssa_refined, idx) end - if idx == lstmt && process_terminator!(ip, bb, idx) + if idx == lstmt && process_terminator!(ir, idx, bb, all_rets, ip) @goto residual_scan end if typ === Bottom && !isa(inst, PhiNode) @@ -343,7 +347,7 @@ function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IR count!(tpdum, val) end end - idx == lstmt && process_terminator!(ip, bb, idx) + idx == lstmt && process_terminator!(ir, idx, bb, all_rets, ip) end end @@ -362,7 +366,7 @@ function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IR push!(tpdum[val.id], idx) end end - idx == lstmt && process_terminator!(ip, bb, idx) + idx == lstmt && process_terminator!(ir, idx, bb, all_rets, ip) end end diff --git a/base/compiler/ssair/legacy.jl b/base/compiler/ssair/legacy.jl index f8fce66bde49c..4a8e299179ecb 100644 --- a/base/compiler/ssair/legacy.jl +++ b/base/compiler/ssair/legacy.jl @@ -39,7 +39,7 @@ function inflate_ir!(ci::CodeInfo, sptypes::Vector{Any}, argtypes::Vector{Any}) if !isa(ssavaluetypes, Vector{Any}) ssavaluetypes = Any[ Any for i = 1:ssavaluetypes::Int ] end - info = Any[nothing for i = 1:nstmts] + info = CallInfo[NoCallInfo() for i = 1:nstmts] stmts = InstructionStream(code, ssavaluetypes, info, ci.codelocs, ci.ssaflags) linetable = ci.linetable if !isa(linetable, Vector{LineInfoNode}) diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index d594112b239e3..87a6fe6fec21a 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -72,7 +72,7 @@ function try_compute_fieldidx_stmt(ir::Union{IncrementalCompact,IRCode}, stmt::E return try_compute_fieldidx(typ, field) end -function find_curblock(domtree::DomTree, allblocks::Vector{Int}, curblock::Int) +function find_curblock(domtree::DomTree, allblocks::BitSet, curblock::Int) # TODO: This can be much faster by looking at current level and only # searching for those blocks in a sorted order while !(curblock in allblocks) && curblock !== 0 @@ -92,7 +92,7 @@ function val_for_def_expr(ir::IRCode, def::Int, fidx::Int) end end -function compute_value_for_block(ir::IRCode, domtree::DomTree, allblocks::Vector{Int}, du::SSADefUse, phinodes::IdDict{Int, SSAValue}, fidx::Int, curblock::Int) +function compute_value_for_block(ir::IRCode, domtree::DomTree, allblocks::BitSet, du::SSADefUse, phinodes::IdDict{Int, SSAValue}, fidx::Int, curblock::Int) curblock = find_curblock(domtree, allblocks, curblock) def = 0 for stmt in du.defs @@ -103,7 +103,7 @@ function compute_value_for_block(ir::IRCode, domtree::DomTree, allblocks::Vector def == 0 ? 
phinodes[curblock] : val_for_def_expr(ir, def, fidx) end -function compute_value_for_use(ir::IRCode, domtree::DomTree, allblocks::Vector{Int}, +function compute_value_for_use(ir::IRCode, domtree::DomTree, allblocks::BitSet, du::SSADefUse, phinodes::IdDict{Int, SSAValue}, fidx::Int, use::Int) def, useblock, curblock = find_def_for_use(ir, domtree, allblocks, du, use) if def == 0 @@ -122,7 +122,7 @@ end # even when the allocation contains an uninitialized field, we try an extra effort to check # if this load at `idx` has any "safe" `setfield!` calls that define the field function has_safe_def( - ir::IRCode, domtree::DomTree, allblocks::Vector{Int}, du::SSADefUse, + ir::IRCode, domtree::DomTree, allblocks::BitSet, du::SSADefUse, newidx::Int, idx::Int) def, _, _ = find_def_for_use(ir, domtree, allblocks, du, idx) # will throw since we already checked this `:new` site doesn't define this field @@ -157,7 +157,7 @@ end # find the first dominating def for the given use function find_def_for_use( - ir::IRCode, domtree::DomTree, allblocks::Vector{Int}, du::SSADefUse, use::Int, inclusive::Bool=false) + ir::IRCode, domtree::DomTree, allblocks::BitSet, du::SSADefUse, use::Int, inclusive::Bool=false) useblock = block_for_inst(ir.cfg, use) curblock = find_curblock(domtree, allblocks, useblock) local def = 0 @@ -401,6 +401,16 @@ function lift_leaves(compact::IncrementalCompact, end lift_arg!(compact, leaf, cache_key, def, 1+field, lifted_leaves) continue + # NOTE we can enable this, but most `:splatnew` expressions are transformed into + # `:new` expressions by the inliner + # elseif isexpr(def, :splatnew) && length(def.args) == 2 && isa(def.args[2], AnySSAValue) + # tplssa = def.args[2]::AnySSAValue + # tplexpr = compact[tplssa][:inst] + # if is_known_call(tplexpr, tuple, compact) && 1 ≤ field < length(tplexpr.args) + # lift_arg!(compact, tplssa, cache_key, tplexpr, 1+field, lifted_leaves) + # continue + # end + # return nothing elseif is_getfield_captures(def, compact) # Walk to new_opaque_closure ocleaf = def.args[2] @@ -469,7 +479,7 @@ function lift_arg!( end end lifted_leaves[cache_key] = LiftedValue(lifted) - nothing + return nothing end function walk_to_def(compact::IncrementalCompact, @nospecialize(leaf)) @@ -509,7 +519,8 @@ function lift_comparison!
end function lift_comparison!(::typeof(===), compact::IncrementalCompact, - idx::Int, stmt::Expr, lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue}) + idx::Int, stmt::Expr, lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue}, + opt_lattice::AbstractLattice = OptimizerLattice()) args = stmt.args length(args) == 3 || return lhs, rhs = args[2], args[3] @@ -528,19 +539,20 @@ function lift_comparison!(::typeof(===), compact::IncrementalCompact, lift_comparison_leaves!(egal_tfunc, compact, val, cmp, lifting_cache, idx) end -isa_tfunc_opt(@nospecialize(v), @nospecialize(t)) = isa_tfunc(OptimizerLattice(), v, t) - function lift_comparison!(::typeof(isa), compact::IncrementalCompact, - idx::Int, stmt::Expr, lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue}) + idx::Int, stmt::Expr, lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue}, + opt_lattice::AbstractLattice = OptimizerLattice()) args = stmt.args length(args) == 3 || return cmp = argextype(args[3], compact) val = args[2] + isa_tfunc_opt(@nospecialize(v), @nospecialize(typ)) = isa_tfunc(opt_lattice, v, typ) lift_comparison_leaves!(isa_tfunc_opt, compact, val, cmp, lifting_cache, idx) end function lift_comparison!(::typeof(isdefined), compact::IncrementalCompact, - idx::Int, stmt::Expr, lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue}) + idx::Int, stmt::Expr, lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue}, + opt_lattice::AbstractLattice = OptimizerLattice()) args = stmt.args length(args) == 3 || return cmp = argextype(args[3], compact) @@ -679,6 +691,7 @@ function perform_lifting!(compact::IncrementalCompact, (old_node_ssa, lf) = visited_phinodes[i], lifted_phis[i] old_node = compact[old_node_ssa][:inst]::PhiNode new_node = lf.node + should_count = !isa(lf.ssa, OldSSAValue) || already_inserted(compact, lf.ssa) lf.need_argupdate || continue for i = 1:length(old_node.edges) edge = old_node.edges[i] @@ -702,15 +715,17 @@ function perform_lifting!(compact::IncrementalCompact, callback = (@nospecialize(pi), @nospecialize(idx)) -> true val = simple_walk(compact, val, callback) end + should_count && _count_added_node!(compact, val) push!(new_node.values, val) elseif isa(val, AnySSAValue) && val in keys(reverse_mapping) push!(new_node.edges, edge) - push!(new_node.values, lifted_phis[reverse_mapping[val]].ssa) + newval = lifted_phis[reverse_mapping[val]].ssa + should_count && _count_added_node!(compact, newval) + push!(new_node.values, newval) else # Probably ignored by path condition, skip this end end - count_added_node!(compact, new_node) end # Fixup the stmt itself @@ -837,6 +852,7 @@ In a case when all usages are fully eliminated, `struct` allocation may also be a result of succeeding dead code elimination. """ function sroa_pass!(ir::IRCode, inlining::Union{Nothing, InliningState} = nothing) + opt_lattice = inlining === nothing ? 
OptimizerLattice() : optimizer_lattice(inlining.interp) compact = IncrementalCompact(ir) defuses = nothing # will be initialized once we encounter mutability in order to reduce dynamic allocations lifting_cache = IdDict{Pair{AnySSAValue, Any}, AnySSAValue}() @@ -930,9 +946,9 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing, InliningState} = nothin elseif is_known_call(stmt, Core._svec_ref, compact) lift_svec_ref!(compact, idx, stmt) elseif is_known_call(stmt, (===), compact) - lift_comparison!(===, compact, idx, stmt, lifting_cache) + lift_comparison!(===, compact, idx, stmt, lifting_cache, opt_lattice) elseif is_known_call(stmt, isa, compact) - lift_comparison!(isa, compact, idx, stmt, lifting_cache) + lift_comparison!(isa, compact, idx, stmt, lifting_cache, opt_lattice) end continue end @@ -952,7 +968,7 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing, InliningState} = nothin struct_typ = unswitchtupleunion(struct_typ) end if isa(struct_typ, Union) && is_isdefined - lift_comparison!(isdefined, compact, idx, stmt, lifting_cache) + lift_comparison!(isdefined, compact, idx, stmt, lifting_cache, opt_lattice) continue end isa(struct_typ, DataType) || continue @@ -1048,8 +1064,10 @@ end # NOTE we resolve the inlining source here as we don't want to serialize `Core.Compiler` # data structure into the global cache (see the comment in `handle_finalizer_call!`) -function try_inline_finalizer!(ir::IRCode, argexprs::Vector{Any}, idx::Int, mi::MethodInstance, inlining::InliningState, attach_after::Bool) - code = get(inlining.mi_cache, mi, nothing) +function try_inline_finalizer!(ir::IRCode, argexprs::Vector{Any}, idx::Int, + mi::MethodInstance, @nospecialize(info::CallInfo), inlining::InliningState, + attach_after::Bool) + code = get(code_cache(inlining), mi, nothing) et = InliningEdgeTracker(inlining.et) if code isa CodeInstance if use_const_api(code) @@ -1062,7 +1080,7 @@ function try_inline_finalizer!(ir::IRCode, argexprs::Vector{Any}, idx::Int, mi:: src = code end - src = inlining_policy(inlining.interp, src, IR_FLAG_NULL, mi, Any[]) + src = inlining_policy(inlining.interp, src, info, IR_FLAG_NULL, mi, Any[]) src === nothing && return false src = retrieve_ir_for_inlining(mi, src) @@ -1088,7 +1106,9 @@ function try_inline_finalizer!(ir::IRCode, argexprs::Vector{Any}, idx::Int, mi:: ssa_rename[ssa.id] end stmt′ = ssa_substitute_op!(InsertBefore(ir, SSAValue(idx)), inst, stmt′, argexprs, mi.specTypes, mi.sparam_vals, sp_ssa, :default) - ssa_rename[idx′] = insert_node!(ir, idx, NewInstruction(stmt′, inst; line = inst[:line] + linetable_offset), attach_after) + ssa_rename[idx′] = insert_node!(ir, idx, + NewInstruction(inst; stmt=stmt′, line=inst[:line]+linetable_offset), + attach_after) end return true @@ -1116,7 +1136,7 @@ end function try_resolve_finalizer!(ir::IRCode, idx::Int, finalizer_idx::Int, defuse::SSADefUse, inlining::InliningState, lazydomtree::LazyDomtree, - lazypostdomtree::LazyPostDomtree, info::Union{FinalizerInfo, Nothing}) + lazypostdomtree::LazyPostDomtree, @nospecialize(info::CallInfo)) # For now, require that: # 1. The allocation dominates the finalizer registration # 2. 
The finalizer registration dominates all uses reachable from the @@ -1142,7 +1162,7 @@ function try_resolve_finalizer!(ir::IRCode, idx::Int, finalizer_idx::Int, defuse bb_insert_block, usebb) if new_bb_insert_block == bb_insert_block == usebb if bb_insert_idx !== nothing - bb_insert_idx = max(bb_insert_idx, useidx) + bb_insert_idx = max(bb_insert_idx::Int, useidx) else bb_insert_idx = useidx end @@ -1212,14 +1232,14 @@ function try_resolve_finalizer!(ir::IRCode, idx::Int, finalizer_idx::Int, defuse finalizer_stmt = ir[SSAValue(finalizer_idx)][:inst] argexprs = Any[finalizer_stmt.args[2], finalizer_stmt.args[3]] - flags = info === nothing ? UInt8(0) : flags_for_effects(info.effects) + flags = info isa FinalizerInfo ? flags_for_effects(info.effects) : IR_FLAG_NULL if length(finalizer_stmt.args) >= 4 inline = finalizer_stmt.args[4] if inline === nothing # No code in the function - Nothing to do else mi = finalizer_stmt.args[5]::MethodInstance - if inline::Bool && try_inline_finalizer!(ir, argexprs, loc, mi, inlining, attach_after) + if inline::Bool && try_inline_finalizer!(ir, argexprs, loc, mi, info, inlining, attach_after) # the finalizer body has been inlined else insert_node!(ir, loc, with_flags(NewInstruction(Expr(:invoke, mi, argexprs...), Nothing), flags), attach_after) @@ -1306,7 +1326,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse # but we should come up with semantics for well defined semantics # for uninitialized fields first. ndefuse = length(fielddefuse) - blocks = Vector{Tuple{#=phiblocks=# Vector{Int}, #=allblocks=# Vector{Int}}}(undef, ndefuse) + blocks = Vector{Tuple{#=phiblocks=# Vector{Int}, #=allblocks=# BitSet}}(undef, ndefuse) for fidx in 1:ndefuse du = fielddefuse[fidx] isempty(du.uses) && continue @@ -1317,7 +1337,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse else phiblocks = iterated_dominance_frontier(ir.cfg, ldu, get!(lazydomtree)) end - allblocks = sort!(vcat(phiblocks, ldu.def_bbs); alg=QuickSort) + allblocks = union!(BitSet(phiblocks), ldu.def_bbs) blocks[fidx] = phiblocks, allblocks if fidx + 1 > length(defexpr.args) for i = 1:length(du.uses) @@ -1457,7 +1477,7 @@ function canonicalize_typeassert!(compact::IncrementalCompact, idx::Int, stmt::E NewInstruction( PiNode(stmt.args[2], compact.result[idx][:type]), compact.result[idx][:type], - compact.result[idx][:line]), true) + compact.result[idx][:line]), #=reverse_affinity=#true) compact.ssa_rename[compact.idx-1] = pi end @@ -1497,6 +1517,30 @@ function is_union_phi(compact::IncrementalCompact, idx::Int) return is_some_union(inst[:type]) end +function kill_phi!(compact::IncrementalCompact, phi_uses::Vector{Int}, + to_drop::Union{Vector{Int}, UnitRange{Int}}, + ssa::SSAValue, phi::PhiNode, delete_inst::Bool = false) + for d in to_drop + if isassigned(phi.values, d) + val = phi.values[d] + if !delete_inst + # Deleting the inst will update compact's use count, so + # don't do it here. 
+ kill_current_use!(compact, val) + end + if isa(val, SSAValue) + phi_uses[val.id] -= 1 + end + end + end + if delete_inst + compact[ssa] = nothing + elseif !isempty(to_drop) + deleteat!(phi.values, to_drop) + deleteat!(phi.edges, to_drop) + end +end + """ adce_pass!(ir::IRCode) -> newir::IRCode @@ -1582,7 +1626,8 @@ function adce_pass!(ir::IRCode) phi = unionphi[1] t = unionphi[2] if t === Union{} - compact.result[phi][:inst] = nothing + stmt = compact[SSAValue(phi)][:inst]::PhiNode + kill_phi!(compact, phi_uses, 1:length(stmt.values), SSAValue(phi), stmt, true) continue elseif t === Any continue @@ -1603,9 +1648,7 @@ function adce_pass!(ir::IRCode) end end compact.result[phi][:type] = t - isempty(to_drop) && continue - deleteat!(stmt.values, to_drop) - deleteat!(stmt.edges, to_drop) + kill_phi!(compact, phi_uses, to_drop, SSAValue(phi), stmt, false) end # Perform simple DCE for unused values extra_worklist = Int[] @@ -1710,6 +1753,8 @@ function type_lift_pass!(ir::IRCode) first = false end local id::Int = 0 + all_same = true + local last_val for i = 1:length(values) if !isassigned(def.values, i) val = false @@ -1740,6 +1785,8 @@ function type_lift_pass!(ir::IRCode) if haskey(processed, id) val = processed[id] else + # TODO: Re-check after convergence whether all the values are the same + all_same = false push!(worklist, (id, up_id, new_phi::SSAValue, i)) continue end @@ -1750,17 +1797,26 @@ function type_lift_pass!(ir::IRCode) end end if isa(def, PhiNode) + if !@isdefined(last_val) + last_val = val + elseif all_same + all_same &= last_val === val + end values[i] = val else values[i] = insert_node!(ir, up_id, NewInstruction(UpsilonNode(val), Bool)) end end + if all_same && @isdefined(last_val) + # Decay the PhiNode back to the single value + ir[new_phi][:inst] = last_val + isa(last_val, Bool) && (processed[item] = last_val) + end if which !== SSAValue(0) phi = ir[which][:inst] if isa(phi, PhiNode) phi.values[use] = new_phi - else - phi = phi::PhiCNode + elseif isa(phi, PhiCNode) phi.values[use] = insert_node!(ir, w_up_id, NewInstruction(UpsilonNode(new_phi), Bool)) end end @@ -1787,6 +1843,8 @@ end # TODO: This is terrible, we should change the IR for GotoIfNot to gain an else case function is_legal_bb_drop(ir::IRCode, bbidx::Int, bb::BasicBlock) + # For the time being, don't drop the first bb, because it has special predecessor semantics. + bbidx == 1 && return false # If the block we're going to is the same as the fallthrough, it's always legal to drop # the block.
length(bb.stmts) == 0 && return true @@ -1795,21 +1853,45 @@ function is_legal_bb_drop(ir::IRCode, bbidx::Int, bb::BasicBlock) stmt === nothing && return true ((stmt::GotoNode).label == bbidx + 1) && return true end - # Otherwise make sure we're not the fallthrough case of any predecessor - for pred in bb.preds - if pred == bbidx - 1 - terminator = ir[SSAValue(first(bb.stmts)-1)][:inst] - if isa(terminator, GotoIfNot) - if terminator.dest != bbidx - return false - end + return true +end + +function legalize_bb_drop_pred!(ir::IRCode, bb::BasicBlock, bbidx::Int, bbs::Vector{BasicBlock}, dropped_bbs::Vector{Int}) + (bbidx-1) in bb.preds || return true + last_fallthrough = bbidx-1 + dbi = length(dropped_bbs) + while dbi != 0 && dropped_bbs[dbi] == last_fallthrough && (last_fallthrough-1 in bbs[last_fallthrough].preds) + last_fallthrough -= 1 + dbi -= 1 + end + last_fallthrough_term_ssa = SSAValue(last(bbs[last_fallthrough].stmts)) + terminator = ir[last_fallthrough_term_ssa][:inst] + if isa(terminator, GotoIfNot) + if terminator.dest != bbidx + # The previous terminator's destination matches our fallthrough. + # If we're also a fallthrough terminator, then we just have + # to delete the GotoIfNot. + our_terminator = ir[SSAValue(last(bb.stmts))][:inst] + if terminator.dest != (isa(our_terminator, GotoNode) ? our_terminator.label : bbidx + 1) + return false end - break end + ir[last_fallthrough_term_ssa] = nothing + kill_edge!(bbs, last_fallthrough, terminator.dest) + elseif isexpr(terminator, :enter) + return false + elseif isa(terminator, GotoNode) + return true end + # Hack, but effective. If we have a predecessor with a fall-through terminator, change the + # instruction numbering to merge the blocks now so that the processing below will properly + # update it. + bbs[last_fallthrough] = BasicBlock(first(bbs[last_fallthrough].stmts):last(bb.stmts), bbs[last_fallthrough].preds, bbs[last_fallthrough].succs) return true end +is_terminator(@nospecialize(inst)) = isa(inst, GotoNode) || isa(inst, GotoIfNot) || isexpr(inst, :enter) + function cfg_simplify!(ir::IRCode) bbs = ir.cfg.blocks merge_into = zeros(Int, length(bbs)) @@ -1838,14 +1920,19 @@ function cfg_simplify!(ir::IRCode) for (idx, bb) in enumerate(bbs) if length(bb.succs) == 1 succ = bb.succs[1] - if length(bbs[succ].preds) == 1 + if length(bbs[succ].preds) == 1 && succ != 1 + # Can't merge blocks with :enter terminator even if they + # only have one successor. + if isexpr(ir[SSAValue(last(bb.stmts))][:inst], :enter) + continue + end # Prevent cycles by making sure we don't end up back at `idx` # by following what is to be merged into `succ` if follow_merged_succ(succ) != idx merge_into[succ] = idx merged_succ[idx] = succ end - elseif is_bb_empty(ir, bb) && is_legal_bb_drop(ir, idx, bb) + elseif merge_into[idx] == 0 && is_bb_empty(ir, bb) && is_legal_bb_drop(ir, idx, bb) # If this BB is empty, we can still merge it as long as none of our successor's phi nodes # reference our predecessors.
found_interference = false @@ -1865,9 +1952,9 @@ function cfg_simplify!(ir::IRCode) end end @label done - if !found_interference - push!(dropped_bbs, idx) - end + found_interference && continue + legalize_bb_drop_pred!(ir, bb, idx, bbs, dropped_bbs) || continue + push!(dropped_bbs, idx) end end end @@ -1912,10 +1999,15 @@ function cfg_simplify!(ir::IRCode) if bb_rename_succ[terminator.dest] == 0 push!(worklist, terminator.dest) end + elseif isexpr(terminator, :enter) + if bb_rename_succ[terminator.args[1]] == 0 + push!(worklist, terminator.args[1]) + end end ncurr = curr + 1 - if !isempty(searchsorted(dropped_bbs, ncurr)) - break + while !isempty(searchsorted(dropped_bbs, ncurr)) + bb_rename_succ[ncurr] = -bbs[ncurr].succs[1] + ncurr += 1 end curr = ncurr end @@ -2034,8 +2126,12 @@ function cfg_simplify!(ir::IRCode) res = Int[] function scan_preds!(preds) for pred in preds + if pred == 0 + push!(res, 0) + continue + end r = bb_rename_pred[pred] - r == -2 && continue + (r == -2 || r == -1) && continue if r == -3 scan_preds!(bbs[pred].preds) else @@ -2063,7 +2159,7 @@ function cfg_simplify!(ir::IRCode) if new_bb.succs[1] == new_bb.succs[2] old_bb2 = findfirst(x->x==bbidx, bb_rename_pred) terminator = ir[SSAValue(last(bbs[old_bb2].stmts))] - @assert isa(terminator[:inst], GotoIfNot) + @assert terminator[:inst] isa GotoIfNot # N.B.: The dest will be renamed in process_node! below terminator[:inst] = GotoNode(terminator[:inst].dest) pop!(new_bb.succs) @@ -2095,8 +2191,65 @@ function cfg_simplify!(ir::IRCode) if isa(node[:inst], GotoNode) && merged_succ[ms] != 0 # If we merged a basic block, we need to remove the trailing GotoNode (if any) compact.result[compact.result_idx][:inst] = nothing + elseif isa(node[:inst], PhiNode) + phi = node[:inst] + values = phi.values + (; ssa_rename, late_fixup, used_ssas, new_new_used_ssas) = compact + ssa_rename[i] = SSAValue(compact.result_idx) + processed_idx = i + renamed_values = process_phinode_values(values, late_fixup, processed_idx, compact.result_idx, ssa_rename, used_ssas, new_new_used_ssas, true) + edges = Int32[] + values = Any[] + sizehint!(edges, length(phi.edges)); sizehint!(values, length(renamed_values)) + for old_index in 1:length(phi.edges) + old_edge = phi.edges[old_index] + new_edge = bb_rename_pred[old_edge] + if new_edge > 0 + push!(edges, new_edge) + if isassigned(renamed_values, old_index) + push!(values, renamed_values[old_index]) + else + resize!(values, length(values)+1) + end + elseif new_edge == -3 + # Multiple predecessors, we need to expand out this phi + all_new_preds = Int32[] + function add_preds!(old_edge) + for old_edge′ in bbs[old_edge].preds + new_edge = bb_rename_pred[old_edge′] + if new_edge > 0 && !in(new_edge, all_new_preds) + push!(all_new_preds, new_edge) + elseif new_edge == -3 + add_preds!(old_edge′) + end + end + end + add_preds!(old_edge) + append!(edges, all_new_preds) + if isassigned(renamed_values, old_index) + val = renamed_values[old_index] + for _ in 1:length(all_new_preds) + push!(values, val) + end + length(all_new_preds) == 0 && kill_current_use!(compact, val) + for _ in 2:length(all_new_preds) + count_added_node!(compact, val) + end + else + resize!(values, length(values)+length(all_new_preds)) + end + else + isassigned(renamed_values, old_index) && kill_current_use!(compact, renamed_values[old_index]) + end + end + compact.result[compact.result_idx][:inst] = PhiNode(edges, values) else - process_node!(compact, compact.result_idx, node, i, i, ms, true) + ri = process_node!(compact, compact.result_idx,
node, i, i, ms, true) + if ri == compact.result_idx + # process_node! wanted this statement dropped. We don't do this, + # but we still need to erase the node + compact.result[compact.result_idx][:inst] = nothing + end end # We always increase the result index to ensure a predictable # placement of the resulting nodes. diff --git a/base/compiler/ssair/show.jl b/base/compiler/ssair/show.jl index 242e133cbd35f..71a075f30f821 100644 --- a/base/compiler/ssair/show.jl +++ b/base/compiler/ssair/show.jl @@ -16,10 +16,18 @@ import Base: show_unquoted using Base: printstyled, with_output_color, prec_decl, @invoke function Base.show(io::IO, cfg::CFG) + print(io, "CFG with $(length(cfg.blocks)) blocks:") for (idx, block) in enumerate(cfg.blocks) - print(io, idx, "\t=>\t") - join(io, block.succs, ", ") - println(io) + print(io, "\n bb ", idx) + if block.stmts.start == block.stmts.stop + print(io, " (stmt ", block.stmts.start, ")") + else + print(io, " (stmts ", block.stmts.start, ":", block.stmts.stop, ")") + end + if !isempty(block.succs) + print(io, " → bb ") + join(io, block.succs, ", ") + end end end @@ -571,17 +579,18 @@ end # Show a single statement, code.stmts[idx]/code.code[idx], in the context of the whole IRCode/CodeInfo. # Returns the updated value of bb_idx. -# pop_new_node!(idx::Int) -> (node_idx, new_node_inst, new_node_type) may return a new -# node at the current index `idx`, which is printed before the statement at index -# `idx`. This function is repeatedly called until it returns `nothing` +# pop_new_node!(idx::Int; attach_after=false) -> (node_idx, new_node_inst, new_node_type) +# may return a new node at the current index `idx`, which is printed before the statement +# at index `idx`. This function is repeatedly called until it returns `nothing`. +# To iterate nodes that are to be inserted after the statement, set `attach_after=true`. function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo, IncrementalCompact}, idx::Int, config::IRShowConfig, - used::BitSet, cfg::CFG, bb_idx::Int; pop_new_node! = Returns(nothing)) + used::BitSet, cfg::CFG, bb_idx::Int; pop_new_node! = Returns(nothing), only_after::Bool=false) return show_ir_stmt(io, code, idx, config.line_info_preprinter, config.line_info_postprinter, - used, cfg, bb_idx; pop_new_node!, config.bb_color) + used, cfg, bb_idx; pop_new_node!, only_after, config.bb_color) end function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo, IncrementalCompact}, idx::Int, line_info_preprinter, line_info_postprinter, - used::BitSet, cfg::CFG, bb_idx::Int; pop_new_node! = Returns(nothing), bb_color=:light_black) + used::BitSet, cfg::CFG, bb_idx::Int; pop_new_node! = Returns(nothing), only_after::Bool=false, bb_color=:light_black) stmt = _stmt(code, idx) type = _type(code, idx) max_bb_idx_size = length(string(length(cfg.blocks))) @@ -601,8 +610,7 @@ function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo, IncrementalCompact}, end i = 1 - while true - next = pop_new_node!(idx) + function print_indentation(final::Bool=true) # Compute BB guard rail if bb_idx > length(cfg.blocks) # If invariants are violated, print a special leader @@ -611,7 +619,6 @@ function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo, IncrementalCompact}, printstyled(io, "!!! ", "─"^max_bb_idx_size, color=bb_color) else bbrange = cfg.blocks[bb_idx].stmts - bbrange = bbrange.start:bbrange.stop # Print line info update linestart = idx == first(bbrange) ?
" " : sprint(io -> printstyled(io, "│ ", color=bb_color), context=io) linestart *= " "^max_bb_idx_size @@ -624,24 +631,20 @@ function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo, IncrementalCompact}, bb_pad = max_bb_idx_size - length(bb_idx_str) bb_type = length(cfg.blocks[bb_idx].preds) <= 1 ? "─" : "┄" printstyled(io, bb_idx_str, " ", bb_type, "─"^bb_pad, color=bb_color) - elseif next === nothing && idx == last(bbrange) # print separator + elseif final && idx == last(bbrange) # print separator printstyled(io, "└", "─"^(1 + max_bb_idx_size), color=bb_color) else printstyled(io, "│ ", " "^max_bb_idx_size, color=bb_color) end end print(io, inlining_indent, " ") + end - if next === nothing - if bb_idx <= length(cfg.blocks) && idx == last(bbrange) - bb_idx += 1 - end - break - end - - # print new nodes first in the right position - node_idx, new_node_inst, new_node_type = next + # first, print new nodes that are to be inserted before the current statement + function print_new_node(node; final::Bool=true) + print_indentation(final) + node_idx, new_node_inst, new_node_type = node @assert new_node_inst !== UNDEF # we filtered these out earlier show_type = should_print_ssa_type(new_node_inst) let maxlength_idx=maxlength_idx, show_type=show_type @@ -656,43 +659,84 @@ function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo, IncrementalCompact}, line_info_postprinter(io; type = new_node_type, used = node_idx in used, show_type, idx = node_idx) end println(io) + end + while (next = pop_new_node!(idx)) !== nothing + only_after || print_new_node(next; final=false) i += 1 end - if code isa CodeInfo - stmt = statement_indices_to_labels(stmt, cfg) + + # peek at the nodes to be inserted after the current statement + # (to determine whether the statement itself is the final one) + next = pop_new_node!(idx; attach_after=true) + + # then, print the current statement + # FIXME: `only_after` is a hack so that we can call this function to print uncompacted + # attach-after nodes when the current node has already been compacted + if !only_after + print_indentation(next===nothing) + if code isa CodeInfo + stmt = statement_indices_to_labels(stmt, cfg) + end + show_type = type !== nothing && should_print_ssa_type(stmt) + print_stmt(io, idx, stmt, used, maxlength_idx, true, show_type) + if type !== nothing # ignore types for pre-inference code + if type === UNDEF + # This is an error, but can happen if passes don't update their type information + printstyled(io, "::#UNDEF", color=:red) + else + line_info_postprinter(io; type, used = idx in used, show_type, idx) + end + end + println(io) end - show_type = type !== nothing && should_print_ssa_type(stmt) - print_stmt(io, idx, stmt, used, maxlength_idx, true, show_type) - if type !== nothing # ignore types for pre-inference code - if type === UNDEF - # This is an error, but can happen if passes don't update their type information - printstyled(io, "::#UNDEF", color=:red) - else - line_info_postprinter(io; type, used = idx in used, show_type, idx) + i += 1 + + # finally, print new nodes that are to be inserted after the current statement + while next !== nothing + print_new_node(next) + i += 1 + next = pop_new_node!(idx; attach_after=true) + end + + # increment the basic block counter + if bb_idx <= length(cfg.blocks) + bbrange = cfg.blocks[bb_idx].stmts + if bb_idx <= length(cfg.blocks) && idx == last(bbrange) + bb_idx += 1 end end - println(io) + return bb_idx end function _new_nodes_iter(stmts, new_nodes, new_nodes_info, new_nodes_idx) new_nodes_perm =
filter(i -> isassigned(new_nodes.inst, i), 1:length(new_nodes)) sort!(new_nodes_perm, by = x -> (x = new_nodes_info[x]; (x.pos, x.attach_after))) - perm_idx = Ref(1) - - return function get_new_node(idx::Int) - perm_idx[] <= length(new_nodes_perm) || return nothing - node_idx = new_nodes_perm[perm_idx[]] - if node_idx < new_nodes_idx - # skip new nodes that have already been processed by incremental compact - # (but don't just return nothing because there may be multiple at this pos) - perm_idx[] += 1 - return get_new_node(idx) + + # separate iterators for the nodes that are inserted before and after each statement, respectively + before_iter = Ref(1) + after_iter = Ref(1) + + return function get_new_node(idx::Int; attach_after=false) + iter = attach_after ? after_iter : before_iter + iter[] <= length(new_nodes_perm) || return nothing + node_idx = new_nodes_perm[iter[]] + + # skip nodes + while node_idx < new_nodes_idx || # already compacted + idx > new_nodes_info[node_idx].pos || # at a position we have already passed + new_nodes_info[node_idx].attach_after != attach_after + iter[] += 1 + iter[] > length(new_nodes_perm) && return nothing + node_idx = new_nodes_perm[iter[]] end - if new_nodes_info[node_idx].pos != idx + + if new_nodes_info[node_idx].pos != idx || + new_nodes_info[node_idx].attach_after != attach_after return nothing end - perm_idx[] += 1 + + iter[] += 1 new_node = new_nodes[node_idx] new_node_inst = isassigned(new_nodes.inst, node_idx) ? new_node[:inst] : UNDEF new_node_type = isassigned(new_nodes.type, node_idx) ? new_node[:type] : UNDEF @@ -842,9 +886,8 @@ function show_ir(io::IO, ci::CodeInfo, config::IRShowConfig=default_config(ci); end function show_ir(io::IO, compact::IncrementalCompact, config::IRShowConfig=default_config(compact.ir)) - compact_cfg = CFG(compact.result_bbs, Int[first(compact.result_bbs[i].stmts) for i in 2:length(compact.result_bbs)]) cfg = compact.ir.cfg - (_, width) = displaysize(io) + # First print everything that has already been compacted @@ -856,27 +899,78 @@ function show_ir(io::IO, compact::IncrementalCompact, config::IRShowConfig=defau push!(used_compacted, i) end end + + # while compacting, the end of the active result bb will not have been determined + # (this is done post-hoc by `finish_current_bb!`), so determine it here from scratch. + result_bbs = copy(compact.result_bbs) + if compact.active_result_bb <= length(result_bbs) + # count the total number of nodes we'll add to this block + input_bb_idx = block_for_inst(compact.ir.cfg, compact.idx) + input_bb = compact.ir.cfg.blocks[input_bb_idx] + count = 0 + for input_idx in input_bb.stmts.start:input_bb.stmts.stop + pop_new_node! = new_nodes_iter(compact.ir) + while pop_new_node!(input_idx) !== nothing + count += 1 + end + while pop_new_node!(input_idx; attach_after=true) !== nothing + count += 1 + end + end + + still_to_be_inserted = (last(input_bb.stmts) - compact.idx) + count + + result_bb = result_bbs[compact.active_result_bb] + result_bbs[compact.active_result_bb] = Core.Compiler.BasicBlock(result_bb, + Core.Compiler.StmtRange(first(result_bb.stmts), compact.result_idx+still_to_be_inserted)) + end + compact_cfg = CFG(result_bbs, Int[first(result_bbs[i].stmts) for i in 2:length(result_bbs)]) + pop_new_node! = new_nodes_iter(compact) maxssaid = length(compact.result) + Core.Compiler.length(compact.new_new_nodes) bb_idx = let io = IOContext(io, :maxssaid=>maxssaid) - show_ir_stmts(io, compact, 1:compact.result_idx-1, config, used_compacted, compact_cfg, 1; pop_new_node!)
+ show_ir_stmts(io, compact, 1:compact.result_idx-1, config, used_compacted, + compact_cfg, 1; pop_new_node!) end + # Print uncompacted nodes from the original IR # print a separator + (_, width) = displaysize(io) stmts = compact.ir.stmts indent = length(string(length(stmts))) # config.line_info_preprinter(io, "", compact.idx) printstyled(io, "─"^(width-indent-1), '\n', color=:red) + # while compacting, the start of the active uncompacted bb will have been overwritten. + # this manifests as a stmt range end that is less than the start, so correct that. + inputs_bbs = copy(cfg.blocks) + for (i, bb) in enumerate(inputs_bbs) + if bb.stmts.stop < bb.stmts.start + inputs_bbs[i] = Core.Compiler.BasicBlock(bb, + Core.Compiler.StmtRange(last(bb.stmts), last(bb.stmts))) + # this is not entirely correct, and will result in the bb starting again, + # but is the best we can do without changing how `finish_current_bb!` works. + end + end + uncompacted_cfg = CFG(inputs_bbs, Int[first(inputs_bbs[i].stmts) for i in 2:length(inputs_bbs)]) + pop_new_node! = new_nodes_iter(compact.ir, compact.new_nodes_idx) maxssaid = length(compact.ir.stmts) + Core.Compiler.length(compact.ir.new_nodes) let io = IOContext(io, :maxssaid=>maxssaid) - show_ir_stmts(io, compact.ir, compact.idx:length(stmts), config, used_uncompacted, cfg, bb_idx; pop_new_node!) + # first show any new nodes to be attached after the last compacted statement + if compact.idx > 1 + show_ir_stmt(io, compact.ir, compact.idx-1, config, used_uncompacted, + uncompacted_cfg, bb_idx; pop_new_node!, only_after=true) + end + + # then show the actual uncompacted IR + show_ir_stmts(io, compact.ir, compact.idx:length(stmts), config, used_uncompacted, + uncompacted_cfg, bb_idx; pop_new_node!) end - finish_show_ir(io, cfg, config) + finish_show_ir(io, uncompacted_cfg, config) end function effectbits_letter(effects::Effects, name::Symbol, suffix::Char) diff --git a/base/compiler/ssair/slot2ssa.jl b/base/compiler/ssair/slot2ssa.jl index 6a9a128104b30..02306495f651d 100644 --- a/base/compiler/ssair/slot2ssa.jl +++ b/base/compiler/ssair/slot2ssa.jl @@ -172,7 +172,7 @@ function rename_uses!(ir::IRCode, ci::CodeInfo, idx::Int, @nospecialize(stmt), r return fixemup!(stmt->true, stmt->renames[slot_id(stmt)], ir, ci, idx, stmt) end -function strip_trailing_junk!(ci::CodeInfo, code::Vector{Any}, info::Vector{Any}) +function strip_trailing_junk!(ci::CodeInfo, code::Vector{Any}, info::Vector{CallInfo}) # Remove `nothing`s at the end, we don't handle them well # (we expect the last instruction to be a terminator) ssavaluetypes = ci.ssavaluetypes::Vector{Any} @@ -194,7 +194,7 @@ function strip_trailing_junk!(ci::CodeInfo, code::Vector{Any}, info::Vector{Any} push!(code, ReturnNode()) push!(ssavaluetypes, Union{}) push!(codelocs, 0) - push!(info, nothing) + push!(info, NoCallInfo()) push!(ssaflags, IR_FLAG_NULL) end nothing @@ -580,36 +580,53 @@ function recompute_type(node::Union{PhiNode, PhiCNode}, ci::CodeInfo, ir::IRCode return new_typ end +struct TryCatchRegion + enter_block::Int + leave_block::Int +end +struct NewPhiNode + ssaval::NewSSAValue + node::PhiNode +end +struct NewPhiCNode + slot::SlotNumber + ssaval::NewSSAValue + node::PhiCNode +end + function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuses::Vector{SlotInfo}, slottypes::Vector{Any}) code = ir.stmts.inst cfg = ir.cfg - catch_entry_blocks = Tuple{Int, Int}[] + catch_entry_blocks = TryCatchRegion[] lattice = OptimizerLattice() ⊑ₒ = ⊑(lattice) for idx in 1:length(code) stmt = code[idx] if 
isexpr(stmt, :enter) - push!(catch_entry_blocks, (block_for_inst(cfg, idx), block_for_inst(cfg, stmt.args[1]::Int))) + push!(catch_entry_blocks, TryCatchRegion( + block_for_inst(cfg, idx), + block_for_inst(cfg, stmt.args[1]::Int))) end end - exc_handlers = IdDict{Int, Tuple{Int, Int}}() + exc_handlers = IdDict{Int, TryCatchRegion}() # Record the correct exception handler for all critical sections - for (enter_block, exc) in catch_entry_blocks - exc_handlers[enter_block+1] = (enter_block, exc) + for catch_entry_block in catch_entry_blocks + (; enter_block, leave_block) = catch_entry_block + exc_handlers[enter_block+1] = catch_entry_block # TODO: Cut off here if the terminator is a leave corresponding to this enter for block in dominated(domtree, enter_block+1) - exc_handlers[block] = (enter_block, exc) + exc_handlers[block] = catch_entry_block end end - phi_slots = Vector{Int}[Vector{Int}() for _ = 1:length(ir.cfg.blocks)] - phi_nodes = Vector{Pair{NewSSAValue,PhiNode}}[Vector{Pair{NewSSAValue,PhiNode}}() for _ = 1:length(cfg.blocks)] + phi_slots = Vector{Int}[Int[] for _ = 1:length(ir.cfg.blocks)] + new_phi_nodes = Vector{NewPhiNode}[NewPhiNode[] for _ = 1:length(cfg.blocks)] phi_ssas = SSAValue[] - phicnodes = IdDict{Int, Vector{Tuple{SlotNumber, NewSSAValue, PhiCNode}}}() - for (_, exc) in catch_entry_blocks - phicnodes[exc] = Vector{Tuple{SlotNumber, NewSSAValue, PhiCNode}}() + new_phic_nodes = IdDict{Int, Vector{NewPhiCNode}}() + for (; leave_block) in catch_entry_blocks + new_phic_nodes[leave_block] = NewPhiCNode[] end @timeit "idf" for (idx, slot) in Iterators.enumerate(defuses) # No uses => no need for phi nodes @@ -638,7 +655,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, end @timeit "liveness" (live = compute_live_ins(cfg, slot)) for li in live.live_in_bbs - cidx = findfirst(x->x[2] == li, catch_entry_blocks) + cidx = findfirst(x::TryCatchRegion->x.leave_block==li, catch_entry_blocks) if cidx !== nothing # The slot is live into this block. We need to # create a PhiC node in the catch entry block and @@ -647,7 +664,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, phic_ssa = NewSSAValue( insert_node!(ir, first_insert_for_bb(code, cfg, li), NewInstruction(node, Union{})).id - length(ir.stmts)) - push!(phicnodes[li], (SlotNumber(idx), phic_ssa, node)) + push!(new_phic_nodes[li], NewPhiCNode(SlotNumber(idx), phic_ssa, node)) # Inform IDF that we now have a def in the catch block if !(li in live.def_bbs) push!(live.def_bbs, li) @@ -658,9 +675,9 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, for block in phiblocks push!(phi_slots[block], idx) node = PhiNode() - ssa = NewSSAValue(insert_node!(ir, + ssaval = NewSSAValue(insert_node!(ir, first_insert_for_bb(code, cfg, block), NewInstruction(node, Union{})).id - length(ir.stmts)) - push!(phi_nodes[block], ssa=>node) + push!(new_phi_nodes[block], NewPhiNode(ssaval, node)) end end # Perform SSA renaming @@ -697,7 +714,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, end # Insert phi nodes if necessary for (idx, slot) in Iterators.enumerate(phi_slots[item]) - ssaval, node = phi_nodes[item][idx] + (; ssaval, node) = new_phi_nodes[item][idx] incoming_val = incoming_vals[slot] if incoming_val === SSAValue(-1) # Optimistically omit this path.
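The `catch_entry_blocks` refactor above replaces positional tuples with the named `TryCatchRegion`/`NewPhiNode`/`NewPhiCNode` structs and leans on property destructuring. A hedged sketch of the pattern, using a hypothetical `Region` type rather than the real structs:

```julia
struct Region
    enter_block::Int
    leave_block::Int
end

regions = [Region(1, 4), Region(2, 6)]

# Before the refactor: findfirst(x -> x[2] == li, regions).
# After: the predicate names the field it matches on.
li = 6
idx = findfirst(r::Region -> r.leave_block == li, regions)
@assert idx == 2

# Property destructuring binds locals by field name (Julia ≥ 1.7), mirroring
# `(; enter_block, leave_block) = catch_entry_block` above.
(; enter_block, leave_block) = regions[idx]
@assert (enter_block, leave_block) == (2, 6)
```

The behavior is unchanged; the named fields simply make predicates like the `findfirst` above self-documenting.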
@@ -727,22 +744,22 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, end (item in visited) && continue # Record phi_C nodes if necessary - if haskey(phicnodes, item) - for (slot, ssa, _) in phicnodes[item] - incoming_vals[slot_id(slot)] = ssa + if haskey(new_phic_nodes, item) + for (; slot, ssaval) in new_phic_nodes[item] + incoming_vals[slot_id(slot)] = ssaval end end # Record initial upsilon nodes if necessary - eidx = findfirst(x->x[1] == item, catch_entry_blocks) + eidx = findfirst((; enter_block)::TryCatchRegion->enter_block==item, catch_entry_blocks) if eidx !== nothing - for (slot, _, node) in phicnodes[catch_entry_blocks[eidx][2]] + for (; slot, node) in new_phic_nodes[catch_entry_blocks[eidx].leave_block] ival = incoming_vals[slot_id(slot)] ivalundef = ival === UNDEF_TOKEN - unode = ivalundef ? UpsilonNode() : UpsilonNode(ival) - typ = ivalundef ? MaybeUndef(Union{}) : typ_for_val(ival, ci, ir.sptypes, -1, slottypes) - push!(node.values, - NewSSAValue(insert_node!(ir, first_insert_for_bb(code, cfg, item), - NewInstruction(unode, typ), true).id - length(ir.stmts))) + Υ = NewInstruction(ivalundef ? UpsilonNode() : UpsilonNode(ival), + ivalundef ? MaybeUndef(Union{}) : typ_for_val(ival, ci, ir.sptypes, -1, slottypes)) + # insert `UpsilonNode` immediately before the `:enter` expression + Υssa = insert_node!(ir, first_insert_for_bb(code, cfg, item), Υ) + push!(node.values, NewSSAValue(Υssa.id - length(ir.stmts))) end end push!(visited, item) @@ -772,18 +789,18 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, code[idx] = nothing incoming_vals[id] = UNDEF_TOKEN end - eidx = item - while haskey(exc_handlers, eidx) - (eidx, exc) = exc_handlers[eidx] - cidx = findfirst(x->slot_id(x[1]) == id, phicnodes[exc]) + enter_block = item + while haskey(exc_handlers, enter_block) + (; enter_block, leave_block) = exc_handlers[enter_block] + cidx = findfirst((; slot)::NewPhiCNode->slot_id(slot)==id, new_phic_nodes[leave_block]) if cidx !== nothing node = UpsilonNode(incoming_vals[id]) if incoming_vals[id] === UNDEF_TOKEN node = UpsilonNode() typ = MaybeUndef(Union{}) end - push!(phicnodes[exc][cidx][3].values, - NewSSAValue(insert_node!(ir, idx, NewInstruction(node, typ), true).id - length(ir.stmts))) + push!(new_phic_nodes[leave_block][cidx].node.values, + NewSSAValue(insert_node!(ir, idx, NewInstruction(node, typ), true).id - length(ir.stmts))) end end end @@ -840,12 +857,12 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, end end end - for (_, nodes) in phicnodes - for (_, ssa, node) in nodes + for (_, nodes) in new_phic_nodes + for (; ssaval, node) in nodes new_typ = Union{} # TODO: This could just be the ones that depend on other phis - push!(type_refine_phi, ssa.id) - new_idx = ssa.id + push!(type_refine_phi, ssaval.id) + new_idx = ssaval.id node = new_nodes.stmts[new_idx] phic_values = (node[:inst]::PhiCNode).values for i = 1:length(phic_values) diff --git a/base/compiler/ssair/verify.jl b/base/compiler/ssair/verify.jl index ca460b10ca67d..57d60ec2ce980 100644 --- a/base/compiler/ssair/verify.jl +++ b/base/compiler/ssair/verify.jl @@ -20,7 +20,7 @@ if !isdefined(@__MODULE__, Symbol("@verify_error")) end end -function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, use_idx::Int, print::Bool, isforeigncall::Bool, arg_idx::Int, allow_frontend_forms::Bool) +function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, use_idx::Int, printed_use_idx::Int, print::Bool, isforeigncall::Bool, 
arg_idx::Int, allow_frontend_forms::Bool) if isa(op, SSAValue) if op.id > length(ir.stmts) def_bb = block_for_inst(ir.cfg, ir.new_nodes.info[op.id - length(ir.stmts)].pos) @@ -39,8 +39,7 @@ function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, else if !dominates(domtree, def_bb, use_bb) && !(bb_unreachable(domtree, def_bb) && bb_unreachable(domtree, use_bb)) # At the moment, we allow GC preserve tokens outside the standard domination notion - #@Base.show ir - @verify_error "Basic Block $def_bb does not dominate block $use_bb (tried to use value $(op.id))" + @verify_error "Basic Block $def_bb does not dominate block $use_bb (tried to use value %$(op.id) at %$(printed_use_idx))" error("") end end @@ -62,7 +61,6 @@ function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, end end elseif isa(op, Union{OldSSAValue, NewSSAValue}) - #@Base.show ir @verify_error "Left over SSA marker" error("") elseif isa(op, Union{SlotNumber, TypedSlot}) @@ -87,20 +85,14 @@ function verify_ir(ir::IRCode, print::Bool=true, allow_frontend_forms::Bool=fals # @assert isempty(ir.new_nodes) # Verify CFG last_end = 0 - # Verify statements - domtree = construct_domtree(ir.cfg.blocks) + # Verify CFG graph. Must be well formed to construct domtree for (idx, block) in pairs(ir.cfg.blocks) - if first(block.stmts) != last_end + 1 - #ranges = [(idx,first(bb.stmts),last(bb.stmts)) for (idx, bb) in pairs(ir.cfg.blocks)] - @verify_error "First statement of BB $idx ($(first(block.stmts))) does not match end of previous ($last_end)" - error("") - end - last_end = last(block.stmts) - terminator = ir.stmts[last_end][:inst] - - bb_unreachable(domtree, idx) && continue for p in block.preds p == 0 && continue + if !(1 <= p <= length(ir.cfg.blocks)) + @verify_error "Predecessor $p of block $idx out of bounds for IR" + error("") + end c = count_int(idx, ir.cfg.blocks[p].succs) if c == 0 @verify_error "Predecessor $p of block $idx not in successor list" @@ -112,6 +104,32 @@ function verify_ir(ir::IRCode, print::Bool=true, allow_frontend_forms::Bool=fals end end end + for s in block.succs + if !(1 <= s <= length(ir.cfg.blocks)) + @verify_error "Successor $s of block $idx out of bounds for IR" + error("") + end + if !(idx in ir.cfg.blocks[s].preds) + #@Base.show ir.cfg + #@Base.show ir + #@Base.show ir.argtypes + @verify_error "Successor $s of block $idx not in predecessor list" + error("") + end + end + end + # Verify statements + domtree = construct_domtree(ir.cfg.blocks) + for (idx, block) in pairs(ir.cfg.blocks) + if first(block.stmts) != last_end + 1 + #ranges = [(idx,first(bb.stmts),last(bb.stmts)) for (idx, bb) in pairs(ir.cfg.blocks)] + @verify_error "First statement of BB $idx ($(first(block.stmts))) does not match end of previous ($last_end)" + error("") + end + last_end = last(block.stmts) + terminator = ir.stmts[last_end][:inst] + + bb_unreachable(domtree, idx) && continue if isa(terminator, ReturnNode) if !isempty(block.succs) @verify_error "Block $idx ends in return or unreachable, but has successors" @@ -153,15 +171,6 @@ function verify_ir(ir::IRCode, print::Bool=true, allow_frontend_forms::Bool=fals error("") end end - for s in block.succs - if !(idx in ir.cfg.blocks[s].preds) - #@Base.show ir.cfg - #@Base.show ir - #@Base.show ir.argtypes - @verify_error "Successor $s of block $idx not in predecessor list" - error("") - end - end end for (bb, idx) in bbidxiter(ir) # We allow invalid IR in dead code to avoid passes having to detect when @@ -188,6 +197,12 @@ function 
verify_ir(ir::IRCode, print::Bool=true, allow_frontend_forms::Bool=fals error("") end edge == 0 && continue + if bb_unreachable(domtree, Int(edge)) + # TODO: Disallow? + #@verify_error "Unreachable edge from #$edge should have been cleaned up at idx $idx" + #error("") + continue + end isassigned(stmt.values, i) || continue val = stmt.values[i] phiT = ir.stmts[idx][:type] @@ -201,7 +216,7 @@ function verify_ir(ir::IRCode, print::Bool=true, allow_frontend_forms::Bool=fals #error("") end end - check_op(ir, domtree, val, Int(edge), last(ir.cfg.blocks[stmt.edges[i]].stmts)+1, print, false, i, allow_frontend_forms) + check_op(ir, domtree, val, Int(edge), last(ir.cfg.blocks[stmt.edges[i]].stmts)+1, idx, print, false, i, allow_frontend_forms) end elseif isa(stmt, PhiCNode) for i = 1:length(stmt.values) @@ -215,11 +230,21 @@ function verify_ir(ir::IRCode, print::Bool=true, allow_frontend_forms::Bool=fals error("") end end + elseif (isa(stmt, GotoNode) || isa(stmt, GotoIfNot) || isexpr(stmt, :enter)) && idx != last(ir.cfg.blocks[bb].stmts) + @verify_error "Terminator $idx in bb $bb is not the last statement in the block" + error("") else if isa(stmt, Expr) || isa(stmt, ReturnNode) # TODO: make sure everything has line info + if (stmt isa ReturnNode) + if isdefined(stmt, :val) + # TODO: Disallow unreachable returns? + # bb_unreachable(domtree, Int64(edge)) + else + #@verify_error "Missing line number information for statement $idx of $ir" + end + end if !(stmt isa ReturnNode && !isdefined(stmt, :val)) # not actually a return node, but an unreachable marker if ir.stmts[idx][:line] <= 0 - #@verify_error "Missing line number information for statement $idx of $ir" end end end @@ -245,12 +270,18 @@ function verify_ir(ir::IRCode, print::Bool=true, allow_frontend_forms::Bool=fals (stmt.args[1] isa GlobalRef || (stmt.args[1] isa Expr && stmt.args[1].head === :static_parameter)) # a GlobalRef or static_parameter isdefined check does not evaluate its argument continue + elseif stmt.head === :call + f = stmt.args[1] + if f isa GlobalRef && f.name === :cglobal + # TODO: these are not yet linearized + continue + end end end n = 1 for op in userefs(stmt) op = op[] - check_op(ir, domtree, op, bb, idx, print, isforeigncall, n, allow_frontend_forms) + check_op(ir, domtree, op, bb, idx, idx, print, isforeigncall, n, allow_frontend_forms) n += 1 end end diff --git a/base/compiler/stmtinfo.jl b/base/compiler/stmtinfo.jl index 2b0f453951ec8..556c0082e4532 100644 --- a/base/compiler/stmtinfo.jl +++ b/base/compiler/stmtinfo.jl @@ -11,23 +11,28 @@ and any additional information (`call.info`) for a given generic call. struct CallMeta rt::Any effects::Effects - info::Any + info::CallInfo end +struct NoCallInfo <: CallInfo end + """ - info::MethodMatchInfo + info::MethodMatchInfo <: CallInfo Captures the result of a `:jl_matching_methods` lookup for the given call (`info.results`). This info may then be used by the optimizer to inline the matches, without having to re-consult the method table. This info is illegal on any statement that is not a call to a generic function. 
""" -struct MethodMatchInfo +struct MethodMatchInfo <: CallInfo results::MethodLookupResult end +nsplit_impl(info::MethodMatchInfo) = 1 +getsplit_impl(info::MethodMatchInfo, idx::Int) = (@assert idx == 1; info.results) +getresult_impl(::MethodMatchInfo, ::Int) = nothing """ - info::UnionSplitInfo + info::UnionSplitInfo <: CallInfo If inference decides to partition the method search space by splitting unions, it will issue a method lookup query for each such partition. This info indicates @@ -35,7 +40,7 @@ that such partitioning happened and wraps the corresponding `MethodMatchInfo` fo each partition (`info.matches::Vector{MethodMatchInfo}`). This info is illegal on any statement that is not a call to a generic function. """ -struct UnionSplitInfo +struct UnionSplitInfo <: CallInfo matches::Vector{MethodMatchInfo} end @@ -47,6 +52,9 @@ function nmatches(info::UnionSplitInfo) end return n end +nsplit_impl(info::UnionSplitInfo) = length(info.matches) +getsplit_impl(info::UnionSplitInfo, idx::Int) = getsplit_impl(info.matches[idx], 1) +getresult_impl(::UnionSplitInfo, ::Int) = nothing struct ConstPropResult result::InferenceResult @@ -66,40 +74,43 @@ struct SemiConcreteResult effects::Effects end -const ConstResult = Union{ConstPropResult,ConcreteResult, SemiConcreteResult} +const ConstResult = Union{ConstPropResult, ConcreteResult, SemiConcreteResult} """ - info::ConstCallInfo + info::ConstCallInfo <: CallInfo The precision of this call was improved using constant information. In addition to the original call information `info.call`, this info also keeps the results of constant inference `info.results::Vector{Union{Nothing,ConstResult}}`. """ -struct ConstCallInfo +struct ConstCallInfo <: CallInfo call::Union{MethodMatchInfo,UnionSplitInfo} results::Vector{Union{Nothing,ConstResult}} end +nsplit_impl(info::ConstCallInfo) = nsplit(info.call) +getsplit_impl(info::ConstCallInfo, idx::Int) = getsplit(info.call, idx) +getresult_impl(info::ConstCallInfo, idx::Int) = info.results[idx] """ - info::MethodResultPure + info::MethodResultPure <: CallInfo This struct represents a method result constant was proven to be effect-free, including being no-throw (typically because the value was computed by calling an `@pure` function). """ -struct MethodResultPure - info::Any +struct MethodResultPure <: CallInfo + info::CallInfo end -let instance = MethodResultPure(false) +let instance = MethodResultPure(NoCallInfo()) global MethodResultPure MethodResultPure() = instance end """ - info::AbstractIterationInfo + ainfo::AbstractIterationInfo Captures all the information for abstract iteration analysis of a single value. -Each (abstract) call to `iterate`, corresponds to one entry in `info.each::Vector{CallMeta}`. +Each (abstract) call to `iterate`, corresponds to one entry in `ainfo.each::Vector{CallMeta}`. """ struct AbstractIterationInfo each::Vector{CallMeta} @@ -108,7 +119,7 @@ end const MaybeAbstractIterationInfo = Union{Nothing, AbstractIterationInfo} """ - info::ApplyCallInfo + info::ApplyCallInfo <: CallInfo This info applies to any call of `_apply_iterate(...)` and captures both the info of the actual call being applied and the info for any implicit call @@ -117,7 +128,7 @@ to be yet another `_apply_iterate`, in which case the `info.call` field will be another `ApplyCallInfo`. This info is illegal on any statement that is not an `_apply_iterate` call. 
""" -struct ApplyCallInfo +struct ApplyCallInfo <: CallInfo # The info for the call itself call::Any # AbstractIterationInfo for each argument, if applicable @@ -125,12 +136,12 @@ struct ApplyCallInfo end """ - info::UnionSplitApplyCallInfo + info::UnionSplitApplyCallInfo <: CallInfo Like `UnionSplitInfo`, but for `ApplyCallInfo` rather than `MethodMatchInfo`. This info is illegal on any statement that is not an `_apply_iterate` call. """ -struct UnionSplitApplyCallInfo +struct UnionSplitApplyCallInfo <: CallInfo infos::Vector{ApplyCallInfo} end @@ -141,7 +152,7 @@ Represents a resolved call to `Core.invoke`, carrying the `info.match::MethodMat the method that has been processed. Optionally keeps `info.result::InferenceResult` that keeps constant information. """ -struct InvokeCallInfo +struct InvokeCallInfo <: CallInfo match::MethodMatch result::Union{Nothing,ConstResult} end @@ -153,20 +164,20 @@ Represents a resolved call of opaque closure, carrying the `info.match::MethodMa the method that has been processed. Optionally keeps `info.result::InferenceResult` that keeps constant information. """ -struct OpaqueClosureCallInfo +struct OpaqueClosureCallInfo <: CallInfo match::MethodMatch result::Union{Nothing,ConstResult} end """ - info::OpaqueClosureCreateInfo + info::OpaqueClosureCreateInfo <: CallInfo This info may be constructed upon opaque closure construction, with `info.unspec::CallMeta` carrying out inference result of an unreal, partially specialized call (i.e. specialized on the closure environment, but not on the argument types of the opaque closure) in order to allow the optimizer to rewrite the return type parameter of the `OpaqueClosure` based on it. """ -struct OpaqueClosureCreateInfo +struct OpaqueClosureCreateInfo <: CallInfo unspec::CallMeta function OpaqueClosureCreateInfo(unspec::CallMeta) @assert isa(unspec.info, OpaqueClosureCallInfo) @@ -179,35 +190,35 @@ end # the AbstractInterpreter. """ - info::ReturnTypeCallInfo + info::ReturnTypeCallInfo <: CallInfo Represents a resolved call of `Core.Compiler.return_type`. `info.call` wraps the info corresponding to the call that `Core.Compiler.return_type` call was supposed to analyze. """ -struct ReturnTypeCallInfo - info::Any +struct ReturnTypeCallInfo <: CallInfo + info::CallInfo end """ - info::FinalizerInfo + info::FinalizerInfo <: CallInfo Represents the information of a potential (later) call to the finalizer on the given object type. """ -struct FinalizerInfo - info::Any +struct FinalizerInfo <: CallInfo + info::CallInfo # the callinfo for the finalizer call effects::Effects # the effects for the finalizer call end """ - info::ModifyFieldInfo + info::ModifyFieldInfo <: CallInfo Represents a resolved all of `modifyfield!(obj, name, op, x, [order])`. `info.info` wraps the call information of `op(getfield(obj, name), x)`. 
""" -struct ModifyFieldInfo - info::Any # the callinfo for the `op(getfield(obj, name), x)` call +struct ModifyFieldInfo <: CallInfo + info::CallInfo # the callinfo for the `op(getfield(obj, name), x)` call end @specialize diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl index 32551df34eea2..8faeb5db53794 100644 --- a/base/compiler/tfuncs.jl +++ b/base/compiler/tfuncs.jl @@ -6,8 +6,6 @@ @nospecialize -const _NAMEDTUPLE_NAME = NamedTuple.body.body.name - const INT_INF = typemax(Int) # integer infinity const N_IFUNC = reinterpret(Int32, have_fma) + 1 @@ -223,6 +221,11 @@ function ifelse_tfunc(@nospecialize(cnd), @nospecialize(x), @nospecialize(y)) end add_tfunc(Core.ifelse, 3, 3, ifelse_tfunc, 1) +function ifelse_nothrow(@specialize(𝕃::AbstractLattice), @nospecialize(cond), @nospecialize(x), @nospecialize(y)) + ⊑ = Core.Compiler.:⊑(𝕃) + return cond ⊑ Bool +end + function egal_tfunc(@nospecialize(x), @nospecialize(y)) xx = widenconditional(x) yy = widenconditional(y) @@ -246,13 +249,12 @@ function egal_tfunc(@nospecialize(x), @nospecialize(y)) end add_tfunc(===, 2, 2, egal_tfunc, 1) -function isdefined_nothrow(argtypes::Array{Any, 1}) - length(argtypes) == 2 || return false - a1, a2 = argtypes[1], argtypes[2] - if hasintersect(widenconst(a1), Module) - return a2 ⊑ Symbol +function isdefined_nothrow(@specialize(𝕃::AbstractLattice), @nospecialize(x), @nospecialize(name)) + ⊑ = Core.Compiler.:⊑(𝕃) + if hasintersect(widenconst(x), Module) + return name ⊑ Symbol else - return a2 ⊑ Symbol || a2 ⊑ Int + return name ⊑ Symbol || name ⊑ Int end end @@ -405,11 +407,19 @@ add_tfunc(Core.sizeof, 1, 1, sizeof_tfunc, 1) function nfields_tfunc(@nospecialize(x)) isa(x, Const) && return Const(nfields(x.val)) isa(x, Conditional) && return Const(0) - x = unwrap_unionall(widenconst(x)) + xt = widenconst(x) + x = unwrap_unionall(xt) isconstType(x) && return Const(nfields(x.parameters[1])) if isa(x, DataType) && !isabstracttype(x) - if !(x.name === Tuple.name && isvatuple(x)) && - !(x.name === _NAMEDTUPLE_NAME && !isconcretetype(x)) + if x.name === Tuple.name + isvatuple(x) && return Int + return Const(length(x.types)) + elseif x.name === _NAMEDTUPLE_NAME + length(x.parameters) == 2 || return Int + names = x.parameters[1] + isa(names, Tuple{Vararg{Symbol}}) || return nfields_tfunc(rewrap_unionall(x.parameters[2], xt)) + return Const(length(names)) + else return Const(isdefined(x, :types) ? 
length(x.types) : length(x.name.names)) end end @@ -476,10 +486,7 @@ function arraysize_tfunc(@nospecialize(ary), @nospecialize(dim)) end add_tfunc(arraysize, 2, 2, arraysize_tfunc, 4) -function arraysize_nothrow(argtypes::Vector{Any}) - length(argtypes) == 2 || return false - ary = argtypes[1] - dim = argtypes[2] +function arraysize_nothrow(@nospecialize(ary), @nospecialize(dim)) ary ⊑ Array || return false if isa(dim, Const) dimval = dim.val @@ -586,6 +593,10 @@ end add_tfunc(compilerbarrier, 2, 2, compilerbarrier_tfunc, 5) add_tfunc(Core.finalizer, 2, 4, (@nospecialize args...)->Nothing, 5) +function compilerbarrier_nothrow(@nospecialize(setting), @nospecialize(val)) + return isa(setting, Const) && contains_is((:type, :const, :conditional), setting.val) +end + # more accurate typeof_tfunc for vararg tuples abstract only in length function typeof_concrete_vararg(t::DataType) np = length(t.parameters) @@ -658,6 +669,17 @@ function typeassert_tfunc(@nospecialize(v), @nospecialize(t)) end add_tfunc(typeassert, 2, 2, typeassert_tfunc, 4) +function typeassert_nothrow(@specialize(𝕃::AbstractLattice), @nospecialize(v), @nospecialize(t)) + ⊑ = Core.Compiler.:⊑(𝕃) + # ty, exact = instanceof_tfunc(t) + # return exact && v ⊑ ty + if (isType(t) && !has_free_typevars(t) && v ⊑ t.parameters[1]) || + (isa(t, Const) && isa(t.val, Type) && v ⊑ t.val) + return true + end + return false +end + function isa_tfunc(@specialize(𝕃::AbstractLattice), @nospecialize(v), @nospecialize(tt)) t, isexact = instanceof_tfunc(tt) if t === Bottom @@ -694,6 +716,11 @@ end isa_tfunc(@nospecialize(v), @nospecialize(t)) = isa_tfunc(fallback_lattice, v, t) add_tfunc(isa, 2, 2, isa_tfunc, 1) +function isa_nothrow(@specialize(𝕃::AbstractLattice), @nospecialize(obj), @nospecialize(typ)) + ⊑ = Core.Compiler.:⊑(𝕃) + return typ ⊑ Type +end + function subtype_tfunc(@nospecialize(a), @nospecialize(b)) a, isexact_a = instanceof_tfunc(a) b, isexact_b = instanceof_tfunc(b) @@ -712,6 +739,11 @@ function subtype_tfunc(@nospecialize(a), @nospecialize(b)) end add_tfunc(<:, 2, 2, subtype_tfunc, 10) +function subtype_nothrow(@specialize(𝕃::AbstractLattice), @nospecialize(lty), @nospecialize(rty)) + ⊑ = Core.Compiler.:⊑(𝕃) + return lty ⊑ Type && rty ⊑ Type +end + function fieldcount_noerror(@nospecialize t) if t isa UnionAll || t isa Union t = argument_datatype(t) @@ -848,15 +880,18 @@ function getfield_nothrow(@nospecialize(s00), @nospecialize(name), boundscheck:: return false end -function getfield_tfunc(s00, name, boundscheck_or_order) - @nospecialize +function getfield_tfunc(@specialize(lattice::AbstractLattice), @nospecialize(s00), + @nospecialize(name), @nospecialize(boundscheck_or_order)) t = isvarargtype(boundscheck_or_order) ? 
unwrapva(boundscheck_or_order) : widenconst(boundscheck_or_order) hasintersect(t, Symbol) || hasintersect(t, Bool) || return Bottom - return getfield_tfunc(s00, name) + return getfield_tfunc(lattice, s00, name) end -function getfield_tfunc(s00, name, order, boundscheck) - @nospecialize +function getfield_tfunc(@nospecialize(s00), name, boundscheck_or_order) + return getfield_tfunc(fallback_lattice, s00, name, boundscheck_or_order) +end +function getfield_tfunc(@specialize(lattice::AbstractLattice), @nospecialize(s00), + @nospecialize(name), @nospecialize(order), @nospecialize(boundscheck)) hasintersect(widenconst(order), Symbol) || return Bottom if isvarargtype(boundscheck) t = unwrapva(boundscheck) @@ -864,11 +899,15 @@ function getfield_tfunc(s00, name, order, boundscheck) else hasintersect(widenconst(boundscheck), Bool) || return Bottom end - return getfield_tfunc(s00, name) + return getfield_tfunc(lattice, s00, name) +end +function getfield_tfunc(@nospecialize(s00), @nospecialize(name), @nospecialize(order), @nospecialize(boundscheck)) + return getfield_tfunc(fallback_lattice, s00, name, order, boundscheck) end -getfield_tfunc(@nospecialize(s00), @nospecialize(name)) = _getfield_tfunc(s00, name, false) +getfield_tfunc(@nospecialize(s00), @nospecialize(name)) = _getfield_tfunc(fallback_lattice, s00, name, false) +getfield_tfunc(@specialize(lattice::AbstractLattice), @nospecialize(s00), @nospecialize(name)) = _getfield_tfunc(lattice, s00, name, false) -function _getfield_fieldindex(@nospecialize(s), name::Const) +function _getfield_fieldindex(s::DataType, name::Const) nv = name.val if isa(nv, Symbol) nv = fieldindex(s, nv, false) @@ -879,27 +918,61 @@ function _getfield_fieldindex(@nospecialize(s), name::Const) return nothing end -function _getfield_tfunc_const(@nospecialize(sv), name::Const, setfield::Bool) - if isa(name, Const) - nv = _getfield_fieldindex(typeof(sv), name) - nv === nothing && return Bottom - if isa(sv, DataType) && nv == DATATYPE_TYPES_FIELDINDEX && isdefined(sv, nv) +function _getfield_tfunc_const(@nospecialize(sv), name::Const) + nv = _getfield_fieldindex(typeof(sv), name) + nv === nothing && return Bottom + if isa(sv, DataType) && nv == DATATYPE_TYPES_FIELDINDEX && isdefined(sv, nv) + return Const(getfield(sv, nv)) + end + if isconst(typeof(sv), nv) + if isdefined(sv, nv) return Const(getfield(sv, nv)) end - if isconst(typeof(sv), nv) - if isdefined(sv, nv) - return Const(getfield(sv, nv)) - end - return Union{} - end + return Bottom end return nothing end -function _getfield_tfunc(@nospecialize(s00), @nospecialize(name), setfield::Bool) - if isa(s00, Conditional) +function _getfield_tfunc(@specialize(lattice::InferenceLattice), @nospecialize(s00), @nospecialize(name), setfield::Bool) + if isa(s00, LimitedAccuracy) + # This will error, but it's better than duplicating the error here + s00 = widenconst(s00) + end + return _getfield_tfunc(widenlattice(lattice), s00, name, setfield) +end + +function _getfield_tfunc(@specialize(lattice::OptimizerLattice), @nospecialize(s00), @nospecialize(name), setfield::Bool) + # If undef, that's a Union, but that doesn't affect the rt when tmerged + # into the unwrapped result. 
+ isa(s00, MaybeUndef) && (s00 = s00.typ) + return _getfield_tfunc(widenlattice(lattice), s00, name, setfield) +end + +function _getfield_tfunc(@specialize(lattice::AnyConditionalsLattice), @nospecialize(s00), @nospecialize(name), setfield::Bool) + if isa(s00, AnyConditional) return Bottom # Bool has no fields - elseif isa(s00, Const) + end + return _getfield_tfunc(widenlattice(lattice), s00, name, setfield) +end + +function _getfield_tfunc(@specialize(lattice::PartialsLattice), @nospecialize(s00), @nospecialize(name), setfield::Bool) + if isa(s00, PartialStruct) + s = widenconst(s00) + sty = unwrap_unionall(s)::DataType + if isa(name, Const) + nv = _getfield_fieldindex(sty, name) + if isa(nv, Int) && 1 <= nv <= length(s00.fields) + return unwrapva(s00.fields[nv]) + end + end + s00 = s + end + + return _getfield_tfunc(widenlattice(lattice), s00, name, setfield) +end + +function _getfield_tfunc(lattice::ConstsLattice, @nospecialize(s00), @nospecialize(name), setfield::Bool) + if isa(s00, Const) sv = s00.val if isa(name, Const) nv = name.val @@ -910,38 +983,32 @@ function _getfield_tfunc(@nospecialize(s00), @nospecialize(name), setfield::Bool end return Bottom end - r = _getfield_tfunc_const(sv, name, setfield) + r = _getfield_tfunc_const(sv, name) r !== nothing && return r end - s = typeof(sv) - elseif isa(s00, PartialStruct) - s = widenconst(s00) - sty = unwrap_unionall(s)::DataType - if isa(name, Const) - nv = _getfield_fieldindex(sty, name) - if isa(nv, Int) && 1 <= nv <= length(s00.fields) - return unwrapva(s00.fields[nv]) - end - end - else - s = unwrap_unionall(s00) + s00 = widenconst(s00) end + return _getfield_tfunc(widenlattice(lattice), s00, name, setfield) +end + +function _getfield_tfunc(lattice::JLTypeLattice, @nospecialize(s00), @nospecialize(name), setfield::Bool) + s = unwrap_unionall(s00) if isa(s, Union) - return tmerge(_getfield_tfunc(rewrap_unionall(s.a, s00), name, setfield), - _getfield_tfunc(rewrap_unionall(s.b, s00), name, setfield)) + return tmerge(_getfield_tfunc(lattice, rewrap_unionall(s.a, s00), name, setfield), + _getfield_tfunc(lattice, rewrap_unionall(s.b, s00), name, setfield)) end if isType(s) if isconstType(s) - sv = s00.parameters[1] - if isa(name, Const) - r = _getfield_tfunc_const(sv, name, setfield) + sv = (s00::DataType).parameters[1] + if isa(name, Const) + r = _getfield_tfunc_const(sv, name) r !== nothing && return r - end + end s = typeof(sv) else sv = s.parameters[1] if isTypeDataType(sv) && isa(name, Const) - nv = _getfield_fieldindex(DataType, name) + nv = _getfield_fieldindex(DataType, name)::Int if nv == DATATYPE_NAME_FIELDINDEX # N.B. This only works for fields that do not depend on type # parameters (which we do not know here). 
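The `_getfield_tfunc` split above is an instance of a layered-dispatch pattern: each method handles only the lattice elements its layer introduces (`LimitedAccuracy`, `MaybeUndef`, `AnyConditional`, `PartialStruct`, `Const`) and then recurses on `widenlattice(lattice)` until the plain-type base case is reached. Below is a minimal, self-contained sketch of that idea; every name in it is a hypothetical stand-in, not the real `Core.Compiler` machinery:

```julia
# All names below are illustrative stand-ins for the Core.Compiler lattice types.
abstract type MiniLattice end
struct MiniTypes <: MiniLattice end          # base layer: plain Julia types

struct MiniConsts <: MiniLattice             # refinement layer for constants
    parent::MiniLattice
end
widenlayer(l::MiniConsts) = l.parent         # analogue of widenlattice

struct MiniConst                             # stand-in for Const
    val::Any
end

# Base layer: nothing left to unwrap, so answer from the plain type.
field_tfunc(::MiniTypes, T, i::Int) = fieldtype(T, i)

# Const layer: fold the wrapper it owns, otherwise widen and recurse.
function field_tfunc(l::MiniConsts, x, i::Int)
    x isa MiniConst && return MiniConst(getfield(x.val, i))  # constant-fold
    return field_tfunc(widenlayer(l), x, i)
end

lat = MiniConsts(MiniTypes())
field_tfunc(lat, MiniConst((1, "a")), 2)     # MiniConst("a")
field_tfunc(lat, Tuple{Int,String}, 2)       # String
```

The payoff of this structure is that an external `AbstractInterpreter` can splice its own layer into the stack and inherit everything below it, which is also why the widening in `tmerge` later in this diff goes through `widenlattice` instead of jumping straight to `widenconst`.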
@@ -976,7 +1043,7 @@ function _getfield_tfunc(@nospecialize(s00), @nospecialize(name), setfield::Bool if !(_ts <: Tuple) return Any end - return _getfield_tfunc(_ts, name, setfield) + return _getfield_tfunc(lattice, _ts, name, setfield) end ftypes = datatype_fieldtypes(s) nf = length(ftypes) @@ -1084,17 +1151,17 @@ end function setfield!_tfunc(o, f, v) @nospecialize mutability_errorcheck(o) || return Bottom - ft = _getfield_tfunc(o, f, true) + ft = _getfield_tfunc(fallback_lattice, o, f, true) ft === Bottom && return Bottom hasintersect(widenconst(v), widenconst(ft)) || return Bottom return v end -function mutability_errorcheck(@nospecialize obj) - objt0 = widenconst(obj) +mutability_errorcheck(@nospecialize obj) = _mutability_errorcheck(widenconst(obj)) +function _mutability_errorcheck(@nospecialize objt0) objt = unwrap_unionall(objt0) if isa(objt, Union) - return mutability_errorcheck(rewrap_unionall(objt.a, objt0)) || - mutability_errorcheck(rewrap_unionall(objt.b, objt0)) + return _mutability_errorcheck(rewrap_unionall(objt.a, objt0)) || + _mutability_errorcheck(rewrap_unionall(objt.b, objt0)) elseif isa(objt, DataType) # Can't say anything about abstract types isabstracttype(objt) && return true @@ -1103,22 +1170,19 @@ function mutability_errorcheck(@nospecialize obj) return true end -function setfield!_nothrow(argtypes::Vector{Any}) - if length(argtypes) == 4 - order = argtypes[4] - order === Const(:not_atomic) || return false # currently setfield!_nothrow is assuming not atomic - else - length(argtypes) == 3 || return false - end - return setfield!_nothrow(argtypes[1], argtypes[2], argtypes[3]) +function setfield!_nothrow(@specialize(𝕃::AbstractLattice), s00, name, v, order) + @nospecialize s00 name v order + order === Const(:not_atomic) || return false # currently setfield!_nothrow is assuming not atomic + return setfield!_nothrow(𝕃, s00, name, v) end -function setfield!_nothrow(s00, name, v) - @nospecialize +function setfield!_nothrow(@specialize(𝕃::AbstractLattice), s00, name, v) + @nospecialize s00 name v + ⊑ = Core.Compiler.:⊑(𝕃) s0 = widenconst(s00) s = unwrap_unionall(s0) if isa(s, Union) - return setfield!_nothrow(rewrap_unionall(s.a, s00), name, v) && - setfield!_nothrow(rewrap_unionall(s.b, s00), name, v) + return setfield!_nothrow(𝕃, rewrap_unionall(s.a, s00), name, v) && + setfield!_nothrow(𝕃, rewrap_unionall(s.b, s00), name, v) elseif isa(s, DataType) # Can't say anything about abstract types isabstracttype(s) && return false @@ -1148,21 +1212,21 @@ end function abstract_modifyfield!(interp::AbstractInterpreter, argtypes::Vector{Any}, si::StmtInfo, sv::InferenceState) nargs = length(argtypes) if !isempty(argtypes) && isvarargtype(argtypes[nargs]) - nargs - 1 <= 6 || return CallMeta(Bottom, EFFECTS_THROWS, false) - nargs > 3 || return CallMeta(Any, EFFECTS_UNKNOWN, false) + nargs - 1 <= 6 || return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo()) + nargs > 3 || return CallMeta(Any, EFFECTS_UNKNOWN, NoCallInfo()) else - 5 <= nargs <= 6 || return CallMeta(Bottom, EFFECTS_THROWS, false) + 5 <= nargs <= 6 || return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo()) end o = unwrapva(argtypes[2]) f = unwrapva(argtypes[3]) RT = modifyfield!_tfunc(o, f, Any, Any) - info = false + info = NoCallInfo() if nargs >= 5 && RT !== Bottom # we may be able to refine this to a PartialStruct by analyzing `op(o.f, v)::T` # as well as compute the info for the method matches op = unwrapva(argtypes[4]) v = unwrapva(argtypes[5]) - TF = getfield_tfunc(o, f) + TF = 
getfield_tfunc(typeinf_lattice(interp), o, f) callinfo = abstract_call(interp, ArgInfo(nothing, Any[op, TF, v]), StmtInfo(true), sv, #=max_methods=# 1) TF2 = tmeet(callinfo.rt, widenconst(TF)) if TF2 === Bottom @@ -1193,8 +1257,9 @@ add_tfunc(swapfield!, 3, 4, swapfield!_tfunc, 3) add_tfunc(modifyfield!, 4, 5, modifyfield!_tfunc, 3) add_tfunc(replacefield!, 4, 6, replacefield!_tfunc, 3) -function fieldtype_nothrow(@nospecialize(s0), @nospecialize(name)) +function fieldtype_nothrow(@specialize(𝕃::AbstractLattice), @nospecialize(s0), @nospecialize(name)) s0 === Bottom && return true # unreachable + ⊑ = Core.Compiler.:⊑(𝕃) if s0 === Any || s0 === Type || DataType ⊑ s0 || UnionAll ⊑ s0 # We have no idea return false @@ -1208,8 +1273,8 @@ function fieldtype_nothrow(@nospecialize(s0), @nospecialize(name)) su = unwrap_unionall(s0) if isa(su, Union) - return fieldtype_nothrow(rewrap_unionall(su.a, s0), name) && - fieldtype_nothrow(rewrap_unionall(su.b, s0), name) + return fieldtype_nothrow(𝕃, rewrap_unionall(su.a, s0), name) && + fieldtype_nothrow(𝕃, rewrap_unionall(su.b, s0), name) end s, exact = instanceof_tfunc(s0) @@ -1387,7 +1452,7 @@ valid_tparam_type(T::DataType) = valid_typeof_tparam(T) valid_tparam_type(U::Union) = valid_tparam_type(U.a) && valid_tparam_type(U.b) valid_tparam_type(U::UnionAll) = valid_tparam_type(unwrap_unionall(U)) -function apply_type_nothrow(@specialize(lattice::AbstractLattice), argtypes::Array{Any, 1}, @nospecialize(rt)) +function apply_type_nothrow(@specialize(lattice::AbstractLattice), argtypes::Vector{Any}, @nospecialize(rt)) rt === Type && return false length(argtypes) >= 1 || return false headtypetype = argtypes[1] @@ -1443,7 +1508,6 @@ function apply_type_nothrow(@specialize(lattice::AbstractLattice), argtypes::Arr end return true end -apply_type_nothrow(argtypes::Array{Any, 1}, @nospecialize(rt)) = apply_type_nothrow(fallback_lattice, argtypes, rt) const _tvarnames = Symbol[:_A, :_B, :_C, :_D, :_E, :_F, :_G, :_H, :_I, :_J, :_K, :_L, :_M, :_N, :_O, :_P, :_Q, :_R, :_S, :_T, :_U, :_V, :_W, :_X, :_Y, :_Z] @@ -1596,6 +1660,12 @@ function apply_type_tfunc(@nospecialize(headtypetype), @nospecialize args...) 
end if istuple return Type{<:appl} + elseif isa(appl, DataType) && appl.name === _NAMEDTUPLE_NAME && length(appl.parameters) == 2 && + (appl.parameters[1] === () || appl.parameters[2] === Tuple{}) + # if the first/second parameter of `NamedTuple` is known to be empty, + # the second/first argument should also be empty tuple type, + # so refine it here + return Const(NamedTuple{(),Tuple{}}) end ans = Type{appl} for i = length(outervars):-1:1 @@ -1781,8 +1851,8 @@ function arrayset_typecheck(@nospecialize(arytype), @nospecialize(elmtype)) end # Query whether the given builtin is guaranteed not to throw given the argtypes -function _builtin_nothrow(@specialize(lattice::AbstractLattice), @nospecialize(f), argtypes::Array{Any,1}, @nospecialize(rt)) - ⊑ₗ = ⊑(lattice) +function _builtin_nothrow(@specialize(𝕃::AbstractLattice), @nospecialize(f), argtypes::Vector{Any}, @nospecialize(rt)) + ⊑ = Core.Compiler.:⊑(𝕃) if f === arrayset array_builtin_common_nothrow(argtypes, 4) || return false # Additionally check element type compatibility @@ -1791,73 +1861,81 @@ function _builtin_nothrow(@specialize(lattice::AbstractLattice), @nospecialize(f return array_builtin_common_nothrow(argtypes, 3) elseif f === Core._expr length(argtypes) >= 1 || return false - return argtypes[1] ⊑ₗ Symbol + return argtypes[1] ⊑ Symbol end # These builtins are not-vararg, so if we have varars, here, we can't guarantee # the correct number of arguments. - (!isempty(argtypes) && isvarargtype(argtypes[end])) && return false + na = length(argtypes) + (na ≠ 0 && isvarargtype(argtypes[end])) && return false if f === arraysize - return arraysize_nothrow(argtypes) + na == 2 || return false + return arraysize_nothrow(argtypes[1], argtypes[2]) elseif f === Core._typevar - length(argtypes) == 3 || return false + na == 3 || return false return typevar_nothrow(argtypes[1], argtypes[2], argtypes[3]) elseif f === invoke return false elseif f === getfield return getfield_nothrow(argtypes) elseif f === setfield! 
- return setfield!_nothrow(argtypes) + if na == 3 + return setfield!_nothrow(𝕃, argtypes[1], argtypes[2], argtypes[3]) + elseif na == 4 + return setfield!_nothrow(𝕃, argtypes[1], argtypes[2], argtypes[3], argtypes[4]) + end + return false elseif f === fieldtype - length(argtypes) == 2 || return false - return fieldtype_nothrow(argtypes[1], argtypes[2]) + na == 2 || return false + return fieldtype_nothrow(𝕃, argtypes[1], argtypes[2]) elseif f === apply_type - return apply_type_nothrow(lattice, argtypes, rt) + return apply_type_nothrow(𝕃, argtypes, rt) elseif f === isa - length(argtypes) == 2 || return false - return argtypes[2] ⊑ₗ Type + na == 2 || return false + return isa_nothrow(𝕃, nothing, argtypes[2]) elseif f === (<:) - length(argtypes) == 2 || return false - return argtypes[1] ⊑ₗ Type && argtypes[2] ⊑ₗ Type + na == 2 || return false + return subtype_nothrow(𝕃, argtypes[1], argtypes[2]) elseif f === UnionAll - return length(argtypes) == 2 && - (argtypes[1] ⊑ₗ TypeVar && argtypes[2] ⊑ₗ Type) + return na == 2 && (argtypes[1] ⊑ TypeVar && argtypes[2] ⊑ Type) elseif f === isdefined - return isdefined_nothrow(argtypes) + na == 2 || return false + return isdefined_nothrow(𝕃, argtypes[1], argtypes[2]) elseif f === Core.sizeof - length(argtypes) == 1 || return false + na == 1 || return false return sizeof_nothrow(argtypes[1]) - elseif f === Core.kwfunc - length(argtypes) == 1 || return false - return isa(rt, Const) elseif f === Core.ifelse - length(argtypes) == 3 || return false - return argtypes[1] ⊑ₗ Bool + na == 3 || return false + return ifelse_nothrow(𝕃, argtypes[1], nothing, nothing) elseif f === typeassert - length(argtypes) == 2 || return false - a3 = argtypes[2] - if (isType(a3) && !has_free_typevars(a3) && argtypes[1] ⊑ₗ a3.parameters[1]) || - (isa(a3, Const) && isa(a3.val, Type) && argtypes[1] ⊑ₗ a3.val) - return true + na == 2 || return false + return typeassert_nothrow(𝕃, argtypes[1], argtypes[2]) + elseif f === getglobal + if na == 2 + return getglobal_nothrow(argtypes[1], argtypes[2]) + elseif na == 3 + return getglobal_nothrow(argtypes[1], argtypes[2], argtypes[3]) end return false - elseif f === getglobal - return getglobal_nothrow(argtypes) elseif f === setglobal! 
- return setglobal!_nothrow(argtypes) + if na == 3 + return setglobal!_nothrow(argtypes[1], argtypes[2], argtypes[3]) + elseif na == 4 + return setglobal!_nothrow(argtypes[1], argtypes[2], argtypes[3], argtypes[4]) + end + return false elseif f === Core.get_binding_type - length(argtypes) == 2 || return false - return argtypes[1] ⊑ₗ Module && argtypes[2] ⊑ₗ Symbol + na == 2 || return false + return get_binding_type_nothrow(𝕃, argtypes[1], argtypes[2]) elseif f === donotdelete return true elseif f === Core.finalizer - 2 <= length(argtypes) <= 4 || return false + 2 <= na <= 4 || return false # Core.finalizer does no error checking - that's done in Base.finalizer return true elseif f === Core.compilerbarrier - length(argtypes) == 2 || return false - a1 = argtypes[1] - return isa(a1, Const) && contains_is((:type, :const, :conditional), a1.val) + na == 2 || return false + return compilerbarrier_nothrow(argtypes[1], nothing) end return false end @@ -1869,7 +1947,7 @@ const _PURE_BUILTINS = Any[tuple, svec, ===, typeof, nfields] const _EFFECT_FREE_BUILTINS = [ fieldtype, apply_type, isa, UnionAll, getfield, arrayref, const_arrayref, isdefined, Core.sizeof, - Core.kwfunc, Core.ifelse, Core._typevar, (<:), + Core.ifelse, Core._typevar, (<:), typeassert, throw, arraysize, getglobal, compilerbarrier ] @@ -1884,7 +1962,6 @@ const _CONSISTENT_BUILTINS = Any[ isa, UnionAll, Core.sizeof, - Core.kwfunc, Core.ifelse, (<:), typeassert, @@ -1948,12 +2025,13 @@ const _SPECIAL_BUILTINS = Any[ Core._apply_iterate, ] -function isdefined_effects(argtypes::Vector{Any}) +function isdefined_effects(@specialize(𝕃::AbstractLattice), argtypes::Vector{Any}) # consistent if the first arg is immutable - isempty(argtypes) && return EFFECTS_THROWS + na = length(argtypes) + na == 0 && return EFFECTS_THROWS obj = argtypes[1] consistent = is_immutable_argtype(unwrapva(obj)) ? 
ALWAYS_TRUE : ALWAYS_FALSE - nothrow = !isvarargtype(argtypes[end]) && isdefined_nothrow(argtypes) + nothrow = !isvarargtype(argtypes[end]) && na == 2 && isdefined_nothrow(𝕃, obj, argtypes[2]) return Effects(EFFECTS_TOTAL; consistent, nothrow) end @@ -1999,21 +2077,24 @@ end function getglobal_effects(argtypes::Vector{Any}, @nospecialize(rt)) consistent = inaccessiblememonly = ALWAYS_FALSE nothrow = false - if getglobal_nothrow(argtypes) - nothrow = true - # typeasserts below are already checked in `getglobal_nothrow` - M, s = (argtypes[1]::Const).val::Module, (argtypes[2]::Const).val::Symbol - if isconst(M, s) - consistent = ALWAYS_TRUE - if is_mutation_free_argtype(rt) - inaccessiblememonly = ALWAYS_TRUE + if length(argtypes) ≥ 2 + M, s = argtypes[1], argtypes[2] + if getglobal_nothrow(M, s) + nothrow = true + # typeasserts below are already checked in `getglobal_nothrow` + Mval, sval = (M::Const).val::Module, (s::Const).val::Symbol + if isconst(Mval, sval) + consistent = ALWAYS_TRUE + if is_mutation_free_argtype(rt) + inaccessiblememonly = ALWAYS_TRUE + end end end end return Effects(EFFECTS_TOTAL; consistent, nothrow, inaccessiblememonly) end -function builtin_effects(@specialize(lattice::AbstractLattice), f::Builtin, argtypes::Vector{Any}, @nospecialize(rt)) +function builtin_effects(@specialize(𝕃::AbstractLattice), f::Builtin, argtypes::Vector{Any}, @nospecialize(rt)) if isa(f, IntrinsicFunction) return intrinsic_effects(f, argtypes) end @@ -2021,7 +2102,7 @@ function builtin_effects(@specialize(lattice::AbstractLattice), f::Builtin, argt @assert !contains_is(_SPECIAL_BUILTINS, f) if f === isdefined - return isdefined_effects(argtypes) + return isdefined_effects(𝕃, argtypes) elseif f === getfield return getfield_effects(argtypes, rt) elseif f === getglobal @@ -2039,7 +2120,7 @@ function builtin_effects(@specialize(lattice::AbstractLattice), f::Builtin, argt else effect_free = ALWAYS_FALSE end - nothrow = (!(!isempty(argtypes) && isvarargtype(argtypes[end])) && builtin_nothrow(lattice, f, argtypes, rt)) + nothrow = (!(!isempty(argtypes) && isvarargtype(argtypes[end])) && builtin_nothrow(𝕃, f, argtypes, rt)) if contains_is(_INACCESSIBLEMEM_BUILTINS, f) inaccessiblememonly = ALWAYS_TRUE elseif contains_is(_ARGMEM_BUILTINS, f) @@ -2106,6 +2187,9 @@ function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtyp # wrong # of args return Bottom end + if f === getfield + return getfield_tfunc(typeinf_lattice(interp), argtypes...) + end return tf[3](argtypes...) end @@ -2235,7 +2319,7 @@ function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, s if isa(af_argtype, DataType) && af_argtype <: Tuple argtypes_vec = Any[aft, af_argtype.parameters...] 
if contains_is(argtypes_vec, Union{}) - return CallMeta(Const(Union{}), EFFECTS_TOTAL, false) + return CallMeta(Const(Union{}), EFFECTS_TOTAL, NoCallInfo()) end # # Run the abstract_call without restricting abstract call @@ -2278,7 +2362,7 @@ function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, s end end end - return CallMeta(Type, EFFECTS_THROWS, false) + return CallMeta(Type, EFFECTS_THROWS, NoCallInfo()) end # N.B.: typename maps type equivalence classes to a single value @@ -2298,12 +2382,11 @@ function global_order_nothrow(@nospecialize(o), loading::Bool, storing::Bool) end return false end -function getglobal_nothrow(argtypes::Vector{Any}) - 2 ≤ length(argtypes) ≤ 3 || return false - if length(argtypes) == 3 - global_order_nothrow(argtypes[3], #=loading=#true, #=storing=#false) || return false - end - M, s = argtypes +function getglobal_nothrow(@nospecialize(M), @nospecialize(s), @nospecialize(o)) + global_order_nothrow(o, #=loading=#true, #=storing=#false) || return false + return getglobal_nothrow(M, s) +end +function getglobal_nothrow(@nospecialize(M), @nospecialize(s)) if M isa Const && s isa Const M, s = M.val, s.val if M isa Module && s isa Symbol @@ -2333,15 +2416,16 @@ function setglobal!_tfunc(@nospecialize(M), @nospecialize(s), @nospecialize(v), end add_tfunc(getglobal, 2, 3, getglobal_tfunc, 1) add_tfunc(setglobal!, 3, 4, setglobal!_tfunc, 3) -function setglobal!_nothrow(argtypes::Vector{Any}) - 3 ≤ length(argtypes) ≤ 4 || return false - if length(argtypes) == 4 - global_order_nothrow(argtypes[4], #=loading=#false, #=storing=#true) || return false - end - M, s, newty = argtypes +function setglobal!_nothrow(@nospecialize(M), @nospecialize(s), @nospecialize(newty), @nospecialize(o)) + global_order_nothrow(o, #=loading=#false, #=storing=#true) || return false + return setglobal!_nothrow(M, s, newty) +end +function setglobal!_nothrow(@nospecialize(M), @nospecialize(s), @nospecialize(newty)) if M isa Const && s isa Const M, s = M.val, s.val - return global_assignment_nothrow(M, s, newty) + if isa(M, Module) && isa(s, Symbol) + return global_assignment_nothrow(M, s, newty) + end end return false end @@ -2371,6 +2455,11 @@ function get_binding_type_tfunc(@nospecialize(M), @nospecialize(s)) end add_tfunc(Core.get_binding_type, 2, 2, get_binding_type_tfunc, 0) +function get_binding_type_nothrow(@specialize(𝕃::AbstractLattice), @nospecialize(M), @nospecialize(s)) + ⊑ = Core.Compiler.:⊑(𝕃) + return M ⊑ Module && s ⊑ Symbol +end + # foreigncall # =========== diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl index 6db3c42a6ca54..94ec9bcace94e 100644 --- a/base/compiler/typeinfer.jl +++ b/base/compiler/typeinfer.jl @@ -220,9 +220,21 @@ function finish!(interp::AbstractInterpreter, caller::InferenceResult) # If we didn't transform the src for caching, we may have to transform # it anyway for users like typeinf_ext. Do that here. opt = caller.src - if opt isa OptimizationState # implies `may_optimize(interp) === true` + if opt isa OptimizationState{typeof(interp)} # implies `may_optimize(interp) === true` if opt.ir !== nothing - caller.src = ir_to_codeinf!(opt) + if caller.must_be_codeinf + caller.src = ir_to_codeinf!(opt) + elseif is_inlineable(opt.src) + # TODO: If the CFG is too big, inlining becomes more expensive and if we're going to + # use this IR over and over, it's worth simplifying it. Round trips through + # CodeInstance do this implicitly, since they recompute the CFG, so try to + # match that behavior here. 
+ # ir = cfg_simplify!(opt.ir) + caller.src = opt.ir + else + # Not cached and not inlineable - drop the ir + caller.src = nothing + end end end return caller.src @@ -255,7 +267,7 @@ function _typeinf(interp::AbstractInterpreter, frame::InferenceState) empty!(frames) for (caller, _, _) in results opt = caller.src - if opt isa OptimizationState # implies `may_optimize(interp) === true` + if opt isa OptimizationState{typeof(interp)} # implies `may_optimize(interp) === true` analyzed = optimize(interp, opt, OptimizationParams(interp), caller) if isa(analyzed, ConstAPI) # XXX: The work in ir_to_codeinf! is essentially wasted. The only reason @@ -355,7 +367,7 @@ function transform_result_for_cache(interp::AbstractInterpreter, inferred_result = result.src # If we decided not to optimize, drop the OptimizationState now. # External interpreters can override as necessary to cache additional information - if inferred_result isa OptimizationState + if inferred_result isa OptimizationState{typeof(interp)} inferred_result = ir_to_codeinf!(inferred_result) end if inferred_result isa CodeInfo @@ -925,6 +937,9 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize # completely new lock_mi_inference(interp, mi) result = InferenceResult(mi) + if cache === :local + result.must_be_codeinf = true # TODO directly keep `opt.ir` for this case + end frame = InferenceState(result, cache, interp) # always use the cache for edge targets if frame === nothing # can't get the source for this, so we know nothing @@ -998,6 +1013,7 @@ function typeinf_frame(interp::AbstractInterpreter, method::Method, @nospecializ mi = specialize_method(method, atype, sparams)::MethodInstance ccall(:jl_typeinf_timing_begin, Cvoid, ()) result = InferenceResult(mi) + result.must_be_codeinf = true frame = InferenceState(result, run_optimizer ? 
:global : :no, interp) frame === nothing && return nothing typeinf(interp, frame) @@ -1056,7 +1072,9 @@ function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance) return retrieve_code_info(mi) end lock_mi_inference(interp, mi) - frame = InferenceState(InferenceResult(mi), #=cache=#:global, interp) + result = InferenceResult(mi) + result.must_be_codeinf = true + frame = InferenceState(result, #=cache=#:global, interp) frame === nothing && return nothing typeinf(interp, frame) ccall(:jl_typeinf_timing_end, Cvoid, ()) @@ -1099,6 +1117,7 @@ function typeinf_ext_toplevel(interp::AbstractInterpreter, linfo::MethodInstance ccall(:jl_typeinf_timing_begin, Cvoid, ()) if !src.inferred result = InferenceResult(linfo) + result.must_be_codeinf = true frame = InferenceState(result, src, #=cache=#:global, interp) typeinf(interp, frame) @assert frame.inferred # TODO: deal with this better diff --git a/base/compiler/typelattice.jl b/base/compiler/typelattice.jl index 5c57443d70656..bcbc722cab266 100644 --- a/base/compiler/typelattice.jl +++ b/base/compiler/typelattice.jl @@ -221,11 +221,12 @@ function ⊑(lattice::InferenceLattice, @nospecialize(a), @nospecialize(b)) end function ⊑(lattice::OptimizerLattice, @nospecialize(a), @nospecialize(b)) - if isa(a, MaybeUndef) && !isa(b, MaybeUndef) - return false + if isa(a, MaybeUndef) + isa(b, MaybeUndef) || return false + a, b = a.typ, b.typ + elseif isa(b, MaybeUndef) + b = b.typ end - isa(a, MaybeUndef) && (a = a.typ) - isa(b, MaybeUndef) && (b = b.typ) return ⊑(widenlattice(lattice), a, b) end @@ -235,15 +236,15 @@ function ⊑(lattice::AnyConditionalsLattice, @nospecialize(a), @nospecialize(b) a === Any && return false a === Union{} && return true b === Union{} && return false - T = isa(lattice, ConditionalsLattice) ? Conditional : InterConditional - if isa(a, T) - if isa(b, T) + ConditionalT = isa(lattice, ConditionalsLattice) ? Conditional : InterConditional + if isa(a, ConditionalT) + if isa(b, ConditionalT) return issubconditional(lattice, a, b) elseif isa(b, Const) && isa(b.val, Bool) return maybe_extract_const_bool(a) === b.val end a = Bool - elseif isa(b, T) + elseif isa(b, ConditionalT) return false end return ⊑(widenlattice(lattice), a, b) @@ -348,7 +349,8 @@ function is_lattice_equal(lattice::OptimizerLattice, @nospecialize(a), @nospecia end function is_lattice_equal(lattice::AnyConditionalsLattice, @nospecialize(a), @nospecialize(b)) - if isa(a, AnyConditional) || isa(b, AnyConditional) + ConditionalT = isa(lattice, ConditionalsLattice) ? 
Conditional : InterConditional + if isa(a, ConditionalT) || isa(b, ConditionalT) # TODO: Unwrap these and recurse to is_lattice_equal return ⊑(lattice, a, b) && ⊑(lattice, b, a) end @@ -419,7 +421,7 @@ function tmeet(lattice::PartialsLattice, @nospecialize(v), @nospecialize(t::Type if isvarargtype(vfi) new_fields[i] = vfi else - new_fields[i] = tmeet(lattice, vfi, widenconst(getfield_tfunc(t, Const(i)))) + new_fields[i] = tmeet(lattice, vfi, widenconst(getfield_tfunc(lattice, t, Const(i)))) if new_fields[i] === Bottom return Bottom end diff --git a/base/compiler/typelimits.jl b/base/compiler/typelimits.jl index 43ce57d6adb5d..61b292718a7e2 100644 --- a/base/compiler/typelimits.jl +++ b/base/compiler/typelimits.jl @@ -321,7 +321,7 @@ function issimplertype(lattice::AbstractLattice, @nospecialize(typea), @nospecia bi = (tni.val::Core.TypeName).wrapper is_lattice_equal(lattice, ai, bi) && continue end - bi = getfield_tfunc(typeb, Const(i)) + bi = getfield_tfunc(lattice, typeb, Const(i)) is_lattice_equal(lattice, ai, bi) && continue # It is not enough for ai to be simpler than bi: it must exactly equal # (for this, an invariant struct field, by contrast to @@ -468,6 +468,8 @@ function tmerge(lattice::InterConditionalsLattice, @nospecialize(typea), @nospec end return Bool end + typea = widenconditional(typea) + typeb = widenconditional(typeb) return tmerge(widenlattice(lattice), typea, typeb) end @@ -490,8 +492,8 @@ function tmerge(lattice::PartialsLattice, @nospecialize(typea), @nospecialize(ty fields = Vector{Any}(undef, type_nfields) anyrefine = false for i = 1:type_nfields - ai = getfield_tfunc(typea, Const(i)) - bi = getfield_tfunc(typeb, Const(i)) + ai = getfield_tfunc(lattice, typea, Const(i)) + bi = getfield_tfunc(lattice, typeb, Const(i)) ft = fieldtype(aty, i) if is_lattice_equal(lattice, ai, bi) || is_lattice_equal(lattice, ai, ft) # Since ai===bi, the given type has no restrictions on complexity. @@ -524,10 +526,13 @@ function tmerge(lattice::PartialsLattice, @nospecialize(typea), @nospecialize(ty return anyrefine ? PartialStruct(aty, fields) : aty end end + + # Don't widen const here - external AbstractInterpreter might insert lattice # layers between us and `ConstsLattice`. 
- isa(typea, PartialStruct) && (typea = widenconst(typea)) - isa(typeb, PartialStruct) && (typeb = widenconst(typeb)) + wl = widenlattice(lattice) + isa(typea, PartialStruct) && (typea = widenlattice(wl, typea)) + isa(typeb, PartialStruct) && (typeb = widenlattice(wl, typeb)) # type-lattice for PartialOpaque wrapper apo = isa(typea, PartialOpaque) @@ -540,24 +545,27 @@ function tmerge(lattice::PartialsLattice, @nospecialize(typea), @nospecialize(ty typea.parent === typeb.parent) return widenconst(typea) end - return PartialOpaque(typea.typ, tmerge(typea.env, typeb.env), + return PartialOpaque(typea.typ, tmerge(lattice, typea.env, typeb.env), typea.parent, typea.source) end typea = aty typeb = bty elseif apo - typea = widenconst(typea) + typea = widenlattice(wl, typea) elseif bpo - typeb = widenconst(typeb) + typeb = widenlattice(wl, typeb) end - return tmerge(widenlattice(lattice), typea, typeb) + return tmerge(wl, typea, typeb) end function tmerge(lattice::ConstsLattice, @nospecialize(typea), @nospecialize(typeb)) # the equality of the constants can be checked here, but the equivalent check is usually # done by `tmerge_fast_path` at earlier lattice stage - return tmerge(widenlattice(lattice), widenconst(typea), widenconst(typeb)) + wl = widenlattice(lattice) + (isa(typea, Const) || isa(typea, PartialTypeVar)) && (typea = widenlattice(wl, typea)) + (isa(typeb, Const) || isa(typeb, PartialTypeVar)) && (typeb = widenlattice(wl, typeb)) + return tmerge(wl, typea, typeb) end function tmerge(::JLTypeLattice, @nospecialize(typea::Type), @nospecialize(typeb::Type)) diff --git a/base/compiler/types.jl b/base/compiler/types.jl index ac36475b8c603..37f8b5a23bbf6 100644 --- a/base/compiler/types.jl +++ b/base/compiler/types.jl @@ -41,18 +41,19 @@ mutable struct InferenceResult argtypes::Vector{Any} overridden_by_const::BitVector result # ::Type, or InferenceState if WIP - src # ::Union{CodeInfo, OptimizationState} if inferred copy is available, nothing otherwise + src # ::Union{CodeInfo, IRCode, OptimizationState} if inferred copy is available, nothing otherwise valid_worlds::WorldRange # if inference and optimization is finished ipo_effects::Effects # if inference is finished effects::Effects # if optimization is finished argescapes # ::ArgEscapeCache if optimized, nothing otherwise + must_be_codeinf::Bool # if this must come out as CodeInfo or leaving it as IRCode is ok # NOTE the main constructor is defined within inferencestate.jl global function _InferenceResult( linfo::MethodInstance, arginfo#=::Union{Nothing,Tuple{ArgInfo,InferenceState}}=#) argtypes, overridden_by_const = matching_cache_argtypes(linfo, arginfo) return new(linfo, argtypes, overridden_by_const, Any, nothing, - WorldRange(), Effects(), Effects(), nothing) + WorldRange(), Effects(), Effects(), nothing, true) end end @@ -291,4 +292,18 @@ infer_compilation_signature(::NativeInterpreter) = true typeinf_lattice(::AbstractInterpreter) = InferenceLattice(BaseInferenceLattice.instance) ipo_lattice(::AbstractInterpreter) = InferenceLattice(IPOResultLattice.instance) -optimizer_lattice(::AbstractInterpreter) = OptimizerLattice() +optimizer_lattice(::AbstractInterpreter) = OptimizerLattice(BaseInferenceLattice.instance) + +abstract type CallInfo end + +@nospecialize + +nsplit(info::CallInfo) = nsplit_impl(info)::Union{Nothing,Int} +getsplit(info::CallInfo, idx::Int) = getsplit_impl(info, idx)::MethodLookupResult +getresult(info::CallInfo, idx::Int) = getresult_impl(info, idx) + +nsplit_impl(::CallInfo) = nothing +getsplit_impl(::CallInfo, 
::Int) = error("unexpected call into `getsplit`") +getresult_impl(::CallInfo, ::Int) = nothing + +@specialize diff --git a/base/compiler/typeutils.jl b/base/compiler/typeutils.jl index d2992fc6113ba..209d899dc8762 100644 --- a/base/compiler/typeutils.jl +++ b/base/compiler/typeutils.jl @@ -24,12 +24,12 @@ function hasuniquerep(@nospecialize t) end """ - isTypeDataType(@nospecialize t) + isTypeDataType(@nospecialize t) -> Bool For a type `t` test whether ∀S s.t. `isa(S, rewrap_unionall(Type{t}, ...))`, we have `isa(S, DataType)`. In particular, if a statement is typed as `Type{t}` -(potentially wrapped in some UnionAll), then we are guaranteed that this statement -will be a DataType at runtime (and not e.g. a Union or UnionAll typeequal to it). +(potentially wrapped in some `UnionAll`), then we are guaranteed that this statement +will be a `DataType` at runtime (and not e.g. a `Union` or `UnionAll` typeequal to it). """ function isTypeDataType(@nospecialize t) isa(t, DataType) || return false @@ -41,7 +41,7 @@ function isTypeDataType(@nospecialize t) # e.g. `Tuple{Union{Int, Float64}, Int}` is a DataType, but # `Union{Tuple{Int, Int}, Tuple{Float64, Int}}` is typeequal to it and # is not. - return _all(isTypeDataType, t.parameters) + return all(isTypeDataType, t.parameters) end return true end @@ -80,7 +80,7 @@ end # (therefore also a lower bound on the number of fields) function datatype_min_ninitialized(t::DataType) isabstracttype(t) && return 0 - if t.name === NamedTuple_typename + if t.name === _NAMEDTUPLE_NAME names, types = t.parameters[1], t.parameters[2] if names isa Tuple return length(names) diff --git a/base/compiler/utilities.jl b/base/compiler/utilities.jl index e71597c68bdb2..e049003dbc09e 100644 --- a/base/compiler/utilities.jl +++ b/base/compiler/utilities.jl @@ -146,7 +146,7 @@ end function get_compileable_sig(method::Method, @nospecialize(atype), sparams::SimpleVector) isa(atype, DataType) || return nothing - mt = ccall(:jl_method_table_for, Any, (Any,), atype) + mt = ccall(:jl_method_get_table, Any, (Any,), method) mt === nothing && return nothing return ccall(:jl_normalize_to_compilable_sig, Any, (Any, Any, Any, Any), mt, atype, sparams, method) @@ -337,6 +337,16 @@ function foreachssa(@specialize(f), @nospecialize(stmt)) end end +function foreach_anyssa(@specialize(f), @nospecialize(stmt)) + urs = userefs(stmt) + for op in urs + val = op[] + if isa(val, AnySSAValue) + f(val) + end + end +end + function find_ssavalue_uses(body::Vector{Any}, nvals::Int) uses = BitSet[ BitSet() for i = 1:nvals ] for line in 1:length(body) @@ -442,38 +452,6 @@ end @inline slot_id(s) = isa(s, SlotNumber) ? (s::SlotNumber).id : isa(s, Argument) ? 
(s::Argument).n : (s::TypedSlot).id -###################### -# IncrementalCompact # -###################### - -# specifically meant to be used with body1 = compact.result and body2 = compact.new_new_nodes, with nvals == length(compact.used_ssas) -function find_ssavalue_uses1(compact) - body1, body2 = compact.result.inst, compact.new_new_nodes.stmts.inst - nvals = length(compact.used_ssas) - nbody1 = length(body1) - nbody2 = length(body2) - - uses = zeros(Int, nvals) - function increment_uses(ssa::SSAValue) - uses[ssa.id] += 1 - end - - for line in 1:(nbody1 + nbody2) - # index into the right body - if line <= nbody1 - isassigned(body1, line) || continue - e = body1[line] - else - line -= nbody1 - isassigned(body2, line) || continue - e = body2[line] - end - - foreachssa(increment_uses, e) - end - return uses -end - ########### # options # ########### diff --git a/base/deprecated.jl b/base/deprecated.jl index 87fc670cd594a..6953cd600cacd 100644 --- a/base/deprecated.jl +++ b/base/deprecated.jl @@ -167,11 +167,8 @@ function firstcaller(bt::Vector, funcsyms) if !found li = lkup.linfo if li isa Core.MethodInstance - ft = ccall(:jl_first_argument_datatype, Any, (Any,), (li.def::Method).sig) - if isType(ft) - ft = unwrap_unionall(ft.parameters[1]) - found = (isa(ft, DataType) && ft.name.name in funcsyms) - end + def = li.def + found = def isa Method && def.name in funcsyms end end end @@ -336,4 +333,34 @@ function setproperty!(ci::CodeInfo, s::Symbol, v) return setfield!(ci, s, convert(fieldtype(CodeInfo, s), v)) end +@eval Threads nthreads() = threadpoolsize() + +@eval Threads begin + """ + resize_nthreads!(A, copyvalue=A[1]) + + Resize the array `A` to length [`nthreads()`](@ref). Any new + elements that are allocated are initialized to `deepcopy(copyvalue)`, + where `copyvalue` defaults to `A[1]`. + + This is typically used to allocate per-thread variables, and + should be called in `__init__` if `A` is a global constant. + + !!! warning + + This function is deprecated, since as of Julia v1.9 the number of + threads can change at run time. Instead, per-thread state should be + created as needed based on the thread id of the caller. + """ + function resize_nthreads!(A::AbstractVector, copyvalue=A[1]) + nthr = nthreads() + nold = length(A) + resize!(A, nthr) + for i = nold+1:nthr + A[i] = deepcopy(copyvalue) + end + return A + end +end + # END 1.9 deprecations diff --git a/base/docs/basedocs.jl b/base/docs/basedocs.jl index 2286b3500ee0f..df8a3d0b98aba 100644 --- a/base/docs/basedocs.jl +++ b/base/docs/basedocs.jl @@ -1750,7 +1750,7 @@ A symbol in the current scope is not defined. 
# Examples ```jldoctest julia> a -ERROR: UndefVarError: a not defined +ERROR: UndefVarError: `a` not defined julia> a = 1; @@ -1773,7 +1773,7 @@ julia> function my_func(;my_arg) my_func (generic function with 1 method) julia> my_func() -ERROR: UndefKeywordError: keyword argument my_arg not assigned +ERROR: UndefKeywordError: keyword argument `my_arg` not assigned Stacktrace: [1] my_func() at ./REPL[1]:2 [2] top-level scope at REPL[2]:1 diff --git a/base/error.jl b/base/error.jl index 4459e54def19b..07f66aa5cf6d2 100644 --- a/base/error.jl +++ b/base/error.jl @@ -162,7 +162,7 @@ end ## keyword arg lowering generates calls to this ## function kwerr(kw, args::Vararg{Any,N}) where {N} @noinline - throw(MethodError(typeof(args[1]).name.mt.kwsorter, (kw,args...))) + throw(MethodError(Core.kwcall, (kw, args...))) end ## system error handling ## diff --git a/base/errorshow.jl b/base/errorshow.jl index 2d9ada0ff29cb..16190d64e01e4 100644 --- a/base/errorshow.jl +++ b/base/errorshow.jl @@ -157,10 +157,10 @@ showerror(io::IO, ex::AssertionError) = print(io, "AssertionError: ", ex.msg) showerror(io::IO, ex::OverflowError) = print(io, "OverflowError: ", ex.msg) showerror(io::IO, ex::UndefKeywordError) = - print(io, "UndefKeywordError: keyword argument $(ex.var) not assigned") + print(io, "UndefKeywordError: keyword argument `$(ex.var)` not assigned") function showerror(io::IO, ex::UndefVarError) - print(io, "UndefVarError: $(ex.var) not defined") + print(io, "UndefVarError: `$(ex.var)` not defined") Experimental.show_error_hints(io, ex) end @@ -235,17 +235,16 @@ function showerror(io::IO, ex::MethodError) show_candidates = true print(io, "MethodError: ") ft = typeof(f) - name = ft.name.mt.name f_is_function = false kwargs = () - if endswith(string(ft.name.name), "##kw") - f = ex.args[2] + if f === Core.kwcall && !is_arg_types + f = (ex.args::Tuple)[2] ft = typeof(f) - name = ft.name.mt.name arg_types_param = arg_types_param[3:end] kwargs = pairs(ex.args[1]) ex = MethodError(f, ex.args[3:end::Int]) end + name = ft.name.mt.name if f === Base.convert && length(arg_types_param) == 2 && !is_arg_types f_is_function = true show_convert_error(io, ex, arg_types_param) @@ -794,11 +793,6 @@ function show_backtrace(io::IO, t::Vector) end -function is_kw_sorter_name(name::Symbol) - sn = string(name) - return !startswith(sn, '#') && endswith(sn, "##kw") -end - # For improved user experience, filter out frames for include() implementation # - see #33065. See also #35371 for extended discussion of internal frames. 
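A minimal sketch of what the `Core.kwcall` hunks above mean at call sites, assuming a build with these changes: keyword calls now lower to the single builtin entry point `Core.kwcall(kwargs, f, positional...)` instead of a generated per-function `##kw` sorter, and `showerror` unwraps a failing keyword call back to `f` for display.

```julia
# Keyword calls lower to `Core.kwcall(kwargs_namedtuple, f, positional...)`.
f(x; y = 1) = x + y

Core.kwcall((y = 3,), f, 2) == f(2; y = 3)   # true

# A MethodError raised from a keyword call is reported against `f` itself
# (per the `showerror` hunk above), not against an internal kwcall frame.
try
    f("a"; y = 3)            # no matching method for a String argument
catch err
    err isa MethodError      # true
end
```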
function _simplify_include_frames(trace) @@ -850,15 +844,26 @@ function process_backtrace(t::Vector, limit::Int=typemax(Int); skipC = true) continue end - if (lkup.from_c && skipC) || is_kw_sorter_name(lkup.func) + if (lkup.from_c && skipC) continue end + code = lkup.linfo + if code isa MethodInstance + def = code.def + if def isa Method + if def.name === :kwcall && def.module === Core + continue + end + end + elseif !lkup.from_c + lkup.func === :kwcall && continue + end count += 1 if count > limit break end - if lkup.file != last_frame.file || lkup.line != last_frame.line || lkup.func != last_frame.func || lkup.linfo !== lkup.linfo + if lkup.file != last_frame.file || lkup.line != last_frame.line || lkup.func != last_frame.func || lkup.linfo !== last_frame.linfo if n > 0 push!(ret, (last_frame, n)) end diff --git a/base/essentials.jl b/base/essentials.jl index daee352b7649d..d33aca52073fa 100644 --- a/base/essentials.jl +++ b/base/essentials.jl @@ -809,7 +809,7 @@ function invokelatest(@nospecialize(f), @nospecialize args...; kwargs...) if isempty(kwargs) return Core._call_latest(f, args...) end - return Core._call_latest(Core.kwfunc(f), kwargs, f, args...) + return Core._call_latest(Core.kwcall, kwargs, f, args...) end """ @@ -843,7 +843,7 @@ function invoke_in_world(world::UInt, @nospecialize(f), @nospecialize args...; k if isempty(kwargs) return Core._call_in_world(world, f, args...) end - return Core._call_in_world(world, Core.kwfunc(f), kwargs, f, args...) + return Core._call_in_world(world, Core.kwcall, kwargs, f, args...) end inferencebarrier(@nospecialize(x)) = compilerbarrier(:type, x) diff --git a/base/intfuncs.jl b/base/intfuncs.jl index 823deee94f173..168c9f1e3b7ad 100644 --- a/base/intfuncs.jl +++ b/base/intfuncs.jl @@ -904,7 +904,7 @@ end """ hastypemax(T::Type) -> Bool -Return true if and only if the extrema `typemax(T)` and `typemin(T)` are defined. +Return `true` if and only if the extrema `typemax(T)` and `typemin(T)` are defined. """ hastypemax(::Base.BitIntegerType) = true hastypemax(::Type{Bool}) = true diff --git a/base/irrationals.jl b/base/irrationals.jl index 3c4a422a74147..72341fea71690 100644 --- a/base/irrationals.jl +++ b/base/irrationals.jl @@ -174,6 +174,14 @@ and arbitrary-precision definition in terms of `BigFloat`s given by the expressi An `AssertionError` is thrown when either `big(def) isa BigFloat` or `Float64(val) == Float64(def)` returns `false`. +!!! warning + This macro should not be used outside of `Base` Julia. + + The macro creates a new type `Irrational{:sym}` regardless of where it's invoked. This can + lead to conflicting definitions if two packages define an irrational number with the same + name but different values. + + # Examples ```jldoctest julia> Base.@irrational(twoπ, 6.2831853071795864769, 2*big(π)) diff --git a/base/loading.jl b/base/loading.jl index 6df28abbd9dbb..2c2b1ebc74462 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -354,8 +354,7 @@ Pkg [44cfe95a-1eb2-52ea-b672-e2afdf69b78f] julia> using LinearAlgebra julia> Base.identify_package(LinearAlgebra, "Pkg") # Pkg is not a dependency of LinearAlgebra - -```` +``` """ identify_package(where::Module, name::String) = _nothing_or_first(identify_package_env(where, name)) identify_package(where::PkgId, name::String) = _nothing_or_first(identify_package_env(where, name)) @@ -1593,8 +1592,27 @@ end """ evalfile(path::AbstractString, args::Vector{String}=String[]) -Load the file using [`include`](@ref), evaluate all expressions, -and return the value of the last one. 
+Load the file into an anonymous module using [`include`](@ref), evaluate all expressions, +and return the value of the last expression. +The optional `args` argument can be used to set the input arguments of the script (i.e. the global `ARGS` variable). +Note that definitions (e.g. methods, globals) are evaluated in the anonymous module and do not affect the current module. + +# Example + +```jldoctest +julia> write("testfile.jl", \"\"\" + @show ARGS + 1 + 1 + \"\"\"); + +julia> x = evalfile("testfile.jl", ["ARG1", "ARG2"]); +ARGS = ["ARG1", "ARG2"] + +julia> x +2 + +julia> rm("testfile.jl") +``` """ function evalfile(path::AbstractString, args::Vector{String}=String[]) return Core.eval(Module(:__anon__), diff --git a/base/logging.jl b/base/logging.jl index 809a9368d95bd..d7dc45122e063 100644 --- a/base/logging.jl +++ b/base/logging.jl @@ -42,7 +42,7 @@ function handle_message end """ shouldlog(logger, level, _module, group, id) -Return true when `logger` accepts a message at `level`, generated for +Return `true` when `logger` accepts a message at `level`, generated for `_module`, `group` and with unique log identifier `id`. """ function shouldlog end @@ -58,7 +58,7 @@ function min_enabled_level end """ catch_exceptions(logger) -Return true if the logger should catch exceptions which happen during log +Return `true` if the logger should catch exceptions which happen during log record construction. By default, messages are caught By default all exceptions are caught to prevent log message generation from diff --git a/base/meta.jl b/base/meta.jl index c9bad2bb8a4a5..b0e0dc371b26c 100644 --- a/base/meta.jl +++ b/base/meta.jl @@ -48,7 +48,7 @@ quot(ex) = Expr(:quote, ex) """ Meta.isexpr(ex, head[, n])::Bool -Return true if `ex` is an `Expr` with the given type `head` and optionally that +Return `true` if `ex` is an `Expr` with the given type `head` and optionally that the argument list is of length `n`. `head` may be a `Symbol` or collection of `Symbol`s. For example, to check that a macro was passed a function call expression, you might use `isexpr(ex, :call)`. diff --git a/base/methodshow.jl b/base/methodshow.jl index 4bd29f75c361d..83c5421530956 100644 --- a/base/methodshow.jl +++ b/base/methodshow.jl @@ -78,12 +78,8 @@ end # NOTE: second argument is deprecated and is no longer used function kwarg_decl(m::Method, kwtype = nothing) - if m.sig === Tuple # OpaqueClosure - return Symbol[] - end - mt = get_methodtable(m) - if isdefined(mt, :kwsorter) - kwtype = typeof(mt.kwsorter) + if m.sig !== Tuple # OpaqueClosure or Builtin + kwtype = typeof(Core.kwcall) sig = rewrap_unionall(Tuple{kwtype, Any, (unwrap_unionall(m.sig)::DataType).parameters...}, m.sig) kwli = ccall(:jl_methtable_lookup, Any, (Any, Any, UInt), kwtype.name.mt, sig, get_world_counter()) if kwli !== nothing @@ -164,7 +160,7 @@ functionloc(m::Core.MethodInstance) = functionloc(m.def) """ functionloc(m::Method) -Returns a tuple `(filename,line)` giving the location of a `Method` definition. +Return a tuple `(filename,line)` giving the location of a `Method` definition. """ function functionloc(m::Method) file, ln = updated_methodloc(m) @@ -177,7 +173,7 @@ end """ functionloc(f::Function, types) -Returns a tuple `(filename,line)` giving the location of a generic `Function` definition. +Return a tuple `(filename,line)` giving the location of a generic `Function` definition. 
""" functionloc(@nospecialize(f), @nospecialize(types)) = functionloc(which(f,types)) diff --git a/base/multimedia.jl b/base/multimedia.jl index 308cc07a05a53..e634a19b7d6aa 100644 --- a/base/multimedia.jl +++ b/base/multimedia.jl @@ -57,7 +57,7 @@ print(io::IO, ::MIME{mime}) where {mime} = print(io, mime) """ showable(mime, x) -Returns a boolean value indicating whether or not the object `x` can be written +Return a boolean value indicating whether or not the object `x` can be written as the given `mime` type. (By default, this is determined automatically by the existence of the @@ -125,7 +125,7 @@ show(io::IO, m::AbstractString, x) = show(io, MIME(m), x) """ repr(mime, x; context=nothing) -Returns an `AbstractString` or `Vector{UInt8}` containing the representation of +Return an `AbstractString` or `Vector{UInt8}` containing the representation of `x` in the requested `mime` type, as written by [`show(io, mime, x)`](@ref) (throwing a [`MethodError`](@ref) if no appropriate `show` is available). An `AbstractString` is returned for MIME types with textual representations (such as `"text/html"` or @@ -232,7 +232,7 @@ display(mime::AbstractString, @nospecialize x) = display(MIME(mime), x) displayable(mime) -> Bool displayable(d::AbstractDisplay, mime) -> Bool -Returns a boolean value indicating whether the given `mime` type (string) is displayable by +Return a boolean value indicating whether the given `mime` type (string) is displayable by any of the displays in the current display stack, or specifically by the display `d` in the second variant. """ @@ -244,7 +244,7 @@ displayable(mime::AbstractString) = displayable(MIME(mime)) """ TextDisplay(io::IO) -Returns a `TextDisplay <: AbstractDisplay`, which displays any object as the text/plain MIME type +Return a `TextDisplay <: AbstractDisplay`, which displays any object as the text/plain MIME type (by default), writing the text representation to the given I/O stream. (This is how objects are printed in the Julia REPL.) """ diff --git a/base/namedtuple.jl b/base/namedtuple.jl index 3e9f1272d588e..c994cd977be08 100644 --- a/base/namedtuple.jl +++ b/base/namedtuple.jl @@ -111,7 +111,7 @@ function NamedTuple{names}(nt::NamedTuple) where {names} types = Tuple{(fieldtype(nt, idx[n]) for n in 1:length(idx))...} Expr(:new, :(NamedTuple{names, $types}), Any[ :(getfield(nt, $(idx[n]))) for n in 1:length(idx) ]...) else - length_names = length(names)::Integer + length_names = length(names::Tuple) types = Tuple{(fieldtype(typeof(nt), names[n]) for n in 1:length_names)...} NamedTuple{names, types}(map(Fix1(getfield, nt), names)) end @@ -335,7 +335,7 @@ reverse(nt::NamedTuple) = NamedTuple{reverse(keys(nt))}(reverse(values(nt))) end """ - structdiff(a::NamedTuple{an}, b::Union{NamedTuple{bn},Type{NamedTuple{bn}}}) where {an,bn} + structdiff(a::NamedTuple, b::Union{NamedTuple,Type{NamedTuple}}) Construct a copy of named tuple `a`, except with fields that exist in `b` removed. `b` can be a named tuple, or a type of the form `NamedTuple{field_names}`. 
@@ -343,14 +343,19 @@ Construct a copy of named tuple `a`, except with fields that exist in `b` remove function structdiff(a::NamedTuple{an}, b::Union{NamedTuple{bn}, Type{NamedTuple{bn}}}) where {an, bn} if @generated names = diff_names(an, bn) + isempty(names) && return (;) # just a fast pass idx = Int[ fieldindex(a, names[n]) for n in 1:length(names) ] types = Tuple{Any[ fieldtype(a, idx[n]) for n in 1:length(idx) ]...} vals = Any[ :(getfield(a, $(idx[n]))) for n in 1:length(idx) ] - :( NamedTuple{$names,$types}(($(vals...),)) ) + return :( NamedTuple{$names,$types}(($(vals...),)) ) else names = diff_names(an, bn) + # N.B this early return is necessary to get a better type stability, + # and also allows us to cut off the cost from constructing + # potentially type unstable closure passed to the `map` below + isempty(names) && return (;) types = Tuple{Any[ fieldtype(typeof(a), names[n]) for n in 1:length(names) ]...} - NamedTuple{names,types}(map(Fix1(getfield, a), names)) + return NamedTuple{names,types}(map(n::Symbol->getfield(a, n), names)) end end @@ -418,7 +423,7 @@ macro NamedTuple(ex) return :(NamedTuple{($(vars...),), Tuple{$(types...)}}) end -function split_rest(t::NamedTuple{names}, n::Int, st...) where {names} +@constprop :aggressive function split_rest(t::NamedTuple{names}, n::Int, st...) where {names} _check_length_split_rest(length(t), n) names_front, names_last_n = split_rest(names, n, st...) return NamedTuple{names_front}(t), NamedTuple{names_last_n}(t) diff --git a/base/number.jl b/base/number.jl index c90e2ce4a3875..31aa616b0eb55 100644 --- a/base/number.jl +++ b/base/number.jl @@ -115,7 +115,7 @@ copy(x::Number) = x # some code treats numbers as collection-like """ signbit(x) -Returns `true` if the value of the sign of `x` is negative, otherwise `false`. +Return `true` if the value of the sign of `x` is negative, otherwise `false`. See also [`sign`](@ref) and [`copysign`](@ref). @@ -352,7 +352,7 @@ one(x::T) where {T<:Number} = one(T) oneunit(x::T) oneunit(T::Type) -Returns `T(one(x))`, where `T` is either the type of the argument or +Return `T(one(x))`, where `T` is either the type of the argument or (if a type is passed) the argument. This differs from [`one`](@ref) for dimensionful quantities: `one` is dimensionless (a multiplicative identity) while `oneunit` is dimensionful (of the same type as `x`, or of type `T`). diff --git a/base/operators.jl b/base/operators.jl index 6523b3716d1d1..acc5f9ba4fb01 100644 --- a/base/operators.jl +++ b/base/operators.jl @@ -969,15 +969,21 @@ julia> map(uppercase∘first, ["apple", "banana", "carrot"]) 'B': ASCII/Unicode U+0042 (category Lu: Letter, uppercase) 'C': ASCII/Unicode U+0043 (category Lu: Letter, uppercase) +julia> (==(6)∘length).(["apple", "banana", "carrot"]) +3-element BitVector: + 0 + 1 + 1 + julia> fs = [ x -> 2x - x -> x/2 x -> x-1 + x -> x/2 x -> x+1 ]; julia> ∘(fs...)(3) -3.0 +2.0 ``` See also [`ComposedFunction`](@ref), [`!f::Function`](@ref). 
""" diff --git a/base/partr.jl b/base/partr.jl index a4cfcb60fe520..c5bb6603d53af 100644 --- a/base/partr.jl +++ b/base/partr.jl @@ -2,7 +2,7 @@ module Partr -using ..Threads: SpinLock, nthreads, threadid +using ..Threads: SpinLock, maxthreadid, threadid # a task minheap mutable struct taskheap diff --git a/base/pcre.jl b/base/pcre.jl index d689e9be29113..7597c1217ca9e 100644 --- a/base/pcre.jl +++ b/base/pcre.jl @@ -29,7 +29,7 @@ THREAD_MATCH_CONTEXTS::Vector{Ptr{Cvoid}} = [C_NULL] PCRE_COMPILE_LOCK = nothing _tid() = Int(ccall(:jl_threadid, Int16, ())) + 1 -_nth() = Int(unsafe_load(cglobal(:jl_n_threads, Cint))) +_mth() = Int(Core.Intrinsics.atomic_pointerref(cglobal(:jl_n_threads, Cint), :acquire)) function get_local_match_context() tid = _tid() @@ -41,7 +41,7 @@ function get_local_match_context() try ctxs = THREAD_MATCH_CONTEXTS if length(ctxs) < tid - global THREAD_MATCH_CONTEXTS = ctxs = copyto!(fill(C_NULL, _nth()), ctxs) + global THREAD_MATCH_CONTEXTS = ctxs = copyto!(fill(C_NULL, length(ctxs) + _mth()), ctxs) end finally unlock(l) diff --git a/base/reduce.jl b/base/reduce.jl index a7f821a73be92..9df2171a96fd1 100644 --- a/base/reduce.jl +++ b/base/reduce.jl @@ -530,7 +530,7 @@ sum(f, a; kw...) = mapreduce(f, add_sum, a; kw...) """ sum(itr; [init]) -Returns the sum of all elements in a collection. +Return the sum of all elements in a collection. The return type is `Int` for signed integers of less than system word size, and `UInt` for unsigned integers of less than system word size. For all other @@ -562,7 +562,7 @@ sum(a::AbstractArray{Bool}; kw...) = """ prod(f, itr; [init]) -Returns the product of `f` applied to each element of `itr`. +Return the product of `f` applied to each element of `itr`. The return type is `Int` for signed integers of less than system word size, and `UInt` for unsigned integers of less than system word size. For all other @@ -586,7 +586,7 @@ prod(f, a; kw...) = mapreduce(f, mul_prod, a; kw...) """ prod(itr; [init]) -Returns the product of all elements of a collection. +Return the product of all elements of a collection. The return type is `Int` for signed integers of less than system word size, and `UInt` for unsigned integers of less than system word size. For all other @@ -673,7 +673,7 @@ end """ maximum(f, itr; [init]) -Returns the largest result of calling function `f` on each element of `itr`. +Return the largest result of calling function `f` on each element of `itr`. The value returned for empty `itr` can be specified by `init`. It must be a neutral element for `max` (i.e. which is less than or equal to any @@ -700,7 +700,7 @@ maximum(f, a; kw...) = mapreduce(f, max, a; kw...) """ minimum(f, itr; [init]) -Returns the smallest result of calling function `f` on each element of `itr`. +Return the smallest result of calling function `f` on each element of `itr`. The value returned for empty `itr` can be specified by `init`. It must be a neutral element for `min` (i.e. which is greater than or equal to any @@ -727,7 +727,7 @@ minimum(f, a; kw...) = mapreduce(f, min, a; kw...) """ maximum(itr; [init]) -Returns the largest element in a collection. +Return the largest element in a collection. The value returned for empty `itr` can be specified by `init`. It must be a neutral element for `max` (i.e. which is less than or equal to any @@ -759,7 +759,7 @@ maximum(a; kw...) = mapreduce(identity, max, a; kw...) """ minimum(itr; [init]) -Returns the smallest element in a collection. +Return the smallest element in a collection. 
The value returned for empty `itr` can be specified by `init`. It must be a neutral element for `min` (i.e. which is greater than or equal to any @@ -870,7 +870,7 @@ end """ findmax(f, domain) -> (f(x), index) -Returns a pair of a value in the codomain (outputs of `f`) and the index of +Return a pair of a value in the codomain (outputs of `f`) and the index of the corresponding value in the `domain` (inputs to `f`) such that `f(x)` is maximised. If there are multiple maximal points, then the first one will be returned. @@ -929,7 +929,7 @@ _findmax(a, ::Colon) = findmax(identity, a) """ findmin(f, domain) -> (f(x), index) -Returns a pair of a value in the codomain (outputs of `f`) and the index of +Return a pair of a value in the codomain (outputs of `f`) and the index of the corresponding value in the `domain` (inputs to `f`) such that `f(x)` is minimised. If there are multiple minimal points, then the first one will be returned. diff --git a/base/reflection.jl b/base/reflection.jl index d6c044103d0bc..4313f03f2b8d1 100644 --- a/base/reflection.jl +++ b/base/reflection.jl @@ -100,7 +100,9 @@ since it is not idiomatic to explicitly export names from `Main`. See also: [`@locals`](@ref Base.@locals), [`@__MODULE__`](@ref). """ names(m::Module; all::Bool = false, imported::Bool = false) = - sort!(ccall(:jl_module_names, Array{Symbol,1}, (Any, Cint, Cint), m, all, imported)) + sort!(unsorted_names(m; all, imported)) +unsorted_names(m::Module; all::Bool = false, imported::Bool = false) = + ccall(:jl_module_names, Array{Symbol,1}, (Any, Cint, Cint), m, all, imported) isexported(m::Module, s::Symbol) = ccall(:jl_module_exports_p, Cint, (Any, Any), m, s) != 0 isdeprecated(m::Module, s::Symbol) = ccall(:jl_is_binding_deprecated, Cint, (Any, Any), m, s) != 0 @@ -119,10 +121,10 @@ function resolve(g::GlobalRef; force::Bool=false) return g end -const NamedTuple_typename = NamedTuple.body.body.name +const _NAMEDTUPLE_NAME = NamedTuple.body.body.name function _fieldnames(@nospecialize t) - if t.name === NamedTuple_typename + if t.name === _NAMEDTUPLE_NAME if t.parameters[1] isa Tuple return t.parameters[1] else @@ -498,8 +500,8 @@ end ismutable(v) -> Bool Return `true` if and only if value `v` is mutable. See [Mutable Composite Types](@ref) -for a discussion of immutability. Note that this function works on values, so if you give it -a type, it will tell you that a value of `DataType` is mutable. +for a discussion of immutability. Note that this function works on values, so if you +give it a `DataType`, it will tell you that a value of the type is mutable. See also [`isbits`](@ref), [`isstructtype`](@ref). 
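A brief illustration of the `unsorted_names` split from the `reflection.jl` hunk above: `names` keeps its sorted contract, while internal callers (such as the type-alias search in `show`, changed later in this patch) can skip the sort. A sketch, assuming this build; `Base.unsorted_names` is internal API:

```julia
ns = Base.unsorted_names(Base; all = false, imported = false)
# Same symbols as `names(Base)`; only the ordering guarantee differs.
sort(ns) == names(Base)   # true
```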
@@ -797,7 +799,7 @@ function fieldcount(@nospecialize t) if !(t isa DataType) throw(TypeError(:fieldcount, DataType, t)) end - if t.name === NamedTuple_typename + if t.name === _NAMEDTUPLE_NAME names, types = t.parameters[1], t.parameters[2] if names isa Tuple return length(names) @@ -864,7 +866,7 @@ function to_tuple_type(@nospecialize(t)) if isa(t, Type) && t <: Tuple for p in unwrap_unionall(t).parameters if isa(p, Core.TypeofVararg) - p = p.T + p = unwrapva(p) end if !(isa(p, Type) || isa(p, TypeVar)) error("argument tuple type must contain only types") @@ -1095,6 +1097,7 @@ struct CodegenParams prefer_specsig::Cint gnu_pubnames::Cint debug_info_kind::Cint + safepoint_on_entry::Cint lookup::Ptr{Cvoid} @@ -1103,12 +1106,14 @@ struct CodegenParams function CodegenParams(; track_allocations::Bool=true, code_coverage::Bool=true, prefer_specsig::Bool=false, gnu_pubnames=true, debug_info_kind::Cint = default_debug_info_kind(), + safepoint_on_entry::Bool=true, lookup::Ptr{Cvoid}=cglobal(:jl_rettype_inferred), generic_context = nothing) return new( Cint(track_allocations), Cint(code_coverage), Cint(prefer_specsig), Cint(gnu_pubnames), debug_info_kind, + Cint(safepoint_on_entry), lookup, generic_context) end end @@ -1802,7 +1807,7 @@ function delete_method(m::Method) end function get_methodtable(m::Method) - return ccall(:jl_method_table_for, Any, (Any,), m.sig)::Core.MethodTable + return ccall(:jl_method_get_table, Any, (Any,), m)::Core.MethodTable end """ diff --git a/base/refpointer.jl b/base/refpointer.jl index 290ffc51cbf2a..0cb2df6d24bce 100644 --- a/base/refpointer.jl +++ b/base/refpointer.jl @@ -112,6 +112,8 @@ struct RefArray{T,A<:AbstractArray{T},R} <: Ref{T} end RefArray(x::AbstractArray{T}, i::Int, roots::Any) where {T} = RefArray{T,typeof(x),Any}(x, i, roots) RefArray(x::AbstractArray{T}, i::Int=1, roots::Nothing=nothing) where {T} = RefArray{T,typeof(x),Nothing}(x, i, nothing) +RefArray(x::AbstractArray{T}, i::Integer, roots::Any) where {T} = RefArray{T,typeof(x),Any}(x, Int(i), roots) +RefArray(x::AbstractArray{T}, i::Integer, roots::Nothing=nothing) where {T} = RefArray{T,typeof(x),Nothing}(x, Int(i), nothing) convert(::Type{Ref{T}}, x::AbstractArray{T}) where {T} = RefArray(x, 1) function unsafe_convert(P::Union{Type{Ptr{T}},Type{Ptr{Cvoid}}}, b::RefArray{T})::P where T diff --git a/base/regex.jl b/base/regex.jl index 7c4c780ba0a7c..dfd5d29b8d978 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -466,6 +466,18 @@ original string, otherwise they must be from disjoint character ranges. !!! compat "Julia 1.7" Using a character as the pattern requires at least Julia 1.7. + +# Examples +```jldoctest +julia> count('a', "JuliaLang") +2 + +julia> count(r"a(.)a", "cabacabac", overlap=true) +3 + +julia> count(r"a(.)a", "cabacabac") +2 +``` """ function count(t::Union{AbstractChar,AbstractString,AbstractPattern}, s::AbstractString; overlap::Bool=false) n = 0 diff --git a/base/reinterpretarray.jl b/base/reinterpretarray.jl index f34c295918f6a..c0df51000376a 100644 --- a/base/reinterpretarray.jl +++ b/base/reinterpretarray.jl @@ -640,7 +640,7 @@ function intersect(p1::Padding, p2::Padding) Padding(start, max(0, stop-start)) end -struct PaddingError +struct PaddingError <: Exception S::Type T::Type end diff --git a/base/ryu/utils.jl b/base/ryu/utils.jl index 352f8f19cb9be..af1354bf851f3 100644 --- a/base/ryu/utils.jl +++ b/base/ryu/utils.jl @@ -65,14 +65,7 @@ lengthforindex(idx) = div(((Int64(16 * idx) * 1292913986) >> 32) + 1 + 16 + 8, 9 Return `true` if `5^p` is a divisor of `x`. 
""" @inline function pow5(x, p) - count = 0 - while true - q = div(x, 5) - r = x - 5 * q - r != 0 && return count >= p - x = q - count += 1 - end + x % (5^p) == 0 end """ diff --git a/base/set.jl b/base/set.jl index c1c9cc91d29c1..6f8580e222e40 100644 --- a/base/set.jl +++ b/base/set.jl @@ -210,7 +210,7 @@ unique(r::AbstractRange) = allunique(r) ? r : oftype(r, r[begin:begin]) """ unique(f, itr) -Returns an array containing one value from `itr` for each unique value produced by `f` +Return an array containing one value from `itr` for each unique value produced by `f` applied to elements of `itr`. # Examples diff --git a/base/show.jl b/base/show.jl index 1e281fbd6d6d1..8769a414a269e 100644 --- a/base/show.jl +++ b/base/show.jl @@ -606,7 +606,7 @@ function make_typealias(@nospecialize(x::Type)) end x isa UnionAll && push!(xenv, x) for mod in mods - for name in names(mod) + for name in unsorted_names(mod) if isdefined(mod, name) && !isdeprecated(mod, name) && isconst(mod, name) alias = getfield(mod, name) if alias isa Type && !has_free_typevars(alias) && !print_without_params(alias) && x <: alias @@ -810,7 +810,7 @@ function make_typealiases(@nospecialize(x::Type)) end x isa UnionAll && push!(xenv, x) for mod in mods - for name in names(mod) + for name in unsorted_names(mod) if isdefined(mod, name) && !isdeprecated(mod, name) && isconst(mod, name) alias = getfield(mod, name) if alias isa Type && !has_free_typevars(alias) && !print_without_params(alias) && !(alias <: Tuple) @@ -1366,7 +1366,7 @@ show(io::IO, s::Symbol) = show_unquoted_quote_expr(io, s, 0, 0, 0) # # This is consistent with many other show methods, i.e.: # show(Set([1,2,3])) # ==> "Set{Int64}([2,3,1])" -# eval(Meta.parse("Set{Int64}([2,3,1])”) # ==> An actual set +# eval(Meta.parse("Set{Int64}([2,3,1])")) # ==> An actual set # While this isn’t true of ALL show methods, it is of all ASTs. 
const ExprNode = Union{Expr, QuoteNode, Slot, LineNumberNode, SSAValue, @@ -2752,7 +2752,7 @@ function dump(io::IOContext, x::DataType, n::Int, indent) tvar_io = IOContext(tvar_io, :unionall_env => tparam) end end - if x.name === NamedTuple_typename && !(x.parameters[1] isa Tuple) + if x.name === _NAMEDTUPLE_NAME && !(x.parameters[1] isa Tuple) # named tuple type with unknown field names return end diff --git a/base/sort.jl b/base/sort.jl index f6f737ac2082e..ce3f2707b655b 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -2,21 +2,12 @@ module Sort -import ..@__MODULE__, ..parentmodule -const Base = parentmodule(@__MODULE__) -using .Base.Order -using .Base: copymutable, LinearIndices, length, (:), iterate, OneTo, - eachindex, axes, first, last, similar, zip, OrdinalRange, firstindex, lastindex, - AbstractVector, @inbounds, AbstractRange, @eval, @inline, Vector, @noinline, - AbstractMatrix, AbstractUnitRange, isless, identity, eltype, >, <, <=, >=, |, +, -, *, !, - extrema, sub_with_overflow, add_with_overflow, oneunit, div, getindex, setindex!, - length, resize!, fill, Missing, require_one_based_indexing, keytype, UnitRange, - min, max, reinterpret, signed, unsigned, Signed, Unsigned, typemin, xor, Type, BitSigned, Val, - midpoint, @boundscheck, checkbounds - -using .Base: >>>, !==, != - -import .Base: +using Base.Order + +using Base: copymutable, midpoint, require_one_based_indexing, + sub_with_overflow, add_with_overflow, OneTo, BitSigned, BitIntegerType + +import Base: sort, sort!, issorted, @@ -95,7 +86,7 @@ issorted(itr; issorted(itr, ord(lt,by,rev,order)) function partialsort!(v::AbstractVector, k::Union{Integer,OrdinalRange}, o::Ordering) - sort!(v, firstindex(v), lastindex(v), PartialQuickSort(k), o) + sort!(v, _PartialQuickSort(k), o) maybeview(v, k) end @@ -107,10 +98,9 @@ maybeview(v, k::Integer) = v[k] Partially sort the vector `v` in place, according to the order specified by `by`, `lt` and `rev` so that the value at index `k` (or range of adjacent values if `k` is a range) occurs -at the position where it would appear if the array were fully sorted via a non-stable -algorithm. If `k` is a single index, that value is returned; if `k` is a range, an array of -values at those indices is returned. Note that `partialsort!` does not fully sort the input -array. +at the position where it would appear if the array were fully sorted. If `k` is a single +index, that value is returned; if `k` is a range, an array of values at those indices is +returned. Note that `partialsort!` may not fully sort the input array. # Examples ```jldoctest @@ -422,51 +412,39 @@ insorted(x, r::AbstractRange) = in(x, r) abstract type Algorithm end struct InsertionSortAlg <: Algorithm end -struct QuickSortAlg <: Algorithm end struct MergeSortAlg <: Algorithm end +struct AdaptiveSortAlg <: Algorithm end """ - AdaptiveSort(fallback) - -Indicate that a sorting function should use the fastest available algorithm. + PartialQuickSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}) -Adaptive sort will use the algorithm specified by `fallback` for types and orders that are -not [`UIntMappable`](@ref). Otherwise, it will typically use: - * Insertion sort for short vectors - * Radix sort for long vectors - * Counting sort for vectors of integers spanning a short range - -Adaptive sort is guaranteed to be stable if the fallback algorithm is stable. 
-""" -struct AdaptiveSort{Fallback <: Algorithm} <: Algorithm - fallback::Fallback -end -""" - PartialQuickSort{T <: Union{Integer,OrdinalRange}} +Indicate that a sorting function should use the partial quick sort algorithm. -Indicate that a sorting function should use the partial quick sort -algorithm. Partial quick sort returns the smallest `k` elements sorted from smallest -to largest, finding them and sorting them using [`QuickSort`](@ref). +Partial quick sort finds and sorts the elements that would end up in positions +`lo:hi` using [`QuickSort`](@ref). Characteristics: - * *not stable*: does not preserve the ordering of elements which - compare equal (e.g. "a" and "A" in a sort of letters which - ignores case). - * *in-place* in memory. + * *stable*: preserves the ordering of elements which compare equal + (e.g. "a" and "A" in a sort of letters which ignores case). + * *not in-place* in memory. * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). """ -struct PartialQuickSort{T <: Union{Integer,OrdinalRange}} <: Algorithm - k::T +struct PartialQuickSort{L<:Union{Integer,Missing}, H<:Union{Integer,Missing}} <: Algorithm + lo::L + hi::H end - +PartialQuickSort(k::Integer) = PartialQuickSort(missing, k) +PartialQuickSort(k::OrdinalRange) = PartialQuickSort(first(k), last(k)) +_PartialQuickSort(k::Integer) = PartialQuickSort(k, k) +_PartialQuickSort(k::OrdinalRange) = PartialQuickSort(k) """ InsertionSort -Indicate that a sorting function should use the insertion sort -algorithm. Insertion sort traverses the collection one element -at a time, inserting each element into its correct, sorted position in -the output vector. +Indicate that a sorting function should use the insertion sort algorithm. + +Insertion sort traverses the collection one element at a time, inserting +each element into its correct, sorted position in the output vector. Characteristics: * *stable*: preserves the ordering of elements which @@ -477,29 +455,34 @@ Characteristics: it is well-suited to small collections but should not be used for large ones. """ const InsertionSort = InsertionSortAlg() + """ QuickSort -Indicate that a sorting function should use the quick sort -algorithm, which is *not* stable. +Indicate that a sorting function should use the quick sort algorithm. + +Quick sort picks a pivot element, partitions the array based on the pivot, +and then sorts the elements before and after the pivot recursively. Characteristics: - * *not stable*: does not preserve the ordering of elements which - compare equal (e.g. "a" and "A" in a sort of letters which - ignores case). - * *in-place* in memory. + * *stable*: preserves the ordering of elements which compare equal + (e.g. "a" and "A" in a sort of letters which ignores case). + * *not in-place* in memory. * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). - * *good performance* for large collections. + * *good performance* for almost all large collections. + * *quadratic worst case runtime* in pathological cases + (vanishingly rare for non-malicious input) """ -const QuickSort = QuickSortAlg() +const QuickSort = PartialQuickSort(missing, missing) + """ MergeSort -Indicate that a sorting function should use the merge sort -algorithm. Merge sort divides the collection into -subcollections and repeatedly merges them, sorting each -subcollection at each step, until the entire -collection has been recombined in sorted form. +Indicate that a sorting function should use the merge sort algorithm. 
+ +Merge sort divides the collection into subcollections and +repeatedly merges them, sorting each subcollection at each step, +until the entire collection has been recombined in sorted form. Characteristics: * *stable*: preserves the ordering of elements which compare @@ -508,10 +491,23 @@ Characteristics: * *not in-place* in memory. * *divide-and-conquer* sort strategy. """ -const MergeSort = MergeSortAlg() +const MergeSort = MergeSortAlg() + +""" + AdaptiveSort + +Indicate that a sorting function should use the fastest available stable algorithm. + +Currently, AdaptiveSort uses + * [`InsertionSort`](@ref) for short vectors + * [`QuickSort`](@ref) for vectors that are not [`UIntMappable`](@ref) + * Radix sort for long vectors + * Counting sort for vectors of integers spanning a short range +""" +const AdaptiveSort = AdaptiveSortAlg() -const DEFAULT_UNSTABLE = AdaptiveSort(QuickSort) -const DEFAULT_STABLE = AdaptiveSort(MergeSort) +const DEFAULT_UNSTABLE = AdaptiveSort +const DEFAULT_STABLE = AdaptiveSort const SMALL_ALGORITHM = InsertionSort const SMALL_THRESHOLD = 20 @@ -533,75 +529,92 @@ function sort!(v::AbstractVector, lo::Integer, hi::Integer, ::InsertionSortAlg, return v end -# selectpivot! -# -# Given 3 locations in an array (lo, mi, and hi), sort v[lo], v[mi], v[hi]) and -# choose the middle value as a pivot +# select a pivot for QuickSort # -# Upon return, the pivot is in v[lo], and v[hi] is guaranteed to be -# greater than the pivot +# This method is redefined to rand(lo:hi) in Random.jl +# We can't use rand here because it is not available in Core.Compiler and +# because rand is defined in the stdlib Random.jl after sorting is used in Base. +select_pivot(lo::Integer, hi::Integer) = typeof(hi-lo)(hash(lo) % (hi-lo+1)) + lo -@inline function selectpivot!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) +# select a pivot, partition v[lo:hi] according +# to the pivot, and store the result in t[lo:hi]. +# +# returns (pivot, pivot_index) where pivot_index is the location the pivot +# should end up, but does not set t[pivot_index] = pivot +function partition!(t::AbstractVector, lo::Integer, hi::Integer, o::Ordering, v::AbstractVector, rev::Bool) + pivot_index = select_pivot(lo, hi) + trues = 0 @inbounds begin - mi = midpoint(lo, hi) - - # sort v[mi] <= v[lo] <= v[hi] such that the pivot is immediately in place - if lt(o, v[lo], v[mi]) - v[mi], v[lo] = v[lo], v[mi] + pivot = v[pivot_index] + while lo < pivot_index + x = v[lo] + fx = rev ? !lt(o, x, pivot) : lt(o, pivot, x) + t[(fx ? hi : lo) - trues] = x + trues += fx + lo += 1 end - - if lt(o, v[hi], v[lo]) - if lt(o, v[hi], v[mi]) - v[hi], v[lo], v[mi] = v[lo], v[mi], v[hi] - else - v[hi], v[lo] = v[lo], v[hi] - end + while lo < hi + x = v[lo+1] + fx = rev ? lt(o, pivot, x) : !lt(o, x, pivot) + t[(fx ? hi : lo) - trues] = x + trues += fx + lo += 1 end - - # return the pivot - return v[lo] end -end -# partition! 
-# -# select a pivot, and partition v according to the pivot + # pivot_index = lo-trues + # t[pivot_index] is whatever it was before + # t[pivot_index] >* pivot, reverse stable -function partition!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) - pivot = selectpivot!(v, lo, hi, o) - # pivot == v[lo], v[hi] > pivot - i, j = lo, hi - @inbounds while true - i += 1; j -= 1 - while lt(o, v[i], pivot); i += 1; end; - while lt(o, pivot, v[j]); j -= 1; end; - i >= j && break - v[i], v[j] = v[j], v[i] + pivot, lo-trues +end + +function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::PartialQuickSort, + o::Ordering, t::AbstractVector=similar(v), swap=false, rev=false; + check_presorted=true) + + if check_presorted && !rev && !swap + # Even if we are only sorting a short region, we can only short-circuit if the whole + # vector is presorted. A weaker condition is possible, but unlikely to be useful. + if _issorted(v, lo, hi, o) + return v + elseif _issorted(v, lo, hi, Lt((x, y) -> !lt(o, x, y))) + # Reverse only if necessary. Using issorted(..., Reverse(o)) would violate stability. + return reverse!(v, lo, hi) + end end - v[j], v[lo] = pivot, v[j] - # v[j] == pivot - # v[k] >= pivot for k > j - # v[i] <= pivot for i < j - return j -end + while lo < hi && hi - lo > SMALL_THRESHOLD + pivot, j = swap ? partition!(v, lo, hi, o, t, rev) : partition!(t, lo, hi, o, v, rev) + @inbounds v[j] = pivot + swap = !swap -function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::QuickSortAlg, o::Ordering) - @inbounds while lo < hi - hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o) - j = partition!(v, lo, hi, o) - if j-lo < hi-j - # recurse on the smaller chunk - # this is necessary to preserve O(log(n)) - # stack space in the worst case (rather than O(n)) - lo < (j-1) && sort!(v, lo, j-1, a, o) + # For QuickSort, a.lo === a.hi === missing, so the first two branches get skipped + if !ismissing(a.lo) && j <= a.lo # Skip sorting the lower part + swap && copyto!(v, lo, t, lo, j-lo) + rev && reverse!(v, lo, j-1) lo = j+1 - else - j+1 < hi && sort!(v, j+1, hi, a, o) + rev = !rev + elseif !ismissing(a.hi) && a.hi <= j # Skip sorting the upper part + swap && copyto!(v, j+1, t, j+1, hi-j) + rev || reverse!(v, j+1, hi) + hi = j-1 + elseif j-lo < hi-j + # Sort the lower part recursively because it is smaller. Recursing on the + # smaller part guarantees O(log(n)) stack space even on pathological inputs. + sort!(v, lo, j-1, a, o, t, swap, rev; check_presorted=false) + lo = j+1 + rev = !rev + else # Sort the higher part recursively + sort!(v, j+1, hi, a, o, t, swap, !rev; check_presorted=false) hi = j-1 end end - return v + hi < lo && return v + swap && copyto!(v, lo, t, lo, hi-lo+1) + rev && reverse!(v, lo, hi) + sort!(v, lo, hi, SMALL_ALGORITHM, o) end function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, a::MergeSortAlg, o::Ordering, @@ -613,7 +626,7 @@ function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, a::MergeSortAlg, t = t0 === nothing ? 
similar(v, m-lo+1) : t0 length(t) < m-lo+1 && resize!(t, m-lo+1) - Base.require_one_based_indexing(t) + require_one_based_indexing(t) sort!(v, lo, m, a, o, t) sort!(v, m+1, hi, a, o, t) @@ -646,32 +659,6 @@ function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, a::MergeSortAlg, return v end -function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::PartialQuickSort, - o::Ordering) - @inbounds while lo < hi - hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o) - j = partition!(v, lo, hi, o) - - if j <= first(a.k) - lo = j+1 - elseif j >= last(a.k) - hi = j-1 - else - # recurse on the smaller chunk - # this is necessary to preserve O(log(n)) - # stack space in the worst case (rather than O(n)) - if j-lo < hi-j - lo < (j-1) && sort!(v, lo, j-1, a, o) - lo = j+1 - else - hi > (j+1) && sort!(v, j+1, hi, a, o) - hi = j-1 - end - end - end - return v -end - # This is a stable least significant bit first radix sort. # # That is, it first sorts the entire vector by the last chunk_size bits, then by the second @@ -741,7 +728,7 @@ end # For AbstractVector{Bool}, counting sort is always best. # This is an implementation of counting sort specialized for Bools. # Accepts unused buffer to avoid method ambiguity. -function sort!(v::AbstractVector{Bool}, lo::Integer, hi::Integer, a::AdaptiveSort, o::Ordering, +function sort!(v::AbstractVector{Bool}, lo::Integer, hi::Integer, ::AdaptiveSortAlg, o::Ordering, t::Union{AbstractVector{Bool}, Nothing}=nothing) first = lt(o, false, true) ? false : lt(o, true, false) ? true : return v count = 0 @@ -773,12 +760,12 @@ function _issorted(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) end true end -function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, a::AdaptiveSort, o::Ordering, - t::Union{AbstractVector{T}, Nothing}=nothing) where T +function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, ::AdaptiveSortAlg, o::Ordering, + t::Union{AbstractVector{T}, Nothing}=nothing) where T # if the sorting task is not UIntMappable, then we can't radix sort or sort_int_range! # so we skip straight to the fallback algorithm which is comparison based. - U = UIntMappable(T, o) - U === nothing && return sort!(v, lo, hi, a.fallback, o) + U = UIntMappable(eltype(v), o) + U === nothing && return sort!(v, lo, hi, QuickSort, o) # to avoid introducing excessive detection costs for the trivial sorting problem # and to avoid overflow, we check for small inputs before any other runtime checks @@ -795,6 +782,8 @@ function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, a::AdaptiveSort, # For large arrays, a reverse-sorted check is essentially free (overhead < 1%) if lenm1 >= 500 && _issorted(v, lo, hi, ReverseOrdering(o)) + # If reversing is valid, do so. This does not violate stability + # because being UIntMappable implies a linear order. reverse!(v, lo, hi) return v end @@ -813,7 +802,7 @@ function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, a::AdaptiveSort, return sort_int_range!(v, Int(v_range+1), v_min, o === Forward ? identity : reverse, lo, hi) end end - return sort!(v, lo, hi, a.fallback, o) + return sort!(v, lo, hi, QuickSort, o; check_presorted=false) end v_min, v_max = _extrema(v, lo, hi, o) @@ -839,17 +828,15 @@ function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, a::AdaptiveSort, # where we only need to radix over the last few bits (5, in the example). bits = unsigned(8sizeof(u_range) - leading_zeros(u_range)) - # radix sort runs in O(bits * lenm1), insertion sort runs in O(lenm1^2). 
Radix sort - # has a constant factor that is three times higher, so radix runtime is 3bits * lenm1 - # and insertion runtime is lenm1^2. Empirically, insertion is faster than radix iff - # lenm1 < 3bits. - # Insertion < Radix - # lenm1^2 < 3 * bits * lenm1 - # lenm1 < 3bits - if lenm1 < 3bits - # at lenm1 = 64*3-1, QuickSort is about 20% faster than InsertionSort. - alg = a.fallback === QuickSort && lenm1 > 120 ? QuickSort : SMALL_ALGORITHM - return sort!(v, lo, hi, alg, o) + # radix sort runs in O(bits * lenm1), quick sort runs in O(lenm1 * log(lenm1)). + # dividing both sides by lenm1 and introducing empirical constant factors yields + # the following heuristic for when QuickSort is faster than RadixSort + if 22log(lenm1) < bits + 70 + return if lenm1 > 80 + sort!(v, lo, hi, QuickSort, o; check_presorted=false) + else + sort!(v, lo, hi, SMALL_ALGORITHM, o) + end end # At this point, we are committed to radix sort. @@ -891,12 +878,12 @@ defalg(v::AbstractArray{Missing}) = DEFAULT_UNSTABLE # for method disambiguation defalg(v::AbstractArray{Union{}}) = DEFAULT_UNSTABLE # for method disambiguation function sort!(v::AbstractVector{T}, alg::Algorithm, - order::Ordering, t::Union{AbstractVector{T}, Nothing}=nothing) where T + order::Ordering, t::Union{AbstractVector{T}, Nothing}=nothing) where T sort!(v, firstindex(v), lastindex(v), alg, order, t) end function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, alg::Algorithm, - order::Ordering, t::Union{AbstractVector{T}, Nothing}=nothing) where T + order::Ordering, t::Union{AbstractVector{T}, Nothing}=nothing) where T sort!(v, lo, hi, alg, order) end @@ -1096,7 +1083,7 @@ function partialsortperm!(ix::AbstractVector{<:Integer}, v::AbstractVector, end # do partial quicksort - sort!(ix, PartialQuickSort(k), Perm(ord(lt, by, rev, order), v)) + sort!(ix, _PartialQuickSort(k), Perm(ord(lt, by, rev, order), v)) maybeview(ix, k) end @@ -1416,10 +1403,7 @@ uint_map(x::Signed, ::ForwardOrdering) = uint_unmap(::Type{T}, u::Unsigned, ::ForwardOrdering) where T <: Signed = xor(signed(u), typemin(T)) -# unsigned(Int) is not available during bootstrapping. -for (U, S) in [(UInt8, Int8), (UInt16, Int16), (UInt32, Int32), (UInt64, Int64), (UInt128, Int128)] - @eval UIntMappable(::Union{Type{$U}, Type{$S}}, ::ForwardOrdering) = $U -end +UIntMappable(T::BitIntegerType, ::ForwardOrdering) = unsigned(T) # Floats are not UIntMappable under regular orderings because they fail on NaN edge cases. # uint mappings for floats are defined in Float, where the Left and Right orderings @@ -1461,14 +1445,12 @@ end module Float using ..Sort using ...Order -using ..Base: @inbounds, AbstractVector, Vector, last, firstindex, lastindex, Missing, Type, reinterpret +using Base: IEEEFloat import Core.Intrinsics: slt_int import ..Sort: sort!, UIntMappable, uint_map, uint_unmap import ...Order: lt, DirectOrdering -# IEEEFloat is not available in Core.Compiler -const Floats = Union{Float16, Float32, Float64} # fpsort is not safe for vectors of mixed bitwidth such as Vector{Union{Float32, Float64}}. # This type allows us to dispatch only when it is safe to do so. See #42739 for more info. 
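Context for the `Left`/`Right` orderings used below: IEEE-754 bit patterns, viewed as unsigned integers, are monotone within each sign class, increasing for nonnegative floats and decreasing for negative ones. That is why `fpsort!` partitions by sign and sorts each half under its own ordering. A sketch of the underlying fact (NaNs excluded, as `fpsort!` moves them aside first):

```julia
pos = sort([0.25, 1.5, 3.0])
issorted(map(x -> reinterpret(UInt64, x), pos))              # true: `Right`
neg = sort([-3.0, -1.5, -0.25])
issorted(map(x -> reinterpret(UInt64, x), neg); rev = true)  # true: hence `Left`
```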
const FPSortable = Union{ @@ -1489,8 +1471,8 @@ right(::DirectOrdering) = Right() left(o::Perm) = Perm(left(o.order), o.data) right(o::Perm) = Perm(right(o.order), o.data) -lt(::Left, x::T, y::T) where {T<:Floats} = slt_int(y, x) -lt(::Right, x::T, y::T) where {T<:Floats} = slt_int(x, y) +lt(::Left, x::T, y::T) where {T<:IEEEFloat} = slt_int(y, x) +lt(::Right, x::T, y::T) where {T<:IEEEFloat} = slt_int(x, y) uint_map(x::Float16, ::Left) = ~reinterpret(UInt16, x) uint_unmap(::Type{Float16}, u::UInt16, ::Left) = reinterpret(Float16, ~u) @@ -1510,11 +1492,11 @@ uint_map(x::Float64, ::Right) = reinterpret(UInt64, x) uint_unmap(::Type{Float64}, u::UInt64, ::Right) = reinterpret(Float64, u) UIntMappable(::Type{Float64}, ::Union{Left, Right}) = UInt64 -isnan(o::DirectOrdering, x::Floats) = (x!=x) +isnan(o::DirectOrdering, x::IEEEFloat) = (x!=x) isnan(o::DirectOrdering, x::Missing) = false isnan(o::Perm, i::Integer) = isnan(o.order,o.data[i]) -ismissing(o::DirectOrdering, x::Floats) = false +ismissing(o::DirectOrdering, x::IEEEFloat) = false ismissing(o::DirectOrdering, x::Missing) = true ismissing(o::Perm, i::Integer) = ismissing(o.order,o.data[i]) @@ -1586,12 +1568,12 @@ specials2end!(v::AbstractVector{<:Integer}, a::Algorithm, o::Perm{<:ForwardOrder specials2end!(v::AbstractVector{<:Integer}, a::Algorithm, o::Perm{<:ReverseOrdering}) = specials2left!(v, a, o) -issignleft(o::ForwardOrdering, x::Floats) = lt(o, x, zero(x)) -issignleft(o::ReverseOrdering, x::Floats) = lt(o, x, -zero(x)) +issignleft(o::ForwardOrdering, x::IEEEFloat) = lt(o, x, zero(x)) +issignleft(o::ReverseOrdering, x::IEEEFloat) = lt(o, x, -zero(x)) issignleft(o::Perm, i::Integer) = issignleft(o.order, o.data[i]) function fpsort!(v::AbstractVector{T}, a::Algorithm, o::Ordering, - t::Union{AbstractVector{T}, Nothing}=nothing) where T + t::Union{AbstractVector{T}, Nothing}=nothing) where T # fpsort!'s optimizations speed up comparisons, of which there are O(nlogn). # The overhead is O(n). For n < 10, it's not worth it. length(v) < 10 && return sort!(v, firstindex(v), lastindex(v), SMALL_ALGORITHM, o, t) @@ -1610,15 +1592,12 @@ function fpsort!(v::AbstractVector{T}, a::Algorithm, o::Ordering, end -fpsort!(v::AbstractVector, a::Sort.PartialQuickSort, o::Ordering) = - sort!(v, firstindex(v), lastindex(v), a, o) - function sort!(v::FPSortable, a::Algorithm, o::DirectOrdering, - t::Union{FPSortable, Nothing}=nothing) + t::Union{FPSortable, Nothing}=nothing) fpsort!(v, a, o, t) end function sort!(v::AbstractVector{T}, a::Algorithm, o::Perm{<:DirectOrdering,<:FPSortable}, - t::Union{AbstractVector{T}, Nothing}=nothing) where T <: Union{Signed, Unsigned} + t::Union{AbstractVector{T}, Nothing}=nothing) where T <: Union{Signed, Unsigned} fpsort!(v, a, o, t) end diff --git a/base/stacktraces.jl b/base/stacktraces.jl index 3cb81d82bd3f7..ad088ffb51855 100644 --- a/base/stacktraces.jl +++ b/base/stacktraces.jl @@ -153,7 +153,7 @@ end """ stacktrace([trace::Vector{Ptr{Cvoid}},] [c_funcs::Bool=false]) -> StackTrace -Returns a stack trace in the form of a vector of `StackFrame`s. (By default stacktrace +Return a stack trace in the form of a vector of `StackFrame`s. (By default stacktrace doesn't return C functions, but this can be enabled.) When called without specifying a trace, `stacktrace` first calls `backtrace`. """ @@ -200,7 +200,7 @@ end """ remove_frames!(stack::StackTrace, m::Module) -Returns the `StackTrace` with all `StackFrame`s from the provided `Module` removed. 
+Return the `StackTrace` with all `StackFrame`s from the provided `Module` removed. """ function remove_frames!(stack::StackTrace, m::Module) filter!(f -> !from(f, m), stack) @@ -287,7 +287,7 @@ end """ from(frame::StackFrame, filter_mod::Module) -> Bool -Returns whether the `frame` is from the provided `Module` +Return whether the `frame` is from the provided `Module` """ function from(frame::StackFrame, m::Module) return parentmodule(frame) === m diff --git a/base/stat.jl b/base/stat.jl index 13dbca7780b61..09cf8f8eae808 100644 --- a/base/stat.jl +++ b/base/stat.jl @@ -170,7 +170,7 @@ stat(fd::Integer) = stat(RawFD(fd)) """ stat(file) -Returns a structure whose fields contain information about the file. +Return a structure whose fields contain information about the file. The fields of the structure are: | Name | Description | diff --git a/base/strings/basic.jl b/base/strings/basic.jl index c2666898243b0..7be775f5ece05 100644 --- a/base/strings/basic.jl +++ b/base/strings/basic.jl @@ -298,12 +298,13 @@ julia> cmp("b", "β") """ function cmp(a::AbstractString, b::AbstractString) a === b && return 0 - a, b = Iterators.Stateful(a), Iterators.Stateful(b) - for (c::AbstractChar, d::AbstractChar) in zip(a, b) + (iv1, iv2) = (iterate(a), iterate(b)) + while iv1 !== nothing && iv2 !== nothing + (c, d) = (first(iv1)::AbstractChar, first(iv2)::AbstractChar) c ≠ d && return ifelse(c < d, -1, 1) + (iv1, iv2) = (iterate(a, last(iv1)), iterate(b, last(iv2))) end - isempty(a) && return ifelse(isempty(b), 0, -1) - return 1 + return iv1 === nothing ? (iv2 === nothing ? 0 : -1) : 1 end """ diff --git a/base/strings/unicode.jl b/base/strings/unicode.jl index 821e186501d1d..17c5d66c160b6 100644 --- a/base/strings/unicode.jl +++ b/base/strings/unicode.jl @@ -11,7 +11,7 @@ import Base: show, ==, hash, string, Symbol, isless, length, eltype, """ isvalid(value) -> Bool -Returns `true` if the given value is valid for its type, which currently can be either +Return `true` if the given value is valid for its type, which currently can be either `AbstractChar` or `String` or `SubString{String}`. # Examples @@ -31,7 +31,7 @@ isvalid(value) """ isvalid(T, value) -> Bool -Returns `true` if the given value is valid for that type. Types currently can +Return `true` if the given value is valid for that type. Types currently can be either `AbstractChar` or `String`. Values for `AbstractChar` can be of type `AbstractChar` or [`UInt32`](@ref). Values for `String` can be of that type, `SubString{String}`, `Vector{UInt8}`, or a contiguous subarray thereof. diff --git a/base/strings/util.jl b/base/strings/util.jl index 3cb98054d8ede..7d48fee9b1c52 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -500,7 +500,10 @@ See also [`split`](@ref). julia> a = "Ma.rch" "Ma.rch" -julia> collect(eachsplit(a, ".")) +julia> b = eachsplit(a, ".") +Base.SplitIterator{String, String}("Ma.rch", ".", 0, true) + +julia> collect(b) 2-element Vector{SubString{String}}: "Ma" "rch" diff --git a/base/sysimg.jl b/base/sysimg.jl index 5a14bf5bfd3b9..ef7bad929b743 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -19,6 +19,14 @@ Base.init_load_path() if Base.is_primary_base_module # load some stdlib packages but don't put their names in Main let + # Loading here does not call __init__(). This leads to uninitialized RNG + # state which causes rand(::UnitRange{Int}) to hang. 
This is a workaround: + task = current_task() + task.rngState0 = 0x5156087469e170ab + task.rngState1 = 0x7431eaead385992c + task.rngState2 = 0x503e1d32781c2608 + task.rngState3 = 0x3a77f7189200c20b + # Stdlibs sorted in dependency, then alphabetical, order by contrib/print_sorted_stdlibs.jl # Run with the `--exclude-jlls` option to filter out all JLL packages stdlibs = [ diff --git a/base/task.jl b/base/task.jl index 1a9bff051d7c7..ce34d2f179fc5 100644 --- a/base/task.jl +++ b/base/task.jl @@ -526,7 +526,7 @@ function do_async_macro(expr; wrap=identity) end # task wrapper that doesn't create exceptions wrapped in TaskFailedException -struct UnwrapTaskFailedException +struct UnwrapTaskFailedException <: Exception task::Task end @@ -754,7 +754,7 @@ function workqueue_for(tid::Int) @lock l begin qs = Workqueues if length(qs) < tid - nt = Threads.nthreads() + nt = Threads.maxthreadid() @assert tid <= nt global Workqueues = qs = copyto!(typeof(qs)(undef, length(qs) + nt - 1), qs) end @@ -767,7 +767,7 @@ end function enq_work(t::Task) (t._state === task_state_runnable && t.queue === nothing) || error("schedule: Task not runnable") - if t.sticky || Threads.nthreads() == 1 + if t.sticky || Threads.threadpoolsize() == 1 tid = Threads.threadid(t) if tid == 0 # Issue #41324 diff --git a/base/threadingconstructs.jl b/base/threadingconstructs.jl index 6c8ea35cfa373..271d6ea9f7664 100644 --- a/base/threadingconstructs.jl +++ b/base/threadingconstructs.jl @@ -11,20 +11,27 @@ ID `1`. """ threadid() = Int(ccall(:jl_threadid, Int16, ())+1) +# lower bound on the largest threadid() """ - Threads.nthreads([:default|:interactive]) -> Int + Threads.maxthreadid() -> Int -Get the number of threads (across all thread pools or within the specified -thread pool) available to Julia. The number of threads across all thread -pools is the inclusive upper bound on [`threadid()`](@ref). +Get a lower bound on the number of threads (across all thread pools) available +to the Julia process, with atomic-acquire semantics. The result will always be +greater than or equal to [`threadid()`](@ref) as well as `threadid(task)` for +any task you were able to observe before calling `maxthreadid`. +""" +maxthreadid() = Int(Core.Intrinsics.atomic_pointerref(cglobal(:jl_n_threads, Cint), :acquire)) -See also: `BLAS.get_num_threads` and `BLAS.set_num_threads` in the -[`LinearAlgebra`](@ref man-linalg) standard library, and `nprocs()` in the -[`Distributed`](@ref man-distributed) standard library. """ -function nthreads end + Threads.nthreads(:default | :interactive) -> Int -nthreads() = Int(unsafe_load(cglobal(:jl_n_threads, Cint))) +Get the current number of threads within the specified thread pool. The threads in default +have id numbers `1:nthreads(:default)`. + +See also `BLAS.get_num_threads` and `BLAS.set_num_threads` in the [`LinearAlgebra`](@ref +man-linalg) standard library, and `nprocs()` in the [`Distributed`](@ref man-distributed) +standard library and [`Threads.maxthreadid()`](@ref). +""" function nthreads(pool::Symbol) if pool === :default tpid = Int8(0) @@ -35,6 +42,7 @@ function nthreads(pool::Symbol) end return _nthreads_in_pool(tpid) end + function _nthreads_in_pool(tpid::Int8) p = unsafe_load(cglobal(:jl_n_threads_per_pool, Ptr{Cint})) return Int(unsafe_load(p, tpid + 1)) @@ -57,10 +65,20 @@ Returns the number of threadpools currently configured. 
""" nthreadpools() = Int(unsafe_load(cglobal(:jl_n_threadpools, Cint))) +""" + Threads.threadpoolsize() + +Get the number of threads available to the Julia default worker-thread pool. + +See also: `BLAS.get_num_threads` and `BLAS.set_num_threads` in the +[`LinearAlgebra`](@ref man-linalg) standard library, and `nprocs()` in the +[`Distributed`](@ref man-distributed) standard library. +""" +threadpoolsize() = Threads._nthreads_in_pool(Int8(0)) function threading_run(fun, static) ccall(:jl_enter_threaded_region, Cvoid, ()) - n = nthreads() + n = threadpoolsize() tasks = Vector{Task}(undef, n) for i = 1:n t = Task(() -> fun(i)) # pass in tid @@ -93,7 +111,7 @@ function _threadsfor(iter, lbody, schedule) tid = 1 len, rem = lenr, 0 else - len, rem = divrem(lenr, nthreads()) + len, rem = divrem(lenr, threadpoolsize()) end # not enough iterations for all the threads? if len == 0 @@ -185,7 +203,7 @@ assumption may be removed in the future. This scheduling option is merely a hint to the underlying execution mechanism. However, a few properties can be expected. The number of `Task`s used by `:dynamic` scheduler is bounded by a small constant multiple of the number of available worker threads -([`nthreads()`](@ref Threads.nthreads)). Each task processes contiguous regions of the +([`Threads.threadpoolsize()`](@ref)). Each task processes contiguous regions of the iteration space. Thus, `@threads :dynamic for x in xs; f(x); end` is typically more efficient than `@sync for x in xs; @spawn f(x); end` if `length(xs)` is significantly larger than the number of the worker threads and the run-time of `f(x)` is relatively @@ -222,7 +240,7 @@ julia> function busywait(seconds) julia> @time begin Threads.@spawn busywait(5) - Threads.@threads :static for i in 1:Threads.nthreads() + Threads.@threads :static for i in 1:Threads.threadpoolsize() busywait(1) end end @@ -230,7 +248,7 @@ julia> @time begin julia> @time begin Threads.@spawn busywait(5) - Threads.@threads :dynamic for i in 1:Threads.nthreads() + Threads.@threads :dynamic for i in 1:Threads.threadpoolsize() busywait(1) end end diff --git a/base/threads.jl b/base/threads.jl index 2b68c7104ee5e..2d388cc4b9f77 100644 --- a/base/threads.jl +++ b/base/threads.jl @@ -11,25 +11,4 @@ include("threadingconstructs.jl") include("atomics.jl") include("locks-mt.jl") - -""" - resize_nthreads!(A, copyvalue=A[1]) - -Resize the array `A` to length [`nthreads()`](@ref). Any new -elements that are allocated are initialized to `deepcopy(copyvalue)`, -where `copyvalue` defaults to `A[1]`. - -This is typically used to allocate per-thread variables, and -should be called in `__init__` if `A` is a global constant. -""" -function resize_nthreads!(A::AbstractVector, copyvalue=A[1]) - nthr = nthreads() - nold = length(A) - resize!(A, nthr) - for i = nold+1:nthr - A[i] = deepcopy(copyvalue) - end - return A -end - end diff --git a/base/threads_overloads.jl b/base/threads_overloads.jl index 376c1af94f441..ccbc7e50d227b 100644 --- a/base/threads_overloads.jl +++ b/base/threads_overloads.jl @@ -3,7 +3,7 @@ """ Threads.foreach(f, channel::Channel; schedule::Threads.AbstractSchedule=Threads.FairSchedule(), - ntasks=Threads.nthreads()) + ntasks=Threads.threadpoolsize()) Similar to `foreach(f, channel)`, but iteration over `channel` and calls to `f` are split across `ntasks` tasks spawned by `Threads.@spawn`. 
This function @@ -40,7 +40,7 @@ collect(d) = [1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 144, 169, 196, 225, 256 """ function Threads.foreach(f, channel::Channel; schedule::Threads.AbstractSchedule=Threads.FairSchedule(), - ntasks=Threads.nthreads()) + ntasks=Threads.threadpoolsize()) apply = _apply_for_schedule(schedule) stop = Threads.Atomic{Bool}(false) @sync for _ in 1:ntasks diff --git a/base/tuple.jl b/base/tuple.jl index 875d0173c6059..689645b35fcbb 100644 --- a/base/tuple.jl +++ b/base/tuple.jl @@ -186,7 +186,7 @@ function _split_rest(a::Union{AbstractArray, Core.SimpleVector}, n::Int) return a[begin:end-n], a[end-n+1:end] end -split_rest(t::Tuple, n::Int, i=1) = t[i:end-n], t[end-n+1:end] +@eval split_rest(t::Tuple, n::Int, i=1) = ($(Expr(:meta, :aggressive_constprop)); (t[i:end-n], t[end-n+1:end])) # Use dispatch to avoid a branch in first first(::Tuple{}) = throw(ArgumentError("tuple must be non-empty")) @@ -534,7 +534,7 @@ isless(::Tuple, ::Tuple{}) = false """ isless(t1::Tuple, t2::Tuple) -Returns true when t1 is less than t2 in lexicographic order. +Return `true` when `t1` is less than `t2` in lexicographic order. """ function isless(t1::Tuple, t2::Tuple) a, b = t1[1], t2[1] @@ -595,7 +595,7 @@ in(x::Symbol, @nospecialize itr::Tuple{Vararg{Symbol}}) = sym_in(x, itr) """ empty(x::Tuple) -Returns an empty tuple, `()`. +Return an empty tuple, `()`. """ empty(@nospecialize x::Tuple) = () diff --git a/base/util.jl b/base/util.jl index f26ed0717a1fd..3345a737b4cfb 100644 --- a/base/util.jl +++ b/base/util.jl @@ -522,7 +522,7 @@ julia> Foo(b="hi") Foo(1, "hi") julia> Foo() -ERROR: UndefKeywordError: keyword argument b not assigned +ERROR: UndefKeywordError: keyword argument `b` not assigned Stacktrace: [...] ``` diff --git a/cli/loader_exe.c b/cli/loader_exe.c index a5a9968896af6..9187d4f919cf4 100644 --- a/cli/loader_exe.c +++ b/cli/loader_exe.c @@ -15,7 +15,7 @@ extern "C" { JULIA_DEFINE_FAST_TLS #ifdef _COMPILER_ASAN_ENABLED_ -JL_DLLEXPORT const char* __asan_default_options() +JL_DLLEXPORT const char* __asan_default_options(void) { return "allow_user_segv_handler=1:detect_leaks=0"; // FIXME: enable LSAN after fixing leaks & defining __lsan_default_suppressions(), diff --git a/contrib/generate_precompile.jl b/contrib/generate_precompile.jl index acd61be502465..8fd6e2542023e 100644 --- a/contrib/generate_precompile.jl +++ b/contrib/generate_precompile.jl @@ -1,7 +1,7 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license -if Threads.nthreads() != 1 - @warn "Running this file with multiple Julia threads may lead to a build error" Threads.nthreads() +if Threads.maxthreadid() != 1 + @warn "Running this file with multiple Julia threads may lead to a build error" Threads.maxthreadid() end if Base.isempty(Base.ARGS) || Base.ARGS[1] !== "0" @@ -212,7 +212,6 @@ if Test !== nothing precompile(Tuple{typeof(Test.match_logs), Function, Tuple{String, Regex}}) precompile(Tuple{typeof(Base.CoreLogging.shouldlog), Test.TestLogger, Base.CoreLogging.LogLevel, Module, Symbol, Symbol}) precompile(Tuple{typeof(Base.CoreLogging.handle_message), Test.TestLogger, Base.CoreLogging.LogLevel, String, Module, Symbol, Symbol, String, Int}) - precompile(Tuple{typeof(Core.kwfunc(Base.CoreLogging.handle_message)), typeof((exception=nothing,)), typeof(Base.CoreLogging.handle_message), Test.TestLogger, Base.CoreLogging.LogLevel, String, Module, Symbol, Symbol, String, Int}) precompile(Tuple{typeof(Test.detect_ambiguities), Any}) precompile(Tuple{typeof(Test.collect_test_logs), Function}) precompile(Tuple{typeof(Test.do_broken_test), Test.ExecutionResult, Any}) @@ -340,7 +339,7 @@ function generate_precompile_statements() # wait for the next prompt-like to appear readuntil(output_copy, "\n") strbuf = "" - while true + while !eof(output_copy) strbuf *= String(readavailable(output_copy)) occursin(JULIA_PROMPT, strbuf) && break occursin(PKG_PROMPT, strbuf) && break @@ -372,9 +371,9 @@ function generate_precompile_statements() end end - # Execute the collected precompile statements n_succeeded = 0 - include_time = @elapsed for statement in sort!(collect(statements)) + # Execute the precompile statements + include_time = @elapsed for statement in statements # println(statement) # XXX: skip some that are broken.
these are caused by issue #39902 occursin("Tuple{Artifacts.var\"#@artifact_str\", LineNumberNode, Module, Any, Any}", statement) && continue diff --git a/contrib/print_sorted_stdlibs.jl b/contrib/print_sorted_stdlibs.jl index bbf890328cb4e..28d75f079b9dd 100644 --- a/contrib/print_sorted_stdlibs.jl +++ b/contrib/print_sorted_stdlibs.jl @@ -27,9 +27,9 @@ end project_deps = Dict{String,Set{String}}() for project_dir in readdir(STDLIB_DIR, join=true) - files = readdir(project_dir) - if "Project.toml" in files - project = TOML.parsefile(joinpath(project_dir, "Project.toml")) + project_file = joinpath(project_dir, "Project.toml") + if isfile(project_file) + project = TOML.parsefile(project_file) if !haskey(project, "name") continue diff --git a/deps/checksums/Downloads-0733701b0e21df6ae61a6b2fc8cec60ff1fd28dc.tar.gz/md5 b/deps/checksums/Downloads-0733701b0e21df6ae61a6b2fc8cec60ff1fd28dc.tar.gz/md5 deleted file mode 100644 index 5d41d13d146ae..0000000000000 --- a/deps/checksums/Downloads-0733701b0e21df6ae61a6b2fc8cec60ff1fd28dc.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -2776ac848de843f5d5fc340e1f14f8cf diff --git a/deps/checksums/Downloads-0733701b0e21df6ae61a6b2fc8cec60ff1fd28dc.tar.gz/sha512 b/deps/checksums/Downloads-0733701b0e21df6ae61a6b2fc8cec60ff1fd28dc.tar.gz/sha512 deleted file mode 100644 index deede412320ac..0000000000000 --- a/deps/checksums/Downloads-0733701b0e21df6ae61a6b2fc8cec60ff1fd28dc.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -080d7a20d3381d6c1851fdeb9c41ed3d38186e922423f600cebc731afcd05efcc3b98e0ae72d5f28951e259c3193d10d1bb16b51d5a093327bd239a888aaad51 diff --git a/deps/checksums/Downloads-11b6bb73bff32cec1b1e3bf064420cad1335400b.tar.gz/md5 b/deps/checksums/Downloads-11b6bb73bff32cec1b1e3bf064420cad1335400b.tar.gz/md5 new file mode 100644 index 0000000000000..b968bee68a043 --- /dev/null +++ b/deps/checksums/Downloads-11b6bb73bff32cec1b1e3bf064420cad1335400b.tar.gz/md5 @@ -0,0 +1 @@ +d02f5c45d09877258e493b61595bf3b8 diff --git a/deps/checksums/Downloads-11b6bb73bff32cec1b1e3bf064420cad1335400b.tar.gz/sha512 b/deps/checksums/Downloads-11b6bb73bff32cec1b1e3bf064420cad1335400b.tar.gz/sha512 new file mode 100644 index 0000000000000..bf0bcc6dbb174 --- /dev/null +++ b/deps/checksums/Downloads-11b6bb73bff32cec1b1e3bf064420cad1335400b.tar.gz/sha512 @@ -0,0 +1 @@ +5c172f6030d0c377b04ec052e62738e3b36a2d99da5d2308b8425cf474f376a0e5d8caa4f9a4e93f871e79e491fb17a7c616190f585af62d59605dd19da14dbe diff --git a/deps/checksums/Pkg-3cbbd860afd4c2a50a80a04fa229fe5cd5bddc76.tar.gz/md5 b/deps/checksums/Pkg-3cbbd860afd4c2a50a80a04fa229fe5cd5bddc76.tar.gz/md5 deleted file mode 100644 index 8480935cba812..0000000000000 --- a/deps/checksums/Pkg-3cbbd860afd4c2a50a80a04fa229fe5cd5bddc76.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -f164cc7c322b2bd3f9a1ed49882d9a8c diff --git a/deps/checksums/Pkg-3cbbd860afd4c2a50a80a04fa229fe5cd5bddc76.tar.gz/sha512 b/deps/checksums/Pkg-3cbbd860afd4c2a50a80a04fa229fe5cd5bddc76.tar.gz/sha512 deleted file mode 100644 index 2f3164077b8a9..0000000000000 --- a/deps/checksums/Pkg-3cbbd860afd4c2a50a80a04fa229fe5cd5bddc76.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -5d1d8acfed6e432033473f083860cdbefafcc0f0b8d8aa99fa445288a3064ca72da8fc4dfa1a3459347e1d512adba252bc5d468305fa4a74e4e8f25ae0628c87 diff --git a/deps/checksums/Pkg-b11ca0acdda718a15068cd1815ec346a4facf412.tar.gz/md5 b/deps/checksums/Pkg-b11ca0acdda718a15068cd1815ec346a4facf412.tar.gz/md5 new file mode 100644 index 0000000000000..371956891d95f --- /dev/null +++ 
b/deps/checksums/Pkg-b11ca0acdda718a15068cd1815ec346a4facf412.tar.gz/md5 @@ -0,0 +1 @@ +b29fbda23156c6987ea749c5178b7030 diff --git a/deps/checksums/Pkg-b11ca0acdda718a15068cd1815ec346a4facf412.tar.gz/sha512 b/deps/checksums/Pkg-b11ca0acdda718a15068cd1815ec346a4facf412.tar.gz/sha512 new file mode 100644 index 0000000000000..26d8a0ccd4549 --- /dev/null +++ b/deps/checksums/Pkg-b11ca0acdda718a15068cd1815ec346a4facf412.tar.gz/sha512 @@ -0,0 +1 @@ +5795a739788de76066cd93e3155ff5d4e6ecf4a7503ff759405870ad950dfa5e85ff09bf918a434fcf593d6e4c494102b33f28f54becff039f7708ec2eafc986 diff --git a/deps/checksums/SparseArrays-1bae96dc8f9a8ca8b7879eef4cf71e186598e982.tar.gz/md5 b/deps/checksums/SparseArrays-1bae96dc8f9a8ca8b7879eef4cf71e186598e982.tar.gz/md5 deleted file mode 100644 index c30fb0af82a8d..0000000000000 --- a/deps/checksums/SparseArrays-1bae96dc8f9a8ca8b7879eef4cf71e186598e982.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -88144ed473b0ca6154ec55a8977c281c diff --git a/deps/checksums/SparseArrays-1bae96dc8f9a8ca8b7879eef4cf71e186598e982.tar.gz/sha512 b/deps/checksums/SparseArrays-1bae96dc8f9a8ca8b7879eef4cf71e186598e982.tar.gz/sha512 deleted file mode 100644 index 1808b81c26624..0000000000000 --- a/deps/checksums/SparseArrays-1bae96dc8f9a8ca8b7879eef4cf71e186598e982.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -1c8c27f6b74c60dedecd6dd58de6b4b400bf3b942104e3ba7319a10a111ebbab0be03f98f072c073f43ca454d187674737dd34c1c9acb92adf3c8b76b3c400ac diff --git a/deps/checksums/SparseArrays-3c2b65f9ba6afb3c6c5dc76c03d897a6647e9dd7.tar.gz/md5 b/deps/checksums/SparseArrays-3c2b65f9ba6afb3c6c5dc76c03d897a6647e9dd7.tar.gz/md5 new file mode 100644 index 0000000000000..849bae264bbee --- /dev/null +++ b/deps/checksums/SparseArrays-3c2b65f9ba6afb3c6c5dc76c03d897a6647e9dd7.tar.gz/md5 @@ -0,0 +1 @@ +8316e14c31e6568f881f18febc5232b6 diff --git a/deps/checksums/SparseArrays-3c2b65f9ba6afb3c6c5dc76c03d897a6647e9dd7.tar.gz/sha512 b/deps/checksums/SparseArrays-3c2b65f9ba6afb3c6c5dc76c03d897a6647e9dd7.tar.gz/sha512 new file mode 100644 index 0000000000000..4741ad4c82980 --- /dev/null +++ b/deps/checksums/SparseArrays-3c2b65f9ba6afb3c6c5dc76c03d897a6647e9dd7.tar.gz/sha512 @@ -0,0 +1 @@ +b5bea1cc4c7cdefcff0e1100f5fb0f52d3e6d49c827dd9cc027aaa3ae2bc2d2fa110d784383ddfd6991653ad0515f73a5974ae2e5b91279ab99dbaa74c488df1 diff --git a/deps/checksums/Statistics-0588f2cf9e43f9f72af5802feaf0af4b652c3257.tar.gz/md5 b/deps/checksums/Statistics-0588f2cf9e43f9f72af5802feaf0af4b652c3257.tar.gz/md5 deleted file mode 100644 index f0bd8c2517b21..0000000000000 --- a/deps/checksums/Statistics-0588f2cf9e43f9f72af5802feaf0af4b652c3257.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -fb508e9a699fde0d7f85b208ae7a0f2b diff --git a/deps/checksums/Statistics-0588f2cf9e43f9f72af5802feaf0af4b652c3257.tar.gz/sha512 b/deps/checksums/Statistics-0588f2cf9e43f9f72af5802feaf0af4b652c3257.tar.gz/sha512 deleted file mode 100644 index 5f6512e8a7f16..0000000000000 --- a/deps/checksums/Statistics-0588f2cf9e43f9f72af5802feaf0af4b652c3257.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -5ea116f2ed5b4709e33888a865d07bbc6cb49f7ddb43c315a4e95e020e77c5eb769baab3e784c1c03665ac6ed4bad933bc21fdf4121667f7760027483ccd0171 diff --git a/deps/checksums/Statistics-20fbe576ec406180b1dddf4c7fbe16458a7aef21.tar.gz/md5 b/deps/checksums/Statistics-20fbe576ec406180b1dddf4c7fbe16458a7aef21.tar.gz/md5 new file mode 100644 index 0000000000000..5e467255c9460 --- /dev/null +++ b/deps/checksums/Statistics-20fbe576ec406180b1dddf4c7fbe16458a7aef21.tar.gz/md5 @@ -0,0 +1 @@ +85a733533f946f1183f4546b6c8e14f5 diff --git 
a/deps/checksums/Statistics-20fbe576ec406180b1dddf4c7fbe16458a7aef21.tar.gz/sha512 b/deps/checksums/Statistics-20fbe576ec406180b1dddf4c7fbe16458a7aef21.tar.gz/sha512 new file mode 100644 index 0000000000000..e8c4c1b7dfeef --- /dev/null +++ b/deps/checksums/Statistics-20fbe576ec406180b1dddf4c7fbe16458a7aef21.tar.gz/sha512 @@ -0,0 +1 @@ +edb6faba80e3cd5685c59a7bf7f7ad76435e1df8b65bd03b534bd5d1b605ea6610704eaa08aa99b74796cbaf2ff7e786b3ff058fd2e5f494f88e15a9bd6e8613 diff --git a/deps/checksums/Tar-6bfc11475a80b752e70518047c3c3463f56bbc1d.tar.gz/md5 b/deps/checksums/Tar-6bfc11475a80b752e70518047c3c3463f56bbc1d.tar.gz/md5 new file mode 100644 index 0000000000000..cbbc18180334e --- /dev/null +++ b/deps/checksums/Tar-6bfc11475a80b752e70518047c3c3463f56bbc1d.tar.gz/md5 @@ -0,0 +1 @@ +3f153a0a3646995cc7dadd4720de74a2 diff --git a/deps/checksums/Tar-6bfc11475a80b752e70518047c3c3463f56bbc1d.tar.gz/sha512 b/deps/checksums/Tar-6bfc11475a80b752e70518047c3c3463f56bbc1d.tar.gz/sha512 new file mode 100644 index 0000000000000..2a64aab3ccb9c --- /dev/null +++ b/deps/checksums/Tar-6bfc11475a80b752e70518047c3c3463f56bbc1d.tar.gz/sha512 @@ -0,0 +1 @@ +433fe68dcf65805af68e088e127b859e3e95ff21820785ea152392554944a3d9904fa8152e43e1413593fe46a028788cea5cd7a19299a0a1f41b2cfcb7cfed73 diff --git a/deps/checksums/Tar-951955b7fbe0d79e4e8a1405b6816e4081a6976d.tar.gz/md5 b/deps/checksums/Tar-951955b7fbe0d79e4e8a1405b6816e4081a6976d.tar.gz/md5 deleted file mode 100644 index f9aa140eccc97..0000000000000 --- a/deps/checksums/Tar-951955b7fbe0d79e4e8a1405b6816e4081a6976d.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -b49a74404daa5575b82f89082ff35af9 diff --git a/deps/checksums/Tar-951955b7fbe0d79e4e8a1405b6816e4081a6976d.tar.gz/sha512 b/deps/checksums/Tar-951955b7fbe0d79e4e8a1405b6816e4081a6976d.tar.gz/sha512 deleted file mode 100644 index 1519b88a7a53e..0000000000000 --- a/deps/checksums/Tar-951955b7fbe0d79e4e8a1405b6816e4081a6976d.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -33280360d106269922c95c5cd3289babddf85f5031047a93a583b465f4c78ec41d800a025c3ab9e69817144390b206b6c2fdac181476b7fcbce91c55ee5b134f diff --git a/deps/checksums/cacert-2022-02-01.pem/md5 b/deps/checksums/cacert-2022-02-01.pem/md5 deleted file mode 100644 index e287f024b8e18..0000000000000 --- a/deps/checksums/cacert-2022-02-01.pem/md5 +++ /dev/null @@ -1 +0,0 @@ -3b89462e00eba6769fae30eebfb9997f diff --git a/deps/checksums/cacert-2022-02-01.pem/sha512 b/deps/checksums/cacert-2022-02-01.pem/sha512 deleted file mode 100644 index a5d8840598343..0000000000000 --- a/deps/checksums/cacert-2022-02-01.pem/sha512 +++ /dev/null @@ -1 +0,0 @@ -75f5222c23d14d194856d3fa58eb605a6400cbf0068e208e1bc75a4821f841c39a95dde161b904db54ce922efa384796ad5f2e2b6ef75327475f711e72652388 diff --git a/deps/checksums/cacert-2022-10-11.pem/md5 b/deps/checksums/cacert-2022-10-11.pem/md5 new file mode 100644 index 0000000000000..877aa5a716378 --- /dev/null +++ b/deps/checksums/cacert-2022-10-11.pem/md5 @@ -0,0 +1 @@ +1363ae92d22e83c42a7f82ab6c5b0711 diff --git a/deps/checksums/cacert-2022-10-11.pem/sha512 b/deps/checksums/cacert-2022-10-11.pem/sha512 new file mode 100644 index 0000000000000..5c7b990cb9e4b --- /dev/null +++ b/deps/checksums/cacert-2022-10-11.pem/sha512 @@ -0,0 +1 @@ +fbbd8d33932a5d65dd548d91927fc5bac5218d5a44b8d992591bef2eab22b09cc2154b6effb2df1c61e1aa233816e3c3e7acfb27b3e3f90672a7752bb05b710f diff --git a/deps/checksums/compilersupportlibraries b/deps/checksums/compilersupportlibraries index 86250fdc63390..dd66bb25de8a6 100644 --- a/deps/checksums/compilersupportlibraries +++ 
b/deps/checksums/compilersupportlibraries @@ -1,92 +1,92 @@ -CompilerSupportLibraries.v0.5.2+0.aarch64-apple-darwin-libgfortran5.tar.gz/md5/e0651fbefd39d405ec97d7530f2887d7 -CompilerSupportLibraries.v0.5.2+0.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/0a067b7e37d98a4c96dd1400b8c1a07c82cc223d11a93a0ee2455c3b55b394eee0cb251e26206495453f2cf8866822fb586ffe105f44e3380fa949adffe8b83c -CompilerSupportLibraries.v0.5.2+0.aarch64-linux-gnu-libgfortran3.tar.gz/md5/1f4a5e98cd88a08029326ca5e9d47e9c -CompilerSupportLibraries.v0.5.2+0.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/696f359746de592d4e30dc9ad19d5e07ebc1e6635e1f082e249747c42338ef04ce885fee5ad5915ec39fa2866af4265bb6ef580c75874c091a15b64d02626123 -CompilerSupportLibraries.v0.5.2+0.aarch64-linux-gnu-libgfortran4.tar.gz/md5/8285fd34164fac0410fcec6bb9d8b8e4 -CompilerSupportLibraries.v0.5.2+0.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/df0869d357326c803d8ff33c9734f01457d877e80c4af33745d4ca016144eb0c52fba7aad7e1098eecde3fc4cf41ed971638b4b6f901c7306a2072e8c14c3513 -CompilerSupportLibraries.v0.5.2+0.aarch64-linux-gnu-libgfortran5.tar.gz/md5/82add6093bda667442236c04d84b6934 -CompilerSupportLibraries.v0.5.2+0.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/81538d75950cdf931f9aaa932d1f9cf40998bc256924c3231e984179f6a5c3eca0f7e1ba315b21f2add3bf9376e3a45ee59ccd8d9f6d765105e05da25bf65cfc -CompilerSupportLibraries.v0.5.2+0.aarch64-linux-musl-libgfortran3.tar.gz/md5/ee0d6a9f0a1372e36a02a95b6c07aefc -CompilerSupportLibraries.v0.5.2+0.aarch64-linux-musl-libgfortran3.tar.gz/sha512/f248e57249af88520f9c7ac32dba45ca03e5904606b4edb682ea514c31a9a775198d02f0892e79124326e184d7906b7a13b0e4f3e7721352b8105cdfa72f89ed -CompilerSupportLibraries.v0.5.2+0.aarch64-linux-musl-libgfortran4.tar.gz/md5/dddc8f7a9be9f07e9738e2a027fe8a0c -CompilerSupportLibraries.v0.5.2+0.aarch64-linux-musl-libgfortran4.tar.gz/sha512/36f9b94f470d451b9c3c2429026292463434427625563240467f50374624a69fbca7ddcb0678937a58d22d32a8157571d3e201c47cc9a2484d1d75d4c0f77ebc -CompilerSupportLibraries.v0.5.2+0.aarch64-linux-musl-libgfortran5.tar.gz/md5/12b7eb088023eaf9583ffa6f9f0e18ac -CompilerSupportLibraries.v0.5.2+0.aarch64-linux-musl-libgfortran5.tar.gz/sha512/a5f5a6053e63ea1fb0185a0c3a7752a938373da847dffb872c1227ed3a0a80f2de1e4394baaaeeb8e0d8f2a4da123433896742cfdca6f94343bd4d0ab3578c65 -CompilerSupportLibraries.v0.5.2+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/e5e6918571981e4cfa5a2951e59f2df7 -CompilerSupportLibraries.v0.5.2+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/5d7b0f4f55b6726ae7317edb170cafb6a2c4563b0f4a90c619da95c120edd8fdce118bbd1e7168110f75cc899b857472fd524a396deb6d9f2552f53c861faeb7 -CompilerSupportLibraries.v0.5.2+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/7ae11706e9c6c043ad771f2700d06591 -CompilerSupportLibraries.v0.5.2+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/4f2f01aed00a58f4393cfd4608df1a6df6c9bff6e352a02a2b9af13f14a4436611769d64d082d3b151ba23d3d905ae2700bf469b9858249757ad7b5aae716d6a -CompilerSupportLibraries.v0.5.2+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/e922dad7dad1d5f80cc154a6ddb6de35 -CompilerSupportLibraries.v0.5.2+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/3fabbcedbbc4abfe1e0c01c387bbe2537105937674877122b5b66d6015944a58f547106da1e185c1434de0c1883d356f8dc52968f075a00c6a8a52edaaf88957 -CompilerSupportLibraries.v0.5.2+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/53741f61d806efe045a5abe0e748aa36 
-CompilerSupportLibraries.v0.5.2+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/b975a8fdfb736ef2b1aede2c89e390df261bfe8aaf8ffdb37887add09263d95f46642c3898ac19ec6098cdfdfc7f0726436dc273e9f70f10fe1abf4ea945277a -CompilerSupportLibraries.v0.5.2+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/9687cf768c6c2879261e385c44ba490c -CompilerSupportLibraries.v0.5.2+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/02f9accf8273597f6889677de64255e4e399d67377b5363ed31dea7e2118cc24d3b7fad7c0632aea79dee44250b1ff74bf2fa22e4f3e7755de65871854112c14 -CompilerSupportLibraries.v0.5.2+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/b62a81b9f43903b3de6fa1c78c03b89f -CompilerSupportLibraries.v0.5.2+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/d44eecb30ccf19bc8dca41c738dbedd2bd2cb6e379a3ab181c955cb9cdf9bae8efeaf7a90c85dc7434520ead7e910d38e92b448cff7aecaef0902684e9b06c9f -CompilerSupportLibraries.v0.5.2+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/e31780333339ac64f54ad434578d6294 -CompilerSupportLibraries.v0.5.2+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/9c3b91ed90f3393dfc72e7e2feefa60afe6ad457971950b163ffbecafa41cea43a15cdfadd8f402fd8fb61652c224f5b1a04c432fb0f43593749f51ed1340116 -CompilerSupportLibraries.v0.5.2+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/0f7bdfb908aa3d721428a1ee8412b594 -CompilerSupportLibraries.v0.5.2+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/3199da41c3df3d702a557c8b5e9fdde3a47c12d4c45fb9094fd194cbbe667663334b6cc0a5169fcc755790c4b5fada71c5094dc8d9a7f8b6c836d3f4c4c6e509 -CompilerSupportLibraries.v0.5.2+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/f455758e436750092ba2df65adcfd380 -CompilerSupportLibraries.v0.5.2+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/b5d0dbdff19b5ce076b8ae7b907da25fdbe05eabd47e46987f9987690a3a670d14bd3d2c2343d366ca1ee861b85fcbaccc1460ba3a73571686ef9e4330427b65 -CompilerSupportLibraries.v0.5.2+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/4cf3790d881b829b4b8da882987d5a40 -CompilerSupportLibraries.v0.5.2+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/ef5810243af32135da0cb7d08ae35ff8a2cce50c05200450154aa860c181719844466b787faae551aa71bd94e721f2d7d17ab14a049d0558666037862aff2f6a -CompilerSupportLibraries.v0.5.2+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/a49e1fa6e040ac86ddd85a3188f83a76 -CompilerSupportLibraries.v0.5.2+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/cb0292651392a14f952181eb7a4a0ea6359632e96b017169cf4f1792f44f2846b5d6b2b5d334dee490262dd1c2d421de49d1f4a919402392f77fdaf60c1d19a3 -CompilerSupportLibraries.v0.5.2+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/3f64969e0e70dc8644fe09637dd1cbe7 -CompilerSupportLibraries.v0.5.2+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/0a71f8b731911019666bdc82f42e306ff1801321362ce6fe58988c9a1b110cd032a01c11fd0f9a6a3fbf6c6545f3287e363f5b3c40ef2eab0659638c38687196 -CompilerSupportLibraries.v0.5.2+0.i686-linux-gnu-libgfortran3.tar.gz/md5/28f58931f66a3405fc4c99ce40724ece -CompilerSupportLibraries.v0.5.2+0.i686-linux-gnu-libgfortran3.tar.gz/sha512/d5290079264cfc6f716dcc9171f8412369e685c7ba0b9e82ae3d764de41671fbb4a24fdf7ebae9a9b913393837c2e41951326dbf3e870340fba7121709ebba8b -CompilerSupportLibraries.v0.5.2+0.i686-linux-gnu-libgfortran4.tar.gz/md5/f98763aae801cc7d88124bea422f13ca -CompilerSupportLibraries.v0.5.2+0.i686-linux-gnu-libgfortran4.tar.gz/sha512/da2095a462637ffcd0825949f4bcc86be9484c9e009648dc3c2e22e2fa19c65124e5e45f2694e85616df49b1181e2f4d2b886d3b83401c09ca58207db461ea23 
-CompilerSupportLibraries.v0.5.2+0.i686-linux-gnu-libgfortran5.tar.gz/md5/1bfee57db4f2bdd788e59e34d0bb4506 -CompilerSupportLibraries.v0.5.2+0.i686-linux-gnu-libgfortran5.tar.gz/sha512/8f4814d97d6cd6c1f0c1d23fce875c40b6df7de7a8dc66e66681ba3c533120cb14d9d018808ff4e33dec53bb8958fbcedc9be6ac70817839ff89a0db5c0d18a8 -CompilerSupportLibraries.v0.5.2+0.i686-linux-musl-libgfortran3.tar.gz/md5/5da7af0483ffde929c58f3ae411f6489 -CompilerSupportLibraries.v0.5.2+0.i686-linux-musl-libgfortran3.tar.gz/sha512/97e56fe4fe0e10fa0d57ec10882a62d290829940049ffce7a8d81a843b91c7844e53d737bcdbc7a5e8206ca9820a7066fcdd7d0eed1e831d7af96222ccca1224 -CompilerSupportLibraries.v0.5.2+0.i686-linux-musl-libgfortran4.tar.gz/md5/a0b5cf513f2f02107c8887ea5e30cdda -CompilerSupportLibraries.v0.5.2+0.i686-linux-musl-libgfortran4.tar.gz/sha512/aeeacfb58094751fe5cec87825ebb02a22c58d3e7300b6ca6066eb717e28ebecff230838c32935ac11376a6efdd5a0c44fe0c8e7d5b9a1f0165171c2b67a2d8b -CompilerSupportLibraries.v0.5.2+0.i686-linux-musl-libgfortran5.tar.gz/md5/569ef42292d8cfd157026b434e93fe4d -CompilerSupportLibraries.v0.5.2+0.i686-linux-musl-libgfortran5.tar.gz/sha512/daf543fbe7e80fd63220f7c08e0d6b51d45ce9e0af592a591eecadcaac9b859ce596df2bf8fcb3fb72fb799f869d0caac28acb5d26b3c3aed6dc80245b90dcce -CompilerSupportLibraries.v0.5.2+0.i686-w64-mingw32-libgfortran3.tar.gz/md5/f4e0f3d40f7f77d32f26424dedff850f -CompilerSupportLibraries.v0.5.2+0.i686-w64-mingw32-libgfortran3.tar.gz/sha512/57e35c39c4c93919cdbbe33891b5938918d33840ad33ed51a010f9deab791d60fa2d030d3e14df6e445e0607dc9280b07ca287a3273630bf7e245d6ab8069cbd -CompilerSupportLibraries.v0.5.2+0.i686-w64-mingw32-libgfortran4.tar.gz/md5/d366731c11314cb908fca2032e7fefca -CompilerSupportLibraries.v0.5.2+0.i686-w64-mingw32-libgfortran4.tar.gz/sha512/a7e087e718f9d8cb4957b8bf3a4554faae97510b25d88a3e9ae4241cb69efa5b520bd9424a0072e7d712c9435e6900690c56004a716a716838367e91fe20e11d -CompilerSupportLibraries.v0.5.2+0.i686-w64-mingw32-libgfortran5.tar.gz/md5/eff855bb45f038c9d74c67ae2eed5641 -CompilerSupportLibraries.v0.5.2+0.i686-w64-mingw32-libgfortran5.tar.gz/sha512/e674d60247086bb8029270406d246a4857e668442a77299a431ec837446387bd1ed2de5e0f9f6985cc6e5d15b6692f40b18e0016e7c9d4e95a3770dffc19b44d -CompilerSupportLibraries.v0.5.2+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/0bfe78d226b3d89a83b54c6ff39239e1 -CompilerSupportLibraries.v0.5.2+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/fed14514c9603a1e4772d2fd5f4a48da751c10e34b6fba5e0c35ff40b8ed165af6daebc051fa86751bdffb8f820ac779215dc3b38c4ff5c1624214b61d7ad1b0 -CompilerSupportLibraries.v0.5.2+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/d5219b60117555a3ccd41ab406d485f4 -CompilerSupportLibraries.v0.5.2+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/9268d7c2c6ef649dc753757f9afc7ac1382e521d02c58a91eead9873f2a80f215f3b67f9a33abad53c8bca18c19ae3e63804e01e3109c939d33555c7ec8c5b1a -CompilerSupportLibraries.v0.5.2+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/1f620c9a049e00b8b11c3970a23f2761 -CompilerSupportLibraries.v0.5.2+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/6ac900dfac9268334c9b54badbfbec323151353e8d87d3199f875a505febf863766ded0c52bce2939e5975fa6e35a28cc16c88e7c1cce37d65725fe275813606 -CompilerSupportLibraries.v0.5.2+0.x86_64-apple-darwin-libgfortran3.tar.gz/md5/c21c35b00ed7ad0171d63006f1a4170d -CompilerSupportLibraries.v0.5.2+0.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/f993a616a75b1f5ee140ed47b6e4aa981cffbbffd795fc0cf9df9397a6366a4507a158530e961c398bab656e7d51a27be026088678e0c19485ef0bad136bb69a 
-CompilerSupportLibraries.v0.5.2+0.x86_64-apple-darwin-libgfortran4.tar.gz/md5/f0cd5c8631256f3b903e95ad3623d702 -CompilerSupportLibraries.v0.5.2+0.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/81de3f699169254fa83a3ab8b6063ddfd300065edf90f15239b0a304f3feea9534acba7d982058a7712ce94dcdb1ae036502f276813a96f8254e323787556d63 -CompilerSupportLibraries.v0.5.2+0.x86_64-apple-darwin-libgfortran5.tar.gz/md5/6030c114c1250e99958a0727da9d6daf -CompilerSupportLibraries.v0.5.2+0.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/1d4be1c0718aeab056368653b7f34bd5ac3c85edb9fbdc2752b8c4877fcf5d080774506519cf285954485d806bccc18323f6c45f069db8bd314d064a2cc1ed66 -CompilerSupportLibraries.v0.5.2+0.x86_64-linux-gnu-libgfortran3.tar.gz/md5/b45ac0c04357de9d013df598dd13f3bf -CompilerSupportLibraries.v0.5.2+0.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/42174d05c7165f87693efa09facc9405c9d6eab490c4b5fc74ba02e1e2e871799a24dcb7496e0693f30f9c3fd7e81020b77a3dd946832288769063f6d2a31aba -CompilerSupportLibraries.v0.5.2+0.x86_64-linux-gnu-libgfortran4.tar.gz/md5/761998b08e4b460cec95468adb850c31 -CompilerSupportLibraries.v0.5.2+0.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/32853dcb3202e735325e1e0e3d88e2e446d7c88d45bc462d4e91f7d57dfd78b0f3381302e72163fafdb1c2cef53d4822e1c52289081e06b7b74d67e2ed0d34c2 -CompilerSupportLibraries.v0.5.2+0.x86_64-linux-gnu-libgfortran5.tar.gz/md5/dfd50d071702f903213ea0c6a42ad81b -CompilerSupportLibraries.v0.5.2+0.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/3d6ecca7689bcb1925801d26a328790228c564bb731f6fa25d88763eeb22cccc4409dd6376c7b574ec242fbf85e41fd82d038a2650f8d33bb850b9a9a9f9a722 -CompilerSupportLibraries.v0.5.2+0.x86_64-linux-musl-libgfortran3.tar.gz/md5/0b374bc55dd0d5f4cf34a12d4901c022 -CompilerSupportLibraries.v0.5.2+0.x86_64-linux-musl-libgfortran3.tar.gz/sha512/10db23cc1d1367f40fed6c6cfc232fdc49f55e666d3623faa1af40dd781ea7a5d37b6b5a39524f0fc57d6d49947f429389bbf7075f10163090d7ea48903e688a -CompilerSupportLibraries.v0.5.2+0.x86_64-linux-musl-libgfortran4.tar.gz/md5/1e28cdc7937a500b081a1f4d340190f2 -CompilerSupportLibraries.v0.5.2+0.x86_64-linux-musl-libgfortran4.tar.gz/sha512/0b635b8f594739453033fd1dc5496976a8fff314dd078e2d8248d3c2136abaaa610ebc45252a81d16db9d91a0ec20a552f1bcb65ed3b50a627e40168e7f100e0 -CompilerSupportLibraries.v0.5.2+0.x86_64-linux-musl-libgfortran5.tar.gz/md5/f6fcf32044f69d8305a718eeb7651614 -CompilerSupportLibraries.v0.5.2+0.x86_64-linux-musl-libgfortran5.tar.gz/sha512/5940a145a3203d5a4a9b7cd9aab45b8bcff08a43a69a8fea67a9e18535625c8ecc051ba344421253b2f96eaa1a007d42555897a8f8aa0e8bd5dbf1ddbd38f197 -CompilerSupportLibraries.v0.5.2+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/eb46728ef7d3ce955d5a497a556138c2 -CompilerSupportLibraries.v0.5.2+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/922d3a85059e7cedc6e0e52687cd6f22cb708677a65fcab86f7571737d8f17455f15b3f1af7442ee5fd04a437f226d4eee374d0f353a10f8f7a87160d7a2351d -CompilerSupportLibraries.v0.5.2+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/fc1f4fc44c08f0c3040b976558a35e3e -CompilerSupportLibraries.v0.5.2+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/5406251fe1d1d1901ac4e6af3b8e9394fcaee2fa6a4f3d2817161a1626bc6b45d7b184f9bdd3d2e6571640f40b4e06c61f321358ad8fe484871ab9b878801a95 -CompilerSupportLibraries.v0.5.2+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/e1b52fdb233c9667610867e278e7719a -CompilerSupportLibraries.v0.5.2+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/efadc4efc419808cb289c8c8f52664a72f2646bad2e8e02533456cf9afd613d4cbacd121da786316206df8f65b5264498f25adb04f7673121b2a58a20c4a75b9 
-CompilerSupportLibraries.v0.5.2+0.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/a449351de41a3140534d278aacedc54e -CompilerSupportLibraries.v0.5.2+0.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/db5bfbd161eba076598465cfee277418c6e9f4f0f7c4672a437c68ceff374f600917fdcaaa9dfdb945103d2b5c9786663e8e9403f6fdc796cda7c529dadf28ba -CompilerSupportLibraries.v0.5.2+0.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/facd6a008270b85d08ca835556921127 -CompilerSupportLibraries.v0.5.2+0.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/236438e05eb3f50063aea90522e61f10a03c474f3c26117c071bf94d4ca24fae56e09a565cbf00dc5d1eabefec804fa5503ecbcc324b5da00a65b5471fccfadf -CompilerSupportLibraries.v0.5.2+0.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/cd294be65ddd327d6c0feeca8b13f922 -CompilerSupportLibraries.v0.5.2+0.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/73dc99009d25fa0ebafa77d7c5747d21a6e0778a6266a2408df885d9553e4b8029c104e1fe174526d9261252bb564128ae7cf9058268475d168c79d19ee4f0c0 +CompilerSupportLibraries.v0.5.3+0.aarch64-apple-darwin-libgfortran5.tar.gz/md5/20ebaad57850393b6ac9fa924e511fe4 +CompilerSupportLibraries.v0.5.3+0.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/020de4d8b0ff6bedbadaa305ff8445e6849f12053762ea4aa68412d1ec763dbd86f479587a2fbb862487f1feb04d976c38099ddf3887817a3d32b3f029cf85b1 +CompilerSupportLibraries.v0.5.3+0.aarch64-linux-gnu-libgfortran3.tar.gz/md5/766b13c0f2559d95c8e0ea86b168c485 +CompilerSupportLibraries.v0.5.3+0.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/63cda86223d831d027093e0b0330a56a00380ecea8bd61f05fbc3fe9e0b5810b78a85fec526766d3d44a8829320c7996e613db7890c1be34f5d480325ac19da6 +CompilerSupportLibraries.v0.5.3+0.aarch64-linux-gnu-libgfortran4.tar.gz/md5/f7bf3d6e9c3670f3cbd0325f3b940b96 +CompilerSupportLibraries.v0.5.3+0.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/fe4d54e582f7c89e39179fdfe3861b45ece7e6afe40d839649761adac7c5c6b8acfe40f838ada590143a7f7eafe2a83b088be1063e8cb1542096eb902c44bd64 +CompilerSupportLibraries.v0.5.3+0.aarch64-linux-gnu-libgfortran5.tar.gz/md5/1624a383c98c86a89d8816abcf2d4ce9 +CompilerSupportLibraries.v0.5.3+0.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/1a970ec330c44e501f78cc911f1d68d48c3c8d413b562f5260d298349aa5989451a77637833b05c32934ef2266d55b69a85f7dff42d3b30b6be0876ae0345fc5 +CompilerSupportLibraries.v0.5.3+0.aarch64-linux-musl-libgfortran3.tar.gz/md5/c65c8272cd901d54f31fc152b9e43386 +CompilerSupportLibraries.v0.5.3+0.aarch64-linux-musl-libgfortran3.tar.gz/sha512/ae1e2df4e6d6ff13e21990b433cc526166dddfa8a0066d1f6e159db842f8942e4fcea8089d4c7a2a1830487d40c700ea50a75a721e77b949dddc9576e5c25b36 +CompilerSupportLibraries.v0.5.3+0.aarch64-linux-musl-libgfortran4.tar.gz/md5/1b20eb9a4db5857f399113d800f00202 +CompilerSupportLibraries.v0.5.3+0.aarch64-linux-musl-libgfortran4.tar.gz/sha512/c3f1fbf680e38f87417d4f343191ea63631b6b3421ac992d0ddcaac5dd0415d4ae69c1c211e3744d2deb939c3dd8cc644e2aaa9d049499a79d19f667fc883d6c +CompilerSupportLibraries.v0.5.3+0.aarch64-linux-musl-libgfortran5.tar.gz/md5/17e6e016117ed4bc0cda513547885df1 +CompilerSupportLibraries.v0.5.3+0.aarch64-linux-musl-libgfortran5.tar.gz/sha512/874cc1bb894ba89078bb945d399bd6b52754fa9d0fd18aec07601616305f2e342260443e69dd9987124d227e4bdc2c7712179f3bf105db1aad91eef81668ab21 +CompilerSupportLibraries.v0.5.3+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/dd2e8c08ac76e808f336d1cec1caf9fa +CompilerSupportLibraries.v0.5.3+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/1c7caa8cd301f36902ee6b3f4503f65db75b618c1fb257e90d65a16bf91f3e1c296c04039f87484f91b1106b14ac733b551677ff74c7551ea37df96d198bf843 
+CompilerSupportLibraries.v0.5.3+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/e674ec70d6d4ef10ad6ec96ae4420d2d +CompilerSupportLibraries.v0.5.3+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/cdbbc65f70d9563cb0b635f27aca5dad226a64381654c0d67a1f376726abece5df64d8de53140524d737a6532d0fca62e4c1c2e541f71ecd91fc1d270393c7f0 +CompilerSupportLibraries.v0.5.3+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/1300479a168b53743547ddd1c4a7b97e +CompilerSupportLibraries.v0.5.3+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/e3cfbe0dbb472ad0e3e50bfef564b41d22d1cc67a28f158eeacbf42f977ef926ca7c12af405070b6ca7d5d3ac56ab53994a6ea2bfe8eeef33b07def69e776ec7 +CompilerSupportLibraries.v0.5.3+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/caed03d3c999a485b6fef7a0c17e0650 +CompilerSupportLibraries.v0.5.3+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/34b50d348f69202cc91ae60d71ac9534a68ecd9b781179b51b42c26db74b92bfd29e9983763a5696e8184fa9184e2c0ecdf0bb1a579003bff44e930edc72e6b6 +CompilerSupportLibraries.v0.5.3+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/687a67171c78dd0e1f4dd67aea9e81c2 +CompilerSupportLibraries.v0.5.3+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/57400b7fdd062854358bd3c1e7a71bd4a68d1249cbced8672a5a5420a87d45db57b03742d19edb49d8cb06d70ec7db7ce5530617603f2852144499f3aa974020 +CompilerSupportLibraries.v0.5.3+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/fbab8bb00d73c1f35e757f69fa7cf9fb +CompilerSupportLibraries.v0.5.3+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/c51d3ee743a32e5738d8640aeb0ad6dcc4f1b46451bcd657f4012d2808c6093045642587feada5063152a78320bf2636383cecea16c518dcd980ebf373daff53 +CompilerSupportLibraries.v0.5.3+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/dd2e8c08ac76e808f336d1cec1caf9fa +CompilerSupportLibraries.v0.5.3+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/1c7caa8cd301f36902ee6b3f4503f65db75b618c1fb257e90d65a16bf91f3e1c296c04039f87484f91b1106b14ac733b551677ff74c7551ea37df96d198bf843 +CompilerSupportLibraries.v0.5.3+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/e674ec70d6d4ef10ad6ec96ae4420d2d +CompilerSupportLibraries.v0.5.3+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/cdbbc65f70d9563cb0b635f27aca5dad226a64381654c0d67a1f376726abece5df64d8de53140524d737a6532d0fca62e4c1c2e541f71ecd91fc1d270393c7f0 +CompilerSupportLibraries.v0.5.3+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/1300479a168b53743547ddd1c4a7b97e +CompilerSupportLibraries.v0.5.3+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/e3cfbe0dbb472ad0e3e50bfef564b41d22d1cc67a28f158eeacbf42f977ef926ca7c12af405070b6ca7d5d3ac56ab53994a6ea2bfe8eeef33b07def69e776ec7 +CompilerSupportLibraries.v0.5.3+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/caed03d3c999a485b6fef7a0c17e0650 +CompilerSupportLibraries.v0.5.3+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/34b50d348f69202cc91ae60d71ac9534a68ecd9b781179b51b42c26db74b92bfd29e9983763a5696e8184fa9184e2c0ecdf0bb1a579003bff44e930edc72e6b6 +CompilerSupportLibraries.v0.5.3+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/687a67171c78dd0e1f4dd67aea9e81c2 +CompilerSupportLibraries.v0.5.3+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/57400b7fdd062854358bd3c1e7a71bd4a68d1249cbced8672a5a5420a87d45db57b03742d19edb49d8cb06d70ec7db7ce5530617603f2852144499f3aa974020 +CompilerSupportLibraries.v0.5.3+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/fbab8bb00d73c1f35e757f69fa7cf9fb 
+CompilerSupportLibraries.v0.5.3+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/c51d3ee743a32e5738d8640aeb0ad6dcc4f1b46451bcd657f4012d2808c6093045642587feada5063152a78320bf2636383cecea16c518dcd980ebf373daff53 +CompilerSupportLibraries.v0.5.3+0.i686-linux-gnu-libgfortran3.tar.gz/md5/ad9b2ac08e5b1d820cbb3995cb0a5da2 +CompilerSupportLibraries.v0.5.3+0.i686-linux-gnu-libgfortran3.tar.gz/sha512/e8225c962fd2715e6a63f0b3068b55b52f882443bae08dc7cd0741179677a7f729f20578728ba1778e32e1c53a71eefd5c0e88cbcff736879e1716ac88c46924 +CompilerSupportLibraries.v0.5.3+0.i686-linux-gnu-libgfortran4.tar.gz/md5/20edad4ccb4d03288e0c79b21c438c81 +CompilerSupportLibraries.v0.5.3+0.i686-linux-gnu-libgfortran4.tar.gz/sha512/6c11ceeb315e6d0f508d0fe72f2abd26413a5e8f43ece559b9adc7b1795e1f04bfe62fa8d61f6c3dcd07a5331c0efe089db1fb842714982900cbd1ce293e05df +CompilerSupportLibraries.v0.5.3+0.i686-linux-gnu-libgfortran5.tar.gz/md5/e224db2b1d04a3495be7eb896b566b21 +CompilerSupportLibraries.v0.5.3+0.i686-linux-gnu-libgfortran5.tar.gz/sha512/b439e4b375f2313f6b3c862e1b5cbb7f4c143d6aa470683031c6a86e52aeaccaa2c90efdcc78950fc7502f592e58dc335a490956b2a6f50e76245a8678684cf5 +CompilerSupportLibraries.v0.5.3+0.i686-linux-musl-libgfortran3.tar.gz/md5/11c2b8911933a65023cb337989a10118 +CompilerSupportLibraries.v0.5.3+0.i686-linux-musl-libgfortran3.tar.gz/sha512/2771b40caf6050d0106d6efc5766220ac6be5570f15275f4c05b931c0c6e529454df6945942b84bc89b7555a496e3c5d5a79bedb8cfb4b54c2d17907d8883418 +CompilerSupportLibraries.v0.5.3+0.i686-linux-musl-libgfortran4.tar.gz/md5/e57678fa9e442852e9fbdb3470fdb923 +CompilerSupportLibraries.v0.5.3+0.i686-linux-musl-libgfortran4.tar.gz/sha512/ca23b099f875fd4fd7f9d871bf5bd73829e8ed089122e64242112f6945d008f849e001dbb36b30f2a35fc8a92cdbb747d8c76ce892320ebbaec85f6a2180ee53 +CompilerSupportLibraries.v0.5.3+0.i686-linux-musl-libgfortran5.tar.gz/md5/1719ac7bb9d55a738f06133a843feced +CompilerSupportLibraries.v0.5.3+0.i686-linux-musl-libgfortran5.tar.gz/sha512/17962e52395f10c07b2b19ed487b104486fddf93130913ff6023451542d56da4d125abd0e0d909df5e973805714ceb5bfab535c2ed131b4fa858152e7f833b63 +CompilerSupportLibraries.v0.5.3+0.i686-w64-mingw32-libgfortran3.tar.gz/md5/04f5eb5fa12b85d90eddaff07c34ed8a +CompilerSupportLibraries.v0.5.3+0.i686-w64-mingw32-libgfortran3.tar.gz/sha512/890af6d446ff49765f6305efedd5ff65b4285c3a5522cf0d8238b0c65af2392715cf670f9ad0a453778e3175993fa800b1b6e4a6dcfb06a16e7f7f975b850922 +CompilerSupportLibraries.v0.5.3+0.i686-w64-mingw32-libgfortran4.tar.gz/md5/44f0c502e0856b169611abcb710f4ec6 +CompilerSupportLibraries.v0.5.3+0.i686-w64-mingw32-libgfortran4.tar.gz/sha512/e5f7687540ef0828e43b731b5292f526bb570ef59e37e2ebb7e791f845199650364b32550c684aee8afd95d08ef89da30f967964237c13db7ef4c48660420026 +CompilerSupportLibraries.v0.5.3+0.i686-w64-mingw32-libgfortran5.tar.gz/md5/d804d5fb304f11b4a6cf5617d38fbd8b +CompilerSupportLibraries.v0.5.3+0.i686-w64-mingw32-libgfortran5.tar.gz/sha512/20152161730e80c4bdea92648ab0a4f7fa18dac038a98432d190bf532575a83d4cd05bde16af6f8b7003be557c441edcceab83600f70092e6f4b101e875bec67 +CompilerSupportLibraries.v0.5.3+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/353ae80799b6f0035aeaea3fe8fce67b +CompilerSupportLibraries.v0.5.3+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/ebc40f9a0b074d56d428a738fae6f68152a9d30ed8190e2abcf9d774ca5cfcef43fd00dbcc811e0c5ebf40dfaf5ca5e2bce7041b692f75817ef95de2a108597b +CompilerSupportLibraries.v0.5.3+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/65f3a5cde15d635aafeb1f6b1590b219 
+CompilerSupportLibraries.v0.5.3+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/7125500cd57813261a094738e6fb9c89580a3f3208073fc5e53a9ea539815fc315376dcb04be226bf3894d638a73569c87e3737e098df056aa55e4fa4c19b631 +CompilerSupportLibraries.v0.5.3+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/41f16a092a0d769b4f6610b8045d41e5 +CompilerSupportLibraries.v0.5.3+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/21ab01603010525d5246088baced6815f852997c743ef723bb3aaa2a0a5bf6fd6196a9f72a993dd482e4f7afc93086c1b333abc96197f749901789f4ba49ff27 +CompilerSupportLibraries.v0.5.3+0.x86_64-apple-darwin-libgfortran3.tar.gz/md5/35724bf7657949342c844c82687c9411 +CompilerSupportLibraries.v0.5.3+0.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/c7cd31093d2fef7a93ef846dea412ffe10c0730b8c42fff916cb04db294b91765400b7a2ab3120730008225441d5cd43654bf528632468078bae35f14f2d13c3 +CompilerSupportLibraries.v0.5.3+0.x86_64-apple-darwin-libgfortran4.tar.gz/md5/9953854112a730afc6ba1104f4af3b52 +CompilerSupportLibraries.v0.5.3+0.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/c1b9c12c5a362295094d7aafb6aff4efa9fc0c4dc823189a0cbdb6430ed79d3f41eafbb8523217fa4fe7a95de5e541b3b971a64db5f3062dbb71aeb268e9699b +CompilerSupportLibraries.v0.5.3+0.x86_64-apple-darwin-libgfortran5.tar.gz/md5/c43db22a404279f8503567a4f6d0ba1d +CompilerSupportLibraries.v0.5.3+0.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/e115b6e7065699eb1e0be83f4c64ccbaa958883b7719dea7c84c8d3000ee42c759cd2d7220278373c404a484f56b2a47bf908439516523568844c99310b496f0 +CompilerSupportLibraries.v0.5.3+0.x86_64-linux-gnu-libgfortran3.tar.gz/md5/737d1c53fba0416a0e632ee6f01d181b +CompilerSupportLibraries.v0.5.3+0.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/da9439d89574e0821345a2e133731859df91a2b2ba7597f98b924e33f8f5bd99edb6d6b733301f23ccbdbb7a27189cefa040438fcc26070b7e23c49438cb80d1 +CompilerSupportLibraries.v0.5.3+0.x86_64-linux-gnu-libgfortran4.tar.gz/md5/3e597127d9d35fc9dd683ce6c0559991 +CompilerSupportLibraries.v0.5.3+0.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/6dc8972cc9ecf70391461abb9489e60b3d5f8ea94ea93c223655fd6c73e6efcbfd3ef32ac2915a760d5bc743cc1afce759c83a18147300c9c0cf72400175c228 +CompilerSupportLibraries.v0.5.3+0.x86_64-linux-gnu-libgfortran5.tar.gz/md5/c43015ddde4f8fac4c9c63247b8bb0be +CompilerSupportLibraries.v0.5.3+0.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/57b0eaffc6274955edf9a61343ae0e4043c67ffe5ef9b51c71731c39b47f45a5b65a8a8753f10f9d548eb54ad8618797eb0c4da60ad14aa8e7892585d221eb32 +CompilerSupportLibraries.v0.5.3+0.x86_64-linux-musl-libgfortran3.tar.gz/md5/c85a6e4152f2026718ab1246515e48f9 +CompilerSupportLibraries.v0.5.3+0.x86_64-linux-musl-libgfortran3.tar.gz/sha512/45686394b9eddadbb951373ac51e2e321238313d0bde8757bae338850a1789bae6b62849f2cc5034c4e1d3240ef6a72c99f27f1b049f5ea9e016132ecb05a95f +CompilerSupportLibraries.v0.5.3+0.x86_64-linux-musl-libgfortran4.tar.gz/md5/1205e7e17911939db914a6d0ea26dcab +CompilerSupportLibraries.v0.5.3+0.x86_64-linux-musl-libgfortran4.tar.gz/sha512/78eca2cb13719a5fb61747edc7156d9a939fd687702c1c07e9625b1a00cb3dcfafa1a19b4853337770ae1faf37b80d0f7e4f6fc51f40f32a4f9a4b6fcccce8cd +CompilerSupportLibraries.v0.5.3+0.x86_64-linux-musl-libgfortran5.tar.gz/md5/cafcdc864d6ad074c8d15283843c8ad2 +CompilerSupportLibraries.v0.5.3+0.x86_64-linux-musl-libgfortran5.tar.gz/sha512/7f1ee1a37e5bb8f8df3a9d5469efadfdbb14dd69309b0366e71637f74cb6e815f9292b558e51b59efb7839a97f412f4433e2baa876ad4f5ba86be4f8b16008fa +CompilerSupportLibraries.v0.5.3+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/bb1d821badf88fd0538e3d5881fe01ab 
+CompilerSupportLibraries.v0.5.3+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/b3bfdf2065227eb5965a6325f9e46c10069adcc072b1b96e8c5e2c92f1c5cb93e39df14d26b7b062e18400676d56ce429eea820b4de00f2fbc8c410b2a1e1828 +CompilerSupportLibraries.v0.5.3+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/f3229d3d3cac19001111804515146306 +CompilerSupportLibraries.v0.5.3+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/26d4590a798afad6416e364c1e653b9d206218ad305be5adf1f475f1db4f1164f32c3152f6b6db0283d76c439c11c2af9ea52982bf502814cf0e66c728407942 +CompilerSupportLibraries.v0.5.3+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/1d5023d5999f767d10195c27c60cf580 +CompilerSupportLibraries.v0.5.3+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/359bb3f5eb20d6230d6dcf3c5ed95c8a50783f3d7811c77f05f6e0826d472642c5e9a3535bc5368050d6d6129b1b35303cfbed3c4184808e0104d1ea63f43740 +CompilerSupportLibraries.v0.5.3+0.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/e6545d6c6abb852c2667ae5d4df7926d +CompilerSupportLibraries.v0.5.3+0.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/9b906e04fbb0552bdc37fe23a53937e04f998592762935bc645b5b36f6ed8d2fe213de1c0975f8aa08930b13eb574fcdfb7c13a50c43d92dc91327f55d9eedef +CompilerSupportLibraries.v0.5.3+0.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/f1368a4d4401110bc34dd8c363995ddf +CompilerSupportLibraries.v0.5.3+0.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/90975d27f6dd6e9017e26155f9aaa20b17bcea1ef720b02aec86c2e9baa2053daacbe9eb595544e8ad765bc111682e852e5cda401ac4353148cd8c1052c99cff +CompilerSupportLibraries.v0.5.3+0.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/5fe386e00d7311c701f93f95b201f9af +CompilerSupportLibraries.v0.5.3+0.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/8f8432c87b0aaea547d8a70031ca85a615970c85c83dce229489de18c93bab0b667e1dde86f05ce7c37d020cf126b0a4249ac7e7c86822c1f7ebc5671879bbcb diff --git a/deps/checksums/unwind b/deps/checksums/unwind index 5751b9111c9e0..7a3141d79368c 100644 --- a/deps/checksums/unwind +++ b/deps/checksums/unwind @@ -1,26 +1,26 @@ -LibUnwind.v1.5.0+2.aarch64-linux-gnu.tar.gz/md5/95e3a6b8e1842e21f8793c489dc07f9b -LibUnwind.v1.5.0+2.aarch64-linux-gnu.tar.gz/sha512/faf51cc25065f1493b32fb7520a4ababe631b918336ca51091345f75cae967977b29185476d5c12fd5f9533531c1cbcb84ec17fe941a8d7bfa5aef9396842f87 -LibUnwind.v1.5.0+2.aarch64-linux-musl.tar.gz/md5/c084f8c50371b3ddb85f6290534e79ef -LibUnwind.v1.5.0+2.aarch64-linux-musl.tar.gz/sha512/2df5fead3bd8ea43c136431e7b1340c2a0b605c04c575e1d22edab479fe8074d898f9bd3c791eb5d91c3e52d31bb7f841dd8f11be97a3eb6d8ac61d1d809622e -LibUnwind.v1.5.0+2.armv6l-linux-gnueabihf.tar.gz/md5/6c11ca045cbaeb4fe75363aa116a784e -LibUnwind.v1.5.0+2.armv6l-linux-gnueabihf.tar.gz/sha512/5c337a850f184a0c514da8abca42aa5728cfa384a9ef3d0f36d67b10e322affe95d4a05c76faca69ca66cdb4e8535f4e8ee0f650ef39a27f897083e66570d6fb -LibUnwind.v1.5.0+2.armv6l-linux-musleabihf.tar.gz/md5/8e4b4569abccd11fb577346f6df42d4a -LibUnwind.v1.5.0+2.armv6l-linux-musleabihf.tar.gz/sha512/025660fe2dbb9d5904b865d6a3716553f368c5589b0cf8fd5f93a87e2204e5c66309b046de0d853ce643992dfa6433fc6214417bd477c4f363fd946ad6c97179 -LibUnwind.v1.5.0+2.armv7l-linux-gnueabihf.tar.gz/md5/c57b86157a00931608786578795e398a -LibUnwind.v1.5.0+2.armv7l-linux-gnueabihf.tar.gz/sha512/f7b720f0ab208212b66fac6783e98edfc80bca9b2b903bf665da1a464a0a615aaa998ea1bee9088c73124879ded53b58fe9c5086ec547a50bcdf14be93652da1 -LibUnwind.v1.5.0+2.armv7l-linux-musleabihf.tar.gz/md5/a88e3a13a02c9d491ced12c7ba416508 
-LibUnwind.v1.5.0+2.armv7l-linux-musleabihf.tar.gz/sha512/ef705a74750680e81daec6ff790797f247a7dbdb99731ab4083bc9a56f3f79da68c2c15321f5f6466d2f71b228aae5f59f793a16a06cf93a57366a051b748376 -LibUnwind.v1.5.0+2.i686-linux-gnu.tar.gz/md5/657a43f2b2e323ed3f298baae60bcd52 -LibUnwind.v1.5.0+2.i686-linux-gnu.tar.gz/sha512/138646a791044ab3106452111b5801710fccd2a0356b566751fee93d8e636a7f2cc14679d5cf515f1bdebcac5722af746c2047775a7e191f7ddc068914d29383 -LibUnwind.v1.5.0+2.i686-linux-musl.tar.gz/md5/cfe5281bca9498083c1da5eb787c2bac -LibUnwind.v1.5.0+2.i686-linux-musl.tar.gz/sha512/b786d9000d2435f3284072ae527d172e89224373c59683ba265d24946ac89ab714d2ced6eb37a0191bea85de556a5ea1420a089aa5ba4f01ed9397e945841bd9 -LibUnwind.v1.5.0+2.powerpc64le-linux-gnu.tar.gz/md5/c2f19ab443307b986d9545bfce7e3f83 -LibUnwind.v1.5.0+2.powerpc64le-linux-gnu.tar.gz/sha512/034493ac5822d481976e4ee2d53db066788fab7fb0053bd472c6ef1d078700882487aebc4f7bb1be5bff9719eb048a24d8a7318a34154e04f9a192eef5fa56b8 -LibUnwind.v1.5.0+2.x86_64-linux-gnu.tar.gz/md5/f6c7ca4303e43dd3a22314dbab294037 -LibUnwind.v1.5.0+2.x86_64-linux-gnu.tar.gz/sha512/0c7d7793b606cbd51d1be85bbc8c62bf2a60b4b25279d4267e535d5ba53b8cc667f5cc92e607439ee8354bda8c03637315f93bee23bb09b47d83b3b4543c690d -LibUnwind.v1.5.0+2.x86_64-linux-musl.tar.gz/md5/4ba92194d0e323839d2207093f365be9 -LibUnwind.v1.5.0+2.x86_64-linux-musl.tar.gz/sha512/49110890d2e4e0050c52c5b2f94288c2afe1c75cd3b54345a49f095a9ea6804122c7d1b4dac831a169dabf510247107c299031b732a23d8d217ab0fd4e1d0682 -LibUnwind.v1.5.0+2.x86_64-unknown-freebsd.tar.gz/md5/e9b4a61538244b4dc05147f94b4d31d4 -LibUnwind.v1.5.0+2.x86_64-unknown-freebsd.tar.gz/sha512/bcae20fdd5ac3da362b94a6059b2c055de111507a8da7ae311fe176cb3873429eb7b30aaf83210699fb24fc8a309648a30514f34c43615e02268528b6b29cb27 +LibUnwind.v1.5.0+4.aarch64-linux-gnu.tar.gz/md5/b40fee1e2995d3fa2c823c45b231d9f0 +LibUnwind.v1.5.0+4.aarch64-linux-gnu.tar.gz/sha512/d5865dabb541c3e1a5b6bc20547adc0788dde0f74731006e44e2cd128742c1ce61638a31340f8f4bfcd8b052706c3d57c24a202d048cb8d0496a909ff51fe9f7 +LibUnwind.v1.5.0+4.aarch64-linux-musl.tar.gz/md5/580b46908f43309c3f88c9ec4177d296 +LibUnwind.v1.5.0+4.aarch64-linux-musl.tar.gz/sha512/c12caa005586bea53932054d2742d6b55c40fd1a284daeb73924f3b761115929e022f3cf377b590d818e2c69726d42f12d4c87be2daf6d43caeaef54e226afdb +LibUnwind.v1.5.0+4.armv6l-linux-gnueabihf.tar.gz/md5/5af8f16e7eb32718cde68ee840c373c2 +LibUnwind.v1.5.0+4.armv6l-linux-gnueabihf.tar.gz/sha512/71e6f64477bc356c42bf1604e61a2596dfdb90f5fc3005e6656f2aa5ba0576867e6b482501d3d3c68da623cf4d6c572e4fb9708a71988671b1bbe76d6c2e4754 +LibUnwind.v1.5.0+4.armv6l-linux-musleabihf.tar.gz/md5/446f9021d1903410ed9b2e400e2533af +LibUnwind.v1.5.0+4.armv6l-linux-musleabihf.tar.gz/sha512/bf39ac9faea323c394e627647aaafacccdcd9545ac970b771dc4736376c56f0e1cfe58fead45625b7c491d91ae4f1dd41c3303d04536ef514c3a3657c06fd261 +LibUnwind.v1.5.0+4.armv7l-linux-gnueabihf.tar.gz/md5/ab594ba2df5cdc08dcf74ee2d0af9742 +LibUnwind.v1.5.0+4.armv7l-linux-gnueabihf.tar.gz/sha512/80f3b0c922b27d98fec1ba58f227af3c9d3e9691f34ed088152619289fa09b03a5b891162cd8ba497432867d60c2cd97a3466178c0891d848ded167e64f720ef +LibUnwind.v1.5.0+4.armv7l-linux-musleabihf.tar.gz/md5/84cdf938ab0880447f242d86ad9e6d1d +LibUnwind.v1.5.0+4.armv7l-linux-musleabihf.tar.gz/sha512/a985e9fc4e75cb292e7cb80ae0446110221a7f785818f53ac26c03dc2e142c959a6f380ffbceb43039dc95659e0da608b436d5faa5133f7d49308dd6198652f3 +LibUnwind.v1.5.0+4.i686-linux-gnu.tar.gz/md5/29a8d300b5edc3b25fc0c38d415ec4a7 
+LibUnwind.v1.5.0+4.i686-linux-gnu.tar.gz/sha512/c96b954ee5736ad69a47e1214aac483ed2697a013749a696de823e2064bd5869590ae17c19268bf06227c9065b10bb36b197fb73987a74706fd37e0eefc17254 +LibUnwind.v1.5.0+4.i686-linux-musl.tar.gz/md5/fe8822d87cbad1abc4173a0c5c3f082f +LibUnwind.v1.5.0+4.i686-linux-musl.tar.gz/sha512/ff09cdbb4046413c260df0058a2fb3c2daa56e656a038c1ff4c47b251254e08066ae3b8b144a02483e1ca7d92192d8e3c1b005adcf2dad26343219eab4c26d95 +LibUnwind.v1.5.0+4.powerpc64le-linux-gnu.tar.gz/md5/15eea5ef1f4ad04cc8fb8f701571233f +LibUnwind.v1.5.0+4.powerpc64le-linux-gnu.tar.gz/sha512/875d50cea141397783c4d3062a08a1951fb14c96e9c99489ddeb91f94f403c48e8d358c181b6649198318586463efedd1b5f991acc792d8412a6ad2c810c568e +LibUnwind.v1.5.0+4.x86_64-linux-gnu.tar.gz/md5/2b7b2264763d10f39c548b3f23ea1a95 +LibUnwind.v1.5.0+4.x86_64-linux-gnu.tar.gz/sha512/7e76ae26ce7f6f60020af0908c7197e28204a8b290022af7dd92b17d64b01d68338d347e3f78a5946fef2faec3cd3f1c274bc55de1472a6245867b8e5219dd0a +LibUnwind.v1.5.0+4.x86_64-linux-musl.tar.gz/md5/84789e4ee681fbe4697e02431ab1004b +LibUnwind.v1.5.0+4.x86_64-linux-musl.tar.gz/sha512/e8166e2efbb70a3b492551556c72181c505b8cdb2e5d528caa69b32727c59f3e065e4455fdd9749878bb6d1ab5962ca7dfe2ebc9efa6dbdb0bebd210bd16c6a7 +LibUnwind.v1.5.0+4.x86_64-unknown-freebsd.tar.gz/md5/f35f256dd24183f72a932946c07073b0 +LibUnwind.v1.5.0+4.x86_64-unknown-freebsd.tar.gz/sha512/de80153025ba3e4192c8faf3f7c5f5a0044d4580f8cb56f4c0206f7030cbeeb406cdd064f87b4568392c06e96b9e32fc07c55b68b92e8cc5d596fb79040ecb78 libunwind-1.5.0.tar.gz/md5/c6923dda0675f6a4ef21426164dc8b6a libunwind-1.5.0.tar.gz/sha512/1df20ca7a8cee2f2e61294fa9b677e88fec52e9d5a329f88d05c2671c69fa462f6c18808c97ca9ff664ef57292537a844f00b18d142b1938c9da701ca95a4bab diff --git a/deps/checksums/zlib b/deps/checksums/zlib index d524a3f588a18..15e2cffa5b485 100644 --- a/deps/checksums/zlib +++ b/deps/checksums/zlib @@ -1,34 +1,34 @@ -Zlib.v1.2.12+3.aarch64-apple-darwin.tar.gz/md5/2258883a6412fbdac0b807afd133834f -Zlib.v1.2.12+3.aarch64-apple-darwin.tar.gz/sha512/6e82b57646dfe2b86978d51cb4401d565d00d6bdcfabe09ceb888ad8979bd1398fd9ea7652542f149d88c120110f6c3baa919616f01410e9238a5199f50f5dda -Zlib.v1.2.12+3.aarch64-linux-gnu.tar.gz/md5/663aa0d0791b92464e4822a130ac7fa9 -Zlib.v1.2.12+3.aarch64-linux-gnu.tar.gz/sha512/e50f00d92600a78b2f540e0e8e1dce435d0d0499ea80ce3c3cd0e11c8e3b5b1a97eadca9ac863f597cee369e80bcd50ec1c0a0e0f1a87bb0ff94bbaf453dea2d -Zlib.v1.2.12+3.aarch64-linux-musl.tar.gz/md5/471179a2364d59abb6426b378ea4e195 -Zlib.v1.2.12+3.aarch64-linux-musl.tar.gz/sha512/35208e4be5966343ecb2b78471a3e1a947489f83c828b562db3508506dd0493eae3318c7eb3a6b599e911416795023193df862fbb6fcc7389d44710dc30f16a8 -Zlib.v1.2.12+3.armv6l-linux-gnueabihf.tar.gz/md5/53601c0201dadc8c9ff038167d5c4277 -Zlib.v1.2.12+3.armv6l-linux-gnueabihf.tar.gz/sha512/19744283bb412a656b934347cb7a1d121fbaf7e5f9b1aac373ddf2466567b731817a2e72e3a4d993ca7e5b5eb1fd9bb9c24d0126778367b28bdb94721649298b -Zlib.v1.2.12+3.armv6l-linux-musleabihf.tar.gz/md5/f7c923955fc600785aae455807e63c8b -Zlib.v1.2.12+3.armv6l-linux-musleabihf.tar.gz/sha512/623cd1758465c9e40b0dad93981ae93097a03f4aa67487b7e1c7240be2d780d86f35f8db96743c35bbb329d572741b58e73735a2b1cfb9e18e77f4dbcc714063 -Zlib.v1.2.12+3.armv7l-linux-gnueabihf.tar.gz/md5/5ce0fe42f67e09de047626424d61bc82 -Zlib.v1.2.12+3.armv7l-linux-gnueabihf.tar.gz/sha512/322e32d6fe6cd7a3334f5146f8980d4f1fc85b9a1c60271659ba8b4bbfdec314f8d9e8c6c0719248f5dd18e3daefd946811a3dcc74fa3ae5505d6dd653e65309 -Zlib.v1.2.12+3.armv7l-linux-musleabihf.tar.gz/md5/5115c374df90393cb895dd45c77275c4 
-Zlib.v1.2.12+3.armv7l-linux-musleabihf.tar.gz/sha512/b04b4f42220833b99923a3ff349e4a05ad9f67c2b62d4848de37c833b287420b1dbec8a039c09d2a95ab6b68a62c6dcbacb4ba7cc069a4e90a11f8592719d2b8 -Zlib.v1.2.12+3.i686-linux-gnu.tar.gz/md5/37e0186f765fada0d76b9cd6f28c8d5d -Zlib.v1.2.12+3.i686-linux-gnu.tar.gz/sha512/1239675bbf46c6243131585283b0fc23baa32e68226fbb2f0b7a833c8979e2df33590947daade533e37bafe21838a10198e9f9de99e094c21fba6b218b2fceab -Zlib.v1.2.12+3.i686-linux-musl.tar.gz/md5/a0d92af6481929eed3a9fec3dbb2e622 -Zlib.v1.2.12+3.i686-linux-musl.tar.gz/sha512/b448590129ef251083b675c3d7494a90151a03297fd9883efb70bde032d106f16f2ec7c28508d9b4a0d0e5a0be0bdb4bcf0d1a9e4b2ade034a6d6cfc4916536e -Zlib.v1.2.12+3.i686-w64-mingw32.tar.gz/md5/cc38d9ec5430e2ed7fed4792c7ac9551 -Zlib.v1.2.12+3.i686-w64-mingw32.tar.gz/sha512/85ad3babb42682d7b2b69513a30fd5e992a56436dcd7e2a44800bf1bc30d60d09aff5769cfaeefd4f5668e7973a0c2d4ad4d28559ea5f28c1c5419ed595eae57 -Zlib.v1.2.12+3.powerpc64le-linux-gnu.tar.gz/md5/8f57d8c31d2355c64a05db0412462d58 -Zlib.v1.2.12+3.powerpc64le-linux-gnu.tar.gz/sha512/9a0208c7a4dbf71b6f7e1ccaf05e3f3a422507cf0431b6482aab1a7b1bea41bd135320567f7dba6666f37c26f48cb3a627f1a1ebd39bf5c2d61148aadf62a986 -Zlib.v1.2.12+3.x86_64-apple-darwin.tar.gz/md5/5d15bb591d26d24aa9d6c9c8cf3df097 -Zlib.v1.2.12+3.x86_64-apple-darwin.tar.gz/sha512/7d8b0ec5a46a85cef3c5de451823c5cfa73b5b7c5ac98699065bbc5692af556195664908cd5c35184b7a9586fc0adab41fc0f76ee8599ca09a740cf49b9be113 -Zlib.v1.2.12+3.x86_64-linux-gnu.tar.gz/md5/25df63b9e6cbef14b0f0bf2a9eec5d14 -Zlib.v1.2.12+3.x86_64-linux-gnu.tar.gz/sha512/2660b762d816491e6b877020d8dd4a1cf1b171d6232dd5e0f47c6ee7b15504b006cc8f051434df778e0910130ef7456e30d531464470d3c4a2502e8f9fd19e76 -Zlib.v1.2.12+3.x86_64-linux-musl.tar.gz/md5/3f0c85d248711608141046d15b2da339 -Zlib.v1.2.12+3.x86_64-linux-musl.tar.gz/sha512/e4256b1b9520d5b0d97fa7e7ca6f6b9aa2583c6e5f14967392d54e48f27e242461f77e522743b229ab9b333eec5fd51f6d7b1559b566bd68ca0741b05b96df3c -Zlib.v1.2.12+3.x86_64-unknown-freebsd.tar.gz/md5/e67dae1456645930c9e2b2fef6f805c8 -Zlib.v1.2.12+3.x86_64-unknown-freebsd.tar.gz/sha512/5915ec48ae80be829c36a71e2ce580d2d14b7a9824c8f279ad5c69fea62d9a03345b665f224b9dde0bc4b808af246f89ec4f932d47a14236bc3b7db7651e5bec -Zlib.v1.2.12+3.x86_64-w64-mingw32.tar.gz/md5/89b152b3de0068c7c2580b87ad529ed3 -Zlib.v1.2.12+3.x86_64-w64-mingw32.tar.gz/sha512/df4b585f6501f45bc85e8d00c1b03c482d70d3491081246f9e9f9560f90c5f6057b1174a81e653f725209323cd743cf05d3e1aba1385afd26cb6f8c50186f818 -zlib-21767c654d31d2dccdde4330529775c6c5fd5389.tar.gz/md5/1fb2320f871561306bc87b3894727b45 -zlib-21767c654d31d2dccdde4330529775c6c5fd5389.tar.gz/sha512/2ad1e728f97a81b65d24fe5bef66658c94222d717a3486a0d11682b61563d7eaaa578f7457078881e8ed8c91b87aec11634d4a64021546e23a3ecabb3285197a +Zlib.v1.2.13+0.aarch64-apple-darwin.tar.gz/md5/64403a5962d70d7e4b6bf7c225526144 +Zlib.v1.2.13+0.aarch64-apple-darwin.tar.gz/sha512/a7e6bb32c324943e5df3fa8501ee9d744d132db6f27033fe8ce789c1f19f26c15dc456ee8d6fc8095b427054e750ffe268500f5f69edecaa1af230b4b23535c4 +Zlib.v1.2.13+0.aarch64-linux-gnu.tar.gz/md5/a2d3265543017db03bc47b9d9778d99d +Zlib.v1.2.13+0.aarch64-linux-gnu.tar.gz/sha512/c8143445222e151d7f522a98ee8f2742571542f4e71d515e88086c9d7f27b952662ced93f40c795e0de42e3a07c0cb5e1d9d8e792347f3c068cb07ccc144a640 +Zlib.v1.2.13+0.aarch64-linux-musl.tar.gz/md5/c1f2a1c562f72c7aa4b228f57c2346d4 +Zlib.v1.2.13+0.aarch64-linux-musl.tar.gz/sha512/7ed89bc7696690c03617c7413f5456ff5a1caa0dd600880ae67132f6c9190672ae451a06d23956a1969be00bf5c8f29bfa4f5bc4ab646b3b375c350f67c993e5 
+Zlib.v1.2.13+0.armv6l-linux-gnueabihf.tar.gz/md5/7dff966f7bc5dd2902fa9ce20444235b +Zlib.v1.2.13+0.armv6l-linux-gnueabihf.tar.gz/sha512/49e7b4a7c84996b697cf944b11ce06ce6064983a6a911c4539587385afa1e0119e3b1dbf816703a2c132acc90f7f114ec10631647638b59b14954382c1a82014 +Zlib.v1.2.13+0.armv6l-linux-musleabihf.tar.gz/md5/6982f19d2446559c0fd369afe84ebe4a +Zlib.v1.2.13+0.armv6l-linux-musleabihf.tar.gz/sha512/8f69dfb7fb91cd6f7c934e1acddd83f77c2ebcc1732553f41ae1adcb7805a3304d16062133ce5094a8aea18ff5eca5f7a2df5724ae5a5cb9137caee732c1bf36 +Zlib.v1.2.13+0.armv7l-linux-gnueabihf.tar.gz/md5/30579a91f8f1c96752fe9a82bc053523 +Zlib.v1.2.13+0.armv7l-linux-gnueabihf.tar.gz/sha512/64f6a0e66ee13b086609e0d070c8742de20052e1ef43da201be0007e478c65b2f0a28a3c19ca5be6537b7c8bbeb6a4b2886c15a1e47bb2bd1cfe9d5e1590a620 +Zlib.v1.2.13+0.armv7l-linux-musleabihf.tar.gz/md5/b052ad151dbc3bad78762bc06164d667 +Zlib.v1.2.13+0.armv7l-linux-musleabihf.tar.gz/sha512/b5d2de09a4d65d898cf9ba0db34327c712f42a78cd1fd0f1d77fd8798910502049be63ccfed23de5fe3b499d9e0fe3d4cbb07c72765fd54db275e92f8f1e4dc4 +Zlib.v1.2.13+0.i686-linux-gnu.tar.gz/md5/3074702010889f586b43aa3dbbda4ceb +Zlib.v1.2.13+0.i686-linux-gnu.tar.gz/sha512/92aa87c5aa3831155305276c2f0da091b5be4e8a396772e1a28650c2837ceb116dd2207329732b653a97c011abd7dd6ac1fc9574ac64cb3049ccd36fa6700748 +Zlib.v1.2.13+0.i686-linux-musl.tar.gz/md5/eff02476825ea7a53ab26b346d58f96e +Zlib.v1.2.13+0.i686-linux-musl.tar.gz/sha512/14b72607d524948198e999e3919ee01046c049b3ec441bc581c77642cf37c3d28cc3c5500a3c073d62e9b8dc1efc9661b23bb925ed9c80b5e69abaddbcb59115 +Zlib.v1.2.13+0.i686-w64-mingw32.tar.gz/md5/279d2699458b1dfec80da17dd6f32f02 +Zlib.v1.2.13+0.i686-w64-mingw32.tar.gz/sha512/fb14d27b4f4ed5eb75bf4d4377074a206610558301be89ed692cf61d1266e425edb0489511fbbec100dafc71cff2cac863a4ea4ec70cfaa94e8175b9b7add25c +Zlib.v1.2.13+0.powerpc64le-linux-gnu.tar.gz/md5/bc69de101d9159b22b7a334e2700faa6 +Zlib.v1.2.13+0.powerpc64le-linux-gnu.tar.gz/sha512/174eb4f154594d268d970d23eb6144dd2f6be41ddcfb9bc756b2ff48f0781ad0ed6571e2ead64dab0967da91517a02cd8db2b0e33a0bde9400103b5204f78e85 +Zlib.v1.2.13+0.x86_64-apple-darwin.tar.gz/md5/9a53075fc5595e638bacd25341f7ff42 +Zlib.v1.2.13+0.x86_64-apple-darwin.tar.gz/sha512/8124f677c036a288575712e201a809f44532b300fa56f8c12be9a1d7094fd644cb198c47b63d9f9f16d5509e27e7b3c59f080d4748ae489a4977fdfeae79e762 +Zlib.v1.2.13+0.x86_64-linux-gnu.tar.gz/md5/b192d547d56124262e2ae744f385efd6 +Zlib.v1.2.13+0.x86_64-linux-gnu.tar.gz/sha512/c6dca3c0a713ef2e2296bc9e9afa75e103a4cc4f00b5c905ebc5cff688904d6a454f83ab5ef3b6c66bdf425daa2fcd25825e50a3534c0ff109b13affbb686179 +Zlib.v1.2.13+0.x86_64-linux-musl.tar.gz/md5/f2a466b38b2ff1c895f630982147a950 +Zlib.v1.2.13+0.x86_64-linux-musl.tar.gz/sha512/191261d37fc501591005bf680d76bf518da261252456c4fef1c12bc572f9200a855fbd1b125bb8ad10d803eedbc53d4c9d7a2861e9a35d629fb40f87e5306f5f +Zlib.v1.2.13+0.x86_64-unknown-freebsd.tar.gz/md5/00cb91c5edede46f72fae113b3115799 +Zlib.v1.2.13+0.x86_64-unknown-freebsd.tar.gz/sha512/8894e4a89dbf10e60ed020993484dcad91a52a8d310f3dfcc53808643c8401b1e445db46a815c19d55c0e5fd1a386945d1253c16af94b00ff27ccda44941f69b +Zlib.v1.2.13+0.x86_64-w64-mingw32.tar.gz/md5/f98c68e19d9cfd24c7cec0b79d374e05 +Zlib.v1.2.13+0.x86_64-w64-mingw32.tar.gz/sha512/8e68edbdfe4e2ec6de70a724e30bc2df439901291639eca9e5aace75e31c7c6d3f47021213b8b7473b1f6ad4986f6b8695da4e24e2ea3025681e5d07dcfc067d +zlib-04f42ceca40f73e2978b50e93806c2a18c1281fc.tar.gz/md5/60a49c89b9409dd91c1b039266f7bd0c 
+zlib-04f42ceca40f73e2978b50e93806c2a18c1281fc.tar.gz/sha512/83122539da9399ce5f51c2ecbc38a627405334a9a6d53a024341353c1263a1e3aef7498f30ee281a49b3022be70e992eae475691e33da7a9c6a59b83207bd688 diff --git a/deps/curl.mk b/deps/curl.mk index 58f3e1f89c05d..435ee278e3468 100644 --- a/deps/curl.mk +++ b/deps/curl.mk @@ -38,11 +38,11 @@ checksum-curl: $(SRCCACHE)/curl-$(CURL_VER).tar.bz2 CURL_CONFIGURE_FLAGS := $(CONFIGURE_COMMON) \ --without-ssl --without-gnutls --without-libidn2 --without-librtmp \ --without-nss --without-libpsl --without-libgsasl --without-fish-functions-dir \ - --disable-ares --disable-manual --disable-ldap --disable-ldaps --disable-static + --disable-ares --disable-manual --disable-ldap --disable-ldaps --disable-static \ + --without-gssapi --without-brotli # A few things we actually enable CURL_CONFIGURE_FLAGS += --enable-versioned-symbols \ --with-libssh2=${build_prefix} --with-zlib=${build_prefix} --with-nghttp2=${build_prefix} -CURL_CONFIGURE_FLAGS += --without-gssapi # We use different TLS libraries on different platforms. # On Windows, we use schannel diff --git a/deps/libgit2.version b/deps/libgit2.version index 0c3390b350bd3..057ce9b444772 100644 --- a/deps/libgit2.version +++ b/deps/libgit2.version @@ -1,3 +1,4 @@ +# -*- makefile -*- ## jll artifact LIBGIT2_JLL_NAME := LibGit2 @@ -9,4 +10,4 @@ LIBGIT2_SHA1=465bbf88ea939a965fbcbade72870c61f815e457 # Specify the version of the Mozilla CA Certificate Store to obtain. # The versions of cacert.pem are identified by the date (YYYY-MM-DD) of their changes. # See https://curl.haxx.se/docs/caextract.html for more details. -MOZILLA_CACERT_VERSION := 2022-02-01 +MOZILLA_CACERT_VERSION := 2022-10-11 diff --git a/deps/unwind.mk b/deps/unwind.mk index 58a6edcf728d8..76593df1e5ef0 100644 --- a/deps/unwind.mk +++ b/deps/unwind.mk @@ -46,10 +46,13 @@ $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-non-empty-structs.patch-applied: $ cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f -u -l < $(SRCDIR)/patches/libunwind-non-empty-structs.patch echo 1 > $@ +# note minidebuginfo requires liblzma, which we do not have a source build for +# (it will be enabled in BinaryBuilder-based downloads however) +# since https://github.com/JuliaPackaging/Yggdrasil/commit/0149e021be9badcb331007c62442a4f554f3003c $(BUILDDIR)/libunwind-$(UNWIND_VER)/build-configured: $(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-non-empty-structs.patch-applied mkdir -p $(dir $@) cd $(dir $@) && \ - $(dir $<)/configure $(CONFIGURE_COMMON) CPPFLAGS="$(CPPFLAGS) $(LIBUNWIND_CPPFLAGS)" CFLAGS="$(CFLAGS) $(LIBUNWIND_CFLAGS)" --enable-shared --disable-minidebuginfo --disable-tests --enable-zlibdebuginfo + $(dir $<)/configure $(CONFIGURE_COMMON) CPPFLAGS="$(CPPFLAGS) $(LIBUNWIND_CPPFLAGS)" CFLAGS="$(CFLAGS) $(LIBUNWIND_CFLAGS)" --enable-shared --disable-minidebuginfo --disable-tests --enable-zlibdebuginfo --disable-conservative-checks echo 1 > $@ $(BUILDDIR)/libunwind-$(UNWIND_VER)/build-compiled: $(BUILDDIR)/libunwind-$(UNWIND_VER)/build-configured diff --git a/deps/zlib.version b/deps/zlib.version index c6a295882a7ce..89a304c49b6dc 100644 --- a/deps/zlib.version +++ b/deps/zlib.version @@ -1,7 +1,8 @@ +# -*- makefile -*- ## jll artifact ZLIB_JLL_NAME := Zlib ## source build -ZLIB_VER := 1.2.12 -ZLIB_BRANCH=v1.2.12 -ZLIB_SHA1=21767c654d31d2dccdde4330529775c6c5fd5389 +ZLIB_VER := 1.2.13 +ZLIB_BRANCH=v1.2.13 +ZLIB_SHA1=04f42ceca40f73e2978b50e93806c2a18c1281fc diff --git a/doc/make.jl b/doc/make.jl index 
61adf2ec603fa..75e3598ced6f7 100644 --- a/doc/make.jl +++ b/doc/make.jl @@ -152,6 +152,7 @@ DevDocs = [ "devdocs/ssair.md", "devdocs/EscapeAnalysis.md", "devdocs/gc-sa.md", + "devdocs/gc.md", ], "Developing/debugging Julia's C code" => [ "devdocs/backtraces.md", diff --git a/doc/src/base/multi-threading.md b/doc/src/base/multi-threading.md index 293857c1c6c65..4932aef4cc938 100644 --- a/doc/src/base/multi-threading.md +++ b/doc/src/base/multi-threading.md @@ -5,9 +5,11 @@ Base.Threads.@threads Base.Threads.foreach Base.Threads.@spawn Base.Threads.threadid +Base.Threads.maxthreadid Base.Threads.nthreads Base.Threads.threadpool Base.Threads.nthreadpools +Base.Threads.threadpoolsize ``` See also [Multi-Threading](@ref man-multithreading). diff --git a/doc/src/devdocs/functions.md b/doc/src/devdocs/functions.md index 13f863cd26d81..283f63b2d0dce 100644 --- a/doc/src/devdocs/functions.md +++ b/doc/src/devdocs/functions.md @@ -48,7 +48,7 @@ jl_value_t *jl_call(jl_function_t *f, jl_value_t **args, int32_t nargs); Given the above dispatch process, conceptually all that is needed to add a new method is (1) a tuple type, and (2) code for the body of the method. `jl_method_def` implements this operation. -`jl_first_argument_datatype` is called to extract the relevant method table from what would be +`jl_method_table_for` is called to extract the relevant method table from what would be the type of the first argument. This is much more complicated than the corresponding procedure during dispatch, since the argument tuple type might be abstract. For example, we can define: @@ -141,9 +141,9 @@ but works reasonably well. ## Keyword arguments -Keyword arguments work by associating a special, hidden function object with each method table -that has definitions with keyword arguments. This function is called the "keyword argument sorter" -or "keyword sorter", or "kwsorter", and is stored in the `kwsorter` field of `MethodTable` objects. +Keyword arguments work by adding methods to the `kwcall` function. This function +is usually the "keyword argument sorter" or "keyword sorter", which then calls +the inner body of the function (defined anonymously). Every definition in the kwsorter function has the same arguments as some definition in the normal method table, except with a single `NamedTuple` argument prepended, which gives the names and values of passed keyword arguments. The kwsorter's job is to move keyword arguments @@ -220,10 +220,10 @@ circle((0,0), 1.0, color = red; other...) is lowered to: ```julia -kwfunc(circle)(merge((color = red,), other), circle, (0,0), 1.0) +kwcall(merge((color = red,), other), circle, (0,0), 1.0) ``` - `kwfunc` (also in`Core`) fetches the kwsorter for the called function. + `kwcall` (also in `Core`) denotes a kwcall signature and dispatch. The keyword splatting operation (written as `other...`) calls the named tuple `merge` function. This function further unpacks each *element* of `other`, expecting each one to contain two values (a symbol and a value). diff --git a/doc/src/devdocs/gc.md b/doc/src/devdocs/gc.md new file mode 100644 index 0000000000000..2ebd3412c8010 --- /dev/null +++ b/doc/src/devdocs/gc.md @@ -0,0 +1,71 @@ +# Garbage Collection in Julia + +## Introduction + +Julia has a generational non-moving mark-sweep garbage collector. +Native objects are precisely scanned and foreign ones are conservatively marked.
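The generational behavior described in this document can be observed directly from the REPL. The following is a minimal illustrative sketch, not part of the collector's internals; it assumes a Julia 1.8+ session, where `GC.enable_logging` is available, and uses only the public `Base.GC` entry points:

```julia
# Minimal sketch (assumes Julia 1.8+): watch young-generation vs. full collections.
GC.enable_logging(true)             # print a log line for each collection
tmp = [rand(8) for _ in 1:100_000]  # allocate many small, young objects
GC.gc(false)                        # request an incremental (young-generation) collection
tmp = nothing                       # drop the reference so the objects can die
GC.gc(true)                         # request a full collection of the entire heap
GC.enable_logging(false)
```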
+ +## Memory layout of objects and GC bits + +An opaque tag is stored in the front of GC managed objects, and its lowest two bits are +used for garbage collection. The lowest bit is set for marked objects and the second +lowest bit stores age information (e.g. it's only set for old objects). + +Objects are aligned to a multiple of 4 bytes to ensure this pointer tagging is legal. + +## Pool allocation + +Sufficiently small objects (up to 2032 bytes) are pool-allocated. + +A three-level tree (analogous to a three-level page-table) is used to keep metadata +(e.g. whether a page has been allocated, whether it contains marked objects, the number of free objects, etc.) +about address ranges spanning at least one page. +Sweeping a pool-allocated object consists of inserting it back into the free list +maintained by its pool. + +## Malloc'd arrays and big objects + +Two lists are used to keep track of the remaining allocated objects: +one for sufficiently large malloc'd arrays (`mallocarray_t`) and one for +sufficiently large objects (`bigval_t`). + +Sweeping these objects consists of unlinking them from their list and calling `free` on the +corresponding address. + +## Generational and remembered sets + +Field writes into old objects trigger a write barrier if the written field +points to a young object and if a write barrier has not been triggered on the old object yet. +In this case, the old object being written to is enqueued into a remembered set, and +its mark bit is set to indicate that a write barrier has already been triggered on it. + +There is no explicit flag to determine whether a marking pass will scan the +entire heap or only through young objects and the remembered set. +The mark bits of the objects themselves are used to determine whether a full mark happens. +The mark-sweep algorithm follows this sequence of steps: + +- Objects in the remembered set have their GC mark bits reset +(these are set once the write barrier is triggered, as described above) and are enqueued. + +- Roots (e.g. thread locals) are enqueued. + +- The object graph is traversed and mark bits are set. + +- Object pools, malloc'd arrays and big objects are swept. On a full sweep, +the mark bits of all marked objects are reset. On a generational sweep, +only the mark bits of marked young objects are reset. + +- Mark bits of objects in the remembered set are set, +so we don't trigger the write barrier on them again. + +After these stages, old objects will be left with their mark bits set, +so that references from them are not explored in a subsequent generational collection. +This scheme eliminates the need to explicitly keep a flag indicating a full mark +(though a flag to indicate a full sweep is necessary). + +## Heuristics + +GC heuristics tune the GC by changing the size of the allocation interval between garbage collections. +If a GC was unproductive, then we increase the size of the allocation interval to allow objects more time to die. +If a GC returns a lot of space, we can shrink the interval. The goal is to find a steady state where we are +allocating just about the same amount as we are collecting. diff --git a/doc/src/devdocs/locks.md b/doc/src/devdocs/locks.md index 6cc0c1270ca85..f2ddc26fb954d 100644 --- a/doc/src/devdocs/locks.md +++ b/doc/src/devdocs/locks.md @@ -127,7 +127,7 @@ These data structures each need locks due to being shared mutable global state. list for the above lock priority list. This list does not include level 1 leaf resources due to their simplicity.
-MethodTable modifications (def, cache, kwsorter type) : MethodTable->writelock +MethodTable modifications (def, cache) : MethodTable->writelock Type declarations : toplevel lock diff --git a/doc/src/devdocs/types.md b/doc/src/devdocs/types.md index 003574f99c182..c3afc26600c65 100644 --- a/doc/src/devdocs/types.md +++ b/doc/src/devdocs/types.md @@ -198,7 +198,6 @@ TypeName defs: Nothing nothing cache: Nothing nothing max_args: Int64 0 - kwsorter: #undef module: Module Core : Int64 0 : Int64 0 diff --git a/doc/src/manual/arrays.md b/doc/src/manual/arrays.md index 3126f1c2a3270..17204b73a0240 100644 --- a/doc/src/manual/arrays.md +++ b/doc/src/manual/arrays.md @@ -1,4 +1,4 @@ -# [Multi-dimensional Arrays](@id man-multi-dim-arrays) +# [Single- and multi-dimensional Arrays](@id man-multi-dim-arrays) Julia, like most technical computing languages, provides a first-class array implementation. Most technical computing languages pay a lot of attention to their array implementation at the expense diff --git a/doc/src/manual/control-flow.md b/doc/src/manual/control-flow.md index 92c927f9aa2da..18a84957fe625 100644 --- a/doc/src/manual/control-flow.md +++ b/doc/src/manual/control-flow.md @@ -139,7 +139,7 @@ julia> test(1,2) x is less than y. julia> test(2,1) -ERROR: UndefVarError: relation not defined +ERROR: UndefVarError: `relation` not defined Stacktrace: [1] test(::Int64, ::Int64) at ./none:7 ``` @@ -433,7 +433,7 @@ julia> for j = 1:3 3 julia> j -ERROR: UndefVarError: j not defined +ERROR: UndefVarError: `j` not defined ``` ```jldoctest @@ -669,7 +669,7 @@ Additionally, some exception types take one or more arguments that are used for ```jldoctest julia> throw(UndefVarError(:x)) -ERROR: UndefVarError: x not defined +ERROR: UndefVarError: `x` not defined ``` This mechanism can be implemented easily by custom exception types following the way [`UndefVarError`](@ref) diff --git a/doc/src/manual/distributed-computing.md b/doc/src/manual/distributed-computing.md index 544dace1a99ec..b8e63a73e38ff 100644 --- a/doc/src/manual/distributed-computing.md +++ b/doc/src/manual/distributed-computing.md @@ -209,7 +209,7 @@ MyType(7) julia> fetch(@spawnat 2 MyType(7)) ERROR: On worker 2: -UndefVarError: MyType not defined +UndefVarError: `MyType` not defined ⋮ julia> fetch(@spawnat 2 DummyModule.MyType(7)) diff --git a/doc/src/manual/documentation.md b/doc/src/manual/documentation.md index 99d46e364b3eb..68bd114100031 100644 --- a/doc/src/manual/documentation.md +++ b/doc/src/manual/documentation.md @@ -17,7 +17,7 @@ environments provide a way to access documentation directly: You can also use the Julia panel in the sidebar to search for documentation. - In [Pluto](https://github.com/fonsp/Pluto.jl), open the "Live Docs" panel on the bottom right. - In [Juno](https://junolab.org) using `Ctrl-J, Ctrl-D` will show the documentation for the object -under the cursor. + under the cursor. 
## Writing Documentation diff --git a/doc/src/manual/embedding.md b/doc/src/manual/embedding.md index 26904d9ccffcd..d384880728e45 100644 --- a/doc/src/manual/embedding.md +++ b/doc/src/manual/embedding.md @@ -604,7 +604,7 @@ The second condition above implies that you can not safely call `jl_...()` funct void *func(void*) { // Wrong, jl_eval_string() called from thread that was not started by Julia - jl_eval_string("println(Threads.nthreads())"); + jl_eval_string("println(Threads.threadid())"); return NULL; } @@ -630,7 +630,7 @@ void *func(void*) // Okay, all jl_...() calls from the same thread, // even though it is not the main application thread jl_init(); - jl_eval_string("println(Threads.nthreads())"); + jl_eval_string("println(Threads.threadid())"); jl_atexit_hook(0); return NULL; } @@ -670,7 +670,7 @@ int main() jl_eval_string("func(i) = ccall(:c_func, Float64, (Int32,), i)"); // Call func() multiple times, using multiple threads to do so - jl_eval_string("println(Threads.nthreads())"); + jl_eval_string("println(Threads.threadpoolsize())"); jl_eval_string("use(i) = println(\"[J $(Threads.threadid())] i = $(i) -> $(func(i))\")"); jl_eval_string("Threads.@threads for i in 1:5 use(i) end"); diff --git a/doc/src/manual/faq.md b/doc/src/manual/faq.md index fc6a459cb34bb..ef3e77b14f1db 100644 --- a/doc/src/manual/faq.md +++ b/doc/src/manual/faq.md @@ -154,7 +154,7 @@ while x < 10 end ``` and notice that it works fine in an interactive environment (like the Julia REPL), -but gives `UndefVarError: x not defined` when you try to run it in script or other +but gives ```UndefVarError: `x` not defined``` when you try to run it in a script or other file. What is going on is that Julia generally requires you to **be explicit about assigning to global variables in a local scope**. Here, `x` is a global variable, `while` defines a [local scope](@ref scope-of-variables), and `x += 1` is @@ -705,7 +705,7 @@ julia> module Foo julia> Foo.foo() ERROR: On worker 2: -UndefVarError: Foo not defined +UndefVarError: `Foo` not defined Stacktrace: [...] ``` @@ -726,7 +726,7 @@ julia> @everywhere module Foo julia> Foo.foo() ERROR: On worker 2: -UndefVarError: gvar not defined +UndefVarError: `gvar` not defined Stacktrace: [...] ``` @@ -762,7 +762,7 @@ bar (generic function with 1 method) julia> remotecall_fetch(bar, 2) ERROR: On worker 2: -UndefVarError: #bar not defined +UndefVarError: `#bar` not defined [...] julia> anon_bar = ()->1 diff --git a/doc/src/manual/integers-and-floating-point-numbers.md b/doc/src/manual/integers-and-floating-point-numbers.md index 2d073b83aec0a..173ca7847616e 100644 --- a/doc/src/manual/integers-and-floating-point-numbers.md +++ b/doc/src/manual/integers-and-floating-point-numbers.md @@ -185,7 +185,9 @@ determining storage size of a literal. So `0x01` is a `UInt8` while `0x0001` is That allows the user to control the size. -Values which cannot be stored in `UInt128` cannot be written as such literals. +Unsigned literals (starting with `0x`) that encode integers too large to be represented as +`UInt128` values will construct `BigInt` values instead. This is not an unsigned type, but +it is the only built-in type big enough to represent such large integer values. Binary, octal, and hexadecimal literals may be signed by a `-` immediately preceding the unsigned literal.
They produce an unsigned integer of the same size as the unsigned literal diff --git a/doc/src/manual/metaprogramming.md b/doc/src/manual/metaprogramming.md index 8308914f34f79..75bf2a678d7ad 100644 --- a/doc/src/manual/metaprogramming.md +++ b/doc/src/manual/metaprogramming.md @@ -364,7 +364,7 @@ julia> ex = :(a + b) :(a + b) julia> eval(ex) -ERROR: UndefVarError: b not defined +ERROR: UndefVarError: `b` not defined [...] julia> a = 1; b = 2; @@ -382,7 +382,7 @@ julia> ex = :(x = 1) :(x = 1) julia> x -ERROR: UndefVarError: x not defined +ERROR: UndefVarError: `x` not defined julia> eval(ex) 1 diff --git a/doc/src/manual/modules.md b/doc/src/manual/modules.md index 9a250fdf716a8..f0a9a5110ded4 100644 --- a/doc/src/manual/modules.md +++ b/doc/src/manual/modules.md @@ -281,7 +281,7 @@ julia> using .A, .B julia> f WARNING: both B and A export "f"; uses of it in module Main must be qualified -ERROR: UndefVarError: f not defined +ERROR: UndefVarError: `f` not defined ``` Here, Julia cannot decide which `f` you are referring to, so you have to make a choice. The following solutions are commonly used: @@ -397,7 +397,7 @@ x = 0 module Sub using ..TestPackage -z = y # ERROR: UndefVarError: y not defined +z = y # ERROR: UndefVarError: `y` not defined end y = 1 @@ -413,7 +413,7 @@ For similar reasons, you cannot use a cyclic ordering: module A module B -using ..C # ERROR: UndefVarError: C not defined +using ..C # ERROR: UndefVarError: `C` not defined end module C diff --git a/doc/src/manual/multi-threading.md b/doc/src/manual/multi-threading.md index 9ebba4fd7f676..b012de27ac81f 100644 --- a/doc/src/manual/multi-threading.md +++ b/doc/src/manual/multi-threading.md @@ -267,7 +267,7 @@ avoid the race: ```julia-repl julia> using Base.Threads -julia> nthreads() +julia> Threads.nthreads() 4 julia> acc = Ref(0) diff --git a/doc/src/manual/noteworthy-differences.md b/doc/src/manual/noteworthy-differences.md index dc3093ad8db6b..81a36e2e60743 100644 --- a/doc/src/manual/noteworthy-differences.md +++ b/doc/src/manual/noteworthy-differences.md @@ -351,6 +351,97 @@ For users coming to Julia from R, these are some noteworthy differences: it's more general than that since methods are dispatched on every argument type, not only `this`, using the most-specific-declaration rule). +### Julia ⇔ C/C++: Namespaces + * C/C++ `namespace`s correspond roughly to Julia `module`s. + * There are no private globals or fields in Julia. Everything is publicly accessible + through fully qualified paths (or relative paths, if desired). + * `using MyNamespace::myfun` (C++) corresponds roughly to `import MyModule: myfun` (Julia). + * `using namespace MyNamespace` (C++) corresponds roughly to `using MyModule` (Julia) + * In Julia, only `export`ed symbols are made available to the calling module. + * In C++, only elements found in the included (public) header files are made available. + * Caveat: `import`/`using` keywords (Julia) also *load* modules (see below). + * Caveat: `import`/`using` (Julia) works only at the global scope level (`module`s) + * In C++, `using namespace X` works within arbitrary scopes (ex: function scope). + +### Julia ⇔ C/C++: Module loading + * When you think of a C/C++ "**library**", you are likely looking for a Julia "**package**". + * Caveat: C/C++ libraries often house multiple "software modules" whereas Julia + "packages" typically house one. + * Reminder: Julia `module`s are global scopes (not necessarily "software modules"). 
+ * **Instead of build/`make` scripts**, Julia uses "Project Environments" (sometimes called + either "Project" or "Environment"). + * Build scripts are only needed for more complex applications + (like those needing to compile or download C/C++ executables). + * To develop an application or project in Julia, you can initialize its root directory + as a "Project Environment", and house application-specific code/packages there. + This provides good control over project dependencies, and future reproducibility. + * Available packages are added to a "Project Environment" with the `Pkg.add()` function or Pkg REPL mode. + (This does not **load** said package, however). + * The list of available packages (direct dependencies) for a "Project Environment" is + saved in its `Project.toml` file. + * The *full* dependency information for a "Project Environment" is auto-generated & saved + in its `Manifest.toml` file by `Pkg.resolve()`. + * Packages ("software modules") available to the "Project Environment" are loaded with + `import` or `using`. + * In C/C++, you `#include <moduleheader>` to get object/function declarations, and link in + libraries when you build the executable. + * In Julia, calling `using`/`import` again just brings the existing module into scope, but does not load it again + (similar to adding the non-standard `#pragma once` to C/C++). + * **Directory-based package repositories** (Julia) can be made available by adding repository + paths to the `Base.LOAD_PATH` array. + * Packages from directory-based repositories do not require the `Pkg.add()` tool prior to + being loaded with `import` or `using`. They are simply available to the project. + * Directory-based package repositories are the **quickest solution** to developing local + libraries of "software modules". + +### Julia ⇔ C/C++: Assembling modules + * In C/C++, `.c`/`.cpp` files are compiled & added to a library with build/`make` scripts. + * In Julia, `import [PkgName]`/`using [PkgName]` statements load `[PkgName].jl` located + in a package's `[PkgName]/src/` subdirectory. + * In turn, `[PkgName].jl` typically loads associated source files with calls to + `include "[someotherfile].jl"`. + * `include "./path/to/somefile.jl"` (Julia) is very similar to + `#include "./path/to/somefile.jl"` (C/C++). + * However, `include "..."` (Julia) is not used to include header files (not required). + * **Do not use** `include "..."` (Julia) to load code from other "software modules" + (use `import`/`using` instead). + * `include "path/to/some/module.jl"` (Julia) would instantiate multiple versions of the + same code in different modules (creating *distinct* types (etc.) with the *same* names). + * `include "somefile.jl"` is typically used to assemble multiple files *within the same + Julia package* ("software module"). It is therefore relatively straightforward to ensure + files are `include`d only once (no `#ifdef` confusion). + +### Julia ⇔ C/C++: Module interface + * C++ exposes interfaces using "public" `.h`/`.hpp` files whereas Julia `module`s `export` + symbols that are intended for their users. + * Often, Julia `module`s simply add functionality by generating new "methods" for existing + functions (ex: `Base.push!`). + * Developers of Julia packages therefore cannot rely on header files for interface + documentation. + * Interfaces for Julia packages are typically described using docstrings, README.md, + static web pages, ... + * Some developers choose not to `export` all symbols required to use their package/module.
+ * Users might be expected to access these components by qualifying functions/structs/... + with the package/module name (ex: `MyModule.run_this_task(...)`). + +### Julia ⇔ C/C++: Quick reference + +| Software Concept | Julia | C/C++ |
+| :--- | :--- | :--- |
+| unnamed scope | `begin` ... `end` | `{` ... `}` |
+| function scope | `function x()` ... `end` | `int x() {` ... `}` |
+| global scope | `module MyMod` ... `end` | `namespace MyNS {` ... `}` |
+| software module | A Julia "package" | `.h`/`.hpp` files<br>+compiled `somelib.a` |
+| assembling<br>software modules | `SomePkg.jl`: ...<br>`import("subfile1.jl")`<br>`import("subfile2.jl")`<br>... | `$(AR) *.o` ⇒ `somelib.a` |
+| import<br>software module | `import SomePkg` | `#include <somelib>`<br>+link in `somelib.a` |
+| module library | `LOAD_PATH[]`, \*Git repository,<br>\*\*custom package registry | more `.h`/`.hpp` files<br>+bigger compiled `somebiglib.a` |
+
+\* The Julia package manager supports registering multiple packages from a single Git repository.<br>
+\* This allows users to house a library of related packages in a single repository.<br>
+\*\* Julia registries are primarily designed to provide versioning \& distribution of packages.<br>
+\*\* Custom package registries can be used to create a type of module library. + + ## Noteworthy differences from Common Lisp - Julia uses 1-based indexing for arrays by default, and it can also handle arbitrary [index offsets](@ref man-custom-indices). diff --git a/doc/src/manual/performance-tips.md b/doc/src/manual/performance-tips.md index 5a4e2d3c32e5a..f2ad9884e9548 100644 --- a/doc/src/manual/performance-tips.md +++ b/doc/src/manual/performance-tips.md @@ -90,7 +90,14 @@ On the first call (`@time sum_global()`) the function gets compiled. (If you've in this session, it will also compile functions needed for timing.) You should not take the results of this run seriously. For the second run, note that in addition to reporting the time, it also indicated that a significant amount of memory was allocated. We are here just computing a sum over all elements in -a vector of 64-bit floats so there should be no need to allocate memory (at least not on the heap which is what `@time` reports). +a vector of 64-bit floats so there should be no need to allocate (heap) memory. + +We should clarify that what `@time` reports is specifically *heap* allocations, which are typically needed for either +mutable objects or for creating/growing variable-sized containers (such as `Array` or `Dict`, strings, or "type-unstable" +objects whose type is only known at runtime). Allocating (or deallocating) such blocks of memory may require an expensive +system call (e.g. via `malloc` in C), and they must be tracked for garbage collection. In contrast, immutable values like +numbers (except bignums), tuples, and immutable `struct`s can be stored much more cheaply, e.g. in stack or CPU-register +memory, so one doesn’t typically worry about the performance cost of "allocating" them. Unexpected memory allocation is almost always a sign of some problem with your code, usually a problem with type-stability or creating many small temporary arrays. @@ -98,8 +105,8 @@ Consequently, in addition to the allocation itself, it's very likely that the code generated for your function is far from optimal. Take such indications seriously and follow the advice below. -If we instead pass `x` as an argument to the function it no longer allocates memory -(the allocation reported below is due to running the `@time` macro in global scope) +In this particular case, the memory allocation is due to the usage of a type-unstable global variable `x`, so if we instead pass `x` as an argument to the function it no longer allocates memory +(the remaining allocation reported below is due to running the `@time` macro in global scope) and is significantly faster after the first call: ```jldoctest sumarg; setup = :(using Random; Random.seed!(1234)), filter = r"[0-9\.]+ seconds \(.*?\)" diff --git a/doc/src/manual/profile.md b/doc/src/manual/profile.md index 3444d444d259f..045bbab6f34d2 100644 --- a/doc/src/manual/profile.md +++ b/doc/src/manual/profile.md @@ -346,7 +346,7 @@ allocation while it is running. It can be invoked with This information about the allocations is returned as an array of `Alloc` objects, wrapped in an `AllocResults` object. The best way to visualize these is currently with the [PProf.jl](https://github.com/JuliaPerf/PProf.jl) -library, which can visualize the call stacks which are making the most +package, which can visualize the call stacks which are making the most allocations. 
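For readers following along, here is a minimal sketch of the allocation-profiling workflow just described (the `sample_rate` keyword is discussed immediately below; the visualization step assumes the third-party PProf.jl package is installed):

```julia
using Profile

# Record roughly 1% of the allocations made while the workload runs.
Profile.Allocs.@profile sample_rate=0.01 begin
    map(i -> string(i) * "!", 1:100_000)   # a deliberately allocation-heavy workload
end

results = Profile.Allocs.fetch()   # an AllocResults wrapping a vector of Alloc objects
println(length(results.allocs), " allocations sampled")

# Optional visualization, assuming PProf.jl is installed:
# using PProf; PProf.Allocs.pprof(results)
```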
The allocation profiler does have significant overhead, so a `sample_rate` @@ -362,7 +362,7 @@ Passing `sample_rate=1.0` will make it record everything (which is slow); `Profile.Allocs.UnknownType`. You can read more about the missing types and the plan to improve this, here: - https://github.com/JuliaLang/julia/issues/43688. +<https://github.com/JuliaLang/julia/issues/43688>. ## External Profiling diff --git a/doc/src/manual/types.md b/doc/src/manual/types.md index 8a2b5ab1d4a5b..594be0b333f74 100644 --- a/doc/src/manual/types.md +++ b/doc/src/manual/types.md @@ -247,8 +247,8 @@ default method by many combinations of concrete types. Thanks to multiple dispat has full control over whether the default or more specific method is used. An important point to note is that there is no loss in performance if the programmer relies on -a function whose arguments are abstract types, because it is recompiled for each tuple of argument -concrete types with which it is invoked. (There may be a performance issue, however, in the case +a function whose arguments are abstract types, because it is recompiled for each tuple of concrete +argument types with which it is invoked. (There may be a performance issue, however, in the case of function arguments that are containers of abstract types; see [Performance Tips](@ref man-performance-abstract-container).) ## Primitive Types diff --git a/doc/src/manual/variables-and-scoping.md b/doc/src/manual/variables-and-scoping.md index ca6ebc2157b71..ebb4559b3e854 100644 --- a/doc/src/manual/variables-and-scoping.md +++ b/doc/src/manual/variables-and-scoping.md @@ -90,7 +90,7 @@ julia> module B julia> module D b = a # errors as D's global scope is separate from A's end; -ERROR: UndefVarError: a not defined +ERROR: UndefVarError: `a` not defined ``` If a top-level expression contains a variable declaration with keyword `local`, @@ -187,7 +187,7 @@ julia> greet() hello julia> x # global -ERROR: UndefVarError: x not defined +ERROR: UndefVarError: `x` not defined ``` Inside of the `greet` function, the assignment `x = "hello"` causes `x` to be a new local variable @@ -256,7 +256,7 @@ julia> sum_to(10) 55 julia> s # global -ERROR: UndefVarError: s not defined +ERROR: UndefVarError: `s` not defined ``` Since `s` is local to the function `sum_to`, calling the function has no effect on the global @@ -343,7 +343,7 @@ hello hello julia> x -ERROR: UndefVarError: x not defined +ERROR: UndefVarError: `x` not defined ``` Since the global `x` is not defined when the `for` loop is evaluated, the first clause of the soft @@ -408,7 +408,7 @@ julia> code = """ julia> include_string(Main, code) ┌ Warning: Assignment to `s` in soft scope is ambiguous because a global variable by the same name exists: `s` will be treated as a new local. Disambiguate by using `local s` to suppress this warning or `global s` to assign to the existing global variable. └ @ string:4 -ERROR: LoadError: UndefVarError: s not defined +ERROR: LoadError: UndefVarError: `s` not defined ``` Here we use [`include_string`](@ref), to evaluate `code` as though it were the contents of a file.
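To make the soft-scope rule above concrete, here is a minimal sketch of the disambiguation the warning asks for, assuming the code runs non-interactively (e.g. from a file rather than the REPL):

```julia
# Minimal sketch: in a script, assignment to a global inside a loop must be
# made explicit with `global`; otherwise soft scope treats `s` as a new local
# and `s += i` throws an UndefVarError.
s = 0
for i = 1:10
    global s += i
end
println(s)  # 55
```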
@@ -559,7 +559,7 @@ julia> let x = 1, z println("z: $z") # errors as z has not been assigned yet but is local end x: 1, y: -1 -ERROR: UndefVarError: z not defined +ERROR: UndefVarError: `z` not defined ``` The assignments are evaluated in order, with each right-hand side evaluated in the scope before diff --git a/src/Makefile b/src/Makefile index 8b996f28aeee0..886a0a546ff3a 100644 --- a/src/Makefile +++ b/src/Makefile @@ -45,7 +45,7 @@ SRCS := \ dlload sys init task array dump staticdata toplevel jl_uv datatype \ simplevector runtime_intrinsics precompile jloptions \ threading partr stackwalk gc gc-debug gc-pages gc-stacks gc-alloc-profiler method \ - jlapi signal-handling safepoint timing subtype rtutils \ + jlapi signal-handling safepoint timing subtype rtutils gc-heap-snapshot \ crc32c APInt-C processor ircode opaque_closure codegen-stubs coverage runtime_ccall RT_LLVMLINK := @@ -153,12 +153,12 @@ OSLIBS += $(SRCDIR)/mach_dyld_atfork.tbd endif COMMON_LIBPATHS := -L$(build_libdir) -L$(build_shlibdir) -RT_LIBS := $(LIBUV) $(LIBUTF8PROC) $(NO_WHOLE_ARCHIVE) $(LIBUNWIND) $(RT_LLVMLINK) $(OSLIBS) -CG_LIBS := $(NO_WHOLE_ARCHIVE) $(LIBUV) $(LIBUNWIND) $(CG_LLVMLINK) $(OSLIBS) +RT_LIBS := $(WHOLE_ARCHIVE) $(LIBUV) $(WHOLE_ARCHIVE) $(LIBUTF8PROC) $(NO_WHOLE_ARCHIVE) $(LIBUNWIND) $(RT_LLVMLINK) $(OSLIBS) +CG_LIBS := $(LIBUNWIND) $(CG_LLVMLINK) $(OSLIBS) RT_DEBUG_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(BUILDDIR)/flisp/libflisp-debug.a $(WHOLE_ARCHIVE) $(BUILDDIR)/support/libsupport-debug.a -ljulia-debug $(RT_LIBS) -CG_DEBUG_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(WHOLE_ARCHIVE) $(CG_LIBS) -ljulia-debug -ljulia-internal-debug +CG_DEBUG_LIBS := $(COMMON_LIBPATHS) $(CG_LIBS) -ljulia-debug -ljulia-internal-debug RT_RELEASE_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(BUILDDIR)/flisp/libflisp.a $(WHOLE_ARCHIVE) $(BUILDDIR)/support/libsupport.a -ljulia $(RT_LIBS) -CG_RELEASE_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(WHOLE_ARCHIVE) $(CG_LIBS) -ljulia -ljulia-internal +CG_RELEASE_LIBS := $(COMMON_LIBPATHS) $(CG_LIBS) -ljulia -ljulia-internal OBJS := $(SRCS:%=$(BUILDDIR)/%.o) DOBJS := $(SRCS:%=$(BUILDDIR)/%.dbg.obj) @@ -288,12 +288,15 @@ $(BUILDDIR)/ast.o $(BUILDDIR)/ast.dbg.obj: $(BUILDDIR)/julia_flisp.boot.inc $(SR $(BUILDDIR)/builtins.o $(BUILDDIR)/builtins.dbg.obj: $(SRCDIR)/iddict.c $(SRCDIR)/builtin_proto.h $(BUILDDIR)/codegen.o $(BUILDDIR)/codegen.dbg.obj: $(addprefix $(SRCDIR)/,\ intrinsics.cpp jitlayers.h debug-registry.h intrinsics.h codegen_shared.h cgutils.cpp ccall.cpp abi_*.cpp processor.h builtin_proto.h) +$(BUILDDIR)/datatype.o $(BUILDDIR)/datatype.dbg.obj: $(SRCDIR)/support/htable.h $(SRCDIR)/support/htable.inc $(BUILDDIR)/debuginfo.o $(BUILDDIR)/debuginfo.dbg.obj: $(addprefix $(SRCDIR)/,debuginfo.h processor.h jitlayers.h debug-registry.h) $(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h $(SRCDIR)/processor.h $(BUILDDIR)/dump.o $(BUILDDIR)/dump.dbg.obj: $(addprefix $(SRCDIR)/,common_symbols1.inc common_symbols2.inc builtin_proto.h serialize.h) $(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc.h $(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc.h -$(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-alloc-profiler.h +$(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h +$(BUILDDIR)/gc-heap-snapshot.o $(BUILDDIR)/gc-heap-snapshot.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h +$(BUILDDIR)/gc-alloc-profiler.o 
$(BUILDDIR)/gc-alloc-profiler.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-alloc-profiler.h $(BUILDDIR)/init.o $(BUILDDIR)/init.dbg.obj: $(SRCDIR)/builtin_proto.h $(BUILDDIR)/interpreter.o $(BUILDDIR)/interpreter.dbg.obj: $(SRCDIR)/builtin_proto.h $(BUILDDIR)/jitlayers.o $(BUILDDIR)/jitlayers.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/codegen_shared.h $(SRCDIR)/debug-registry.h diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index 98777ddd175a1..2714bc664eb57 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -590,7 +590,11 @@ void jl_dump_native_impl(void *native_code, // do the actual work auto add_output = [&] (Module &M, StringRef unopt_bc_Name, StringRef bc_Name, StringRef obj_Name, StringRef asm_Name) { preopt.run(M, empty.MAM); - if (bc_fname || obj_fname || asm_fname) optimizer.run(M); + if (bc_fname || obj_fname || asm_fname) { + assert(!verifyModule(M, &errs())); + optimizer.run(M); + assert(!verifyModule(M, &errs())); + } // We would like to emit an alias or an weakref alias to redirect these symbols // but LLVM doesn't let us emit a GlobalAlias to a declaration... @@ -1031,6 +1035,7 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, siz // and will better match what's actually in sysimg. for (auto &global : output.globals) global.second->setLinkage(GlobalValue::ExternalLinkage); + assert(!verifyModule(*m.getModuleUnlocked(), &errs())); if (optimize) { #ifndef JL_USE_NEW_PM legacy::PassManager PM; @@ -1042,6 +1047,7 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, siz #endif //Safe b/c context lock is held by output PM.run(*m.getModuleUnlocked()); + assert(!verifyModule(*m.getModuleUnlocked(), &errs())); } const std::string *fname; if (decls.functionObject == "jl_fptr_args" || decls.functionObject == "jl_fptr_sparam") diff --git a/src/builtin_proto.h b/src/builtin_proto.h index f61f76c3966f8..64e3fbd1af366 100644 --- a/src/builtin_proto.h +++ b/src/builtin_proto.h @@ -62,12 +62,6 @@ DECLARE_BUILTIN(finalizer); DECLARE_BUILTIN(_compute_sparams); DECLARE_BUILTIN(_svec_ref); -JL_CALLABLE(jl_f_invoke_kwsorter); -#ifdef DEFINE_BUILTIN_GLOBALS -JL_DLLEXPORT jl_fptr_args_t jl_f_invoke_kwsorter_addr = &jl_f_invoke_kwsorter; -#else -JL_DLLEXPORT extern jl_fptr_args_t jl_f_invoke_kwsorter_addr; -#endif JL_CALLABLE(jl_f__structtype); JL_CALLABLE(jl_f__abstracttype); JL_CALLABLE(jl_f__primitivetype); diff --git a/src/builtins.c b/src/builtins.c index 595014e97ee50..323a42b91ca92 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -1359,50 +1359,6 @@ JL_CALLABLE(jl_f_invoke) return res; } -JL_CALLABLE(jl_f_invoke_kwsorter) -{ - JL_NARGSV(invoke, 3); - jl_value_t *kwargs = args[0]; - // args[1] is `invoke` itself - jl_value_t *func = args[2]; - jl_value_t *argtypes = args[3]; - jl_value_t *kws = jl_get_keyword_sorter(func); - JL_GC_PUSH1(&argtypes); - if (jl_is_tuple_type(argtypes)) { - // construct a tuple type for invoking a keyword sorter by putting the kw container type - // and the type of the function at the front. - size_t i, nt = jl_nparams(argtypes) + 2; - if (nt < jl_page_size/sizeof(jl_value_t*)) { - jl_value_t **types = (jl_value_t**)alloca(nt*sizeof(jl_value_t*)); - types[0] = (jl_value_t*)jl_namedtuple_type; - types[1] = jl_is_type(func) ? 
(jl_value_t*)jl_wrap_Type(func) : jl_typeof(func); - for (i = 2; i < nt; i++) - types[i] = jl_tparam(argtypes, i - 2); - argtypes = (jl_value_t*)jl_apply_tuple_type_v(types, nt); - } - else { - jl_svec_t *types = jl_alloc_svec_uninit(nt); - JL_GC_PUSH1(&types); - jl_svecset(types, 0, jl_namedtuple_type); - jl_svecset(types, 1, jl_is_type(func) ? (jl_value_t*)jl_wrap_Type(func) : jl_typeof(func)); - for (i = 2; i < nt; i++) - jl_svecset(types, i, jl_tparam(argtypes, i - 2)); - argtypes = (jl_value_t*)jl_apply_tuple_type(types); - JL_GC_POP(); - } - } - else { - // invoke will throw an error - } - args[0] = kws; - args[1] = argtypes; - args[2] = kwargs; - args[3] = func; - jl_value_t *res = jl_f_invoke(NULL, args, nargs); - JL_GC_POP(); - return res; -} - // Expr constructor for internal use ------------------------------------------ jl_expr_t *jl_exprn(jl_sym_t *head, size_t n) @@ -2011,11 +1967,6 @@ void jl_init_primitives(void) JL_GC_DISABLED // method table utils jl_builtin_applicable = add_builtin_func("applicable", jl_f_applicable); jl_builtin_invoke = add_builtin_func("invoke", jl_f_invoke); - jl_typename_t *itn = ((jl_datatype_t*)jl_typeof(jl_builtin_invoke))->name; - jl_value_t *ikws = jl_new_generic_function_with_supertype(itn->name, jl_core_module, jl_builtin_type); - itn->mt->kwsorter = ikws; - jl_gc_wb(itn->mt, ikws); - jl_mk_builtin_func((jl_datatype_t*)jl_typeof(ikws), jl_symbol_name(jl_gf_name(ikws)), jl_f_invoke_kwsorter); // internal functions jl_builtin_apply_type = add_builtin_func("apply_type", jl_f_apply_type); diff --git a/src/ccall.cpp b/src/ccall.cpp index fb5799b081537..b2e66a1345f96 100644 --- a/src/ccall.cpp +++ b/src/ccall.cpp @@ -1552,7 +1552,8 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) assert(lrt == getVoidTy(ctx.builder.getContext())); assert(!isVa && !llvmcall && nccallargs == 0); JL_GC_POP(); - emit_gc_safepoint(ctx); + ctx.builder.CreateCall(prepare_call(gcroot_flush_func)); + emit_gc_safepoint(ctx.builder, get_current_ptls(ctx), ctx.tbaa().tbaa_const); return ghostValue(ctx, jl_nothing_type); } else if (is_libjulia_func("jl_get_ptls_states")) { @@ -1655,7 +1656,8 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) ctx.builder.SetInsertPoint(checkBB); ctx.builder.CreateLoad( getSizeTy(ctx.builder.getContext()), - ctx.builder.CreateConstInBoundsGEP1_32(getSizeTy(ctx.builder.getContext()), get_current_signal_page(ctx), -1), + ctx.builder.CreateConstInBoundsGEP1_32(getSizeTy(ctx.builder.getContext()), + get_current_signal_page_from_ptls(ctx.builder, get_current_ptls(ctx), ctx.tbaa().tbaa_const), -1), true); ctx.builder.CreateBr(contBB); ctx.f->getBasicBlockList().push_back(contBB); diff --git a/src/cgutils.cpp b/src/cgutils.cpp index c42e6f14473b3..56b020f2c72c2 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -8,7 +8,6 @@ STATISTIC(EmittedPointerFromObjref, "Number of emitted pointer_from_objref calls"); STATISTIC(EmittedPointerBitcast, "Number of emitted pointer bitcasts"); -STATISTIC(EmittedNthPtrAddr, "Number of emitted nth pointer address instructions"); STATISTIC(EmittedTypeof, "Number of emitted typeof instructions"); STATISTIC(EmittedErrors, "Number of emitted errors"); STATISTIC(EmittedConditionalErrors, "Number of emitted conditional errors"); @@ -42,7 +41,6 @@ STATISTIC(EmittedCPointerChecks, "Number of C pointer checks emitted"); STATISTIC(EmittedAllocObjs, "Number of object allocations emitted"); STATISTIC(EmittedWriteBarriers, "Number of write barriers emitted"); 
STATISTIC(EmittedNewStructs, "Number of new structs emitted"); -STATISTIC(EmittedSignalFences, "Number of signal fences emitted"); STATISTIC(EmittedDeferSignal, "Number of deferred signals emitted"); static Value *track_pjlvalue(jl_codectx_t &ctx, Value *V) @@ -971,41 +969,20 @@ static void emit_memcpy(jl_codectx_t &ctx, Value *dst, MDNode *tbaa_dst, const j emit_memcpy_llvm(ctx, dst, tbaa_dst, data_pointer(ctx, src), src.tbaa, sz, align, is_volatile); } -static Value *emit_nthptr_addr(jl_codectx_t &ctx, Value *v, ssize_t n, bool gctracked = true) -{ - ++EmittedNthPtrAddr; - return ctx.builder.CreateInBoundsGEP( - ctx.types().T_prjlvalue, - emit_bitcast(ctx, maybe_decay_tracked(ctx, v), ctx.types().T_pprjlvalue), - ConstantInt::get(getSizeTy(ctx.builder.getContext()), n)); -} - -static Value *emit_nthptr_addr(jl_codectx_t &ctx, Value *v, Value *idx) +static LoadInst *emit_nthptr_recast(jl_codectx_t &ctx, Value *v, Value *idx, MDNode *tbaa, Type *type) { - ++EmittedNthPtrAddr; - return ctx.builder.CreateInBoundsGEP( + // p = (jl_value_t**)v; *(type*)&p[n] + Value *vptr = ctx.builder.CreateInBoundsGEP( ctx.types().T_prjlvalue, emit_bitcast(ctx, maybe_decay_tracked(ctx, v), ctx.types().T_pprjlvalue), idx); + LoadInst *load = ctx.builder.CreateLoad(type, emit_bitcast(ctx, vptr, PointerType::get(type, 0))); + tbaa_decorate(tbaa, load); + return load; } -static LoadInst *emit_nthptr_recast(jl_codectx_t &ctx, Value *v, Value *idx, MDNode *tbaa, Type *type) -{ - // p = (jl_value_t**)v; *(type*)&p[n] - Value *vptr = emit_nthptr_addr(ctx, v, idx); - return cast(tbaa_decorate(tbaa, ctx.builder.CreateLoad(type, - emit_bitcast(ctx, vptr, PointerType::get(type, 0))))); -} - -static LoadInst *emit_nthptr_recast(jl_codectx_t &ctx, Value *v, ssize_t n, MDNode *tbaa, Type *type) -{ - // p = (jl_value_t**)v; *(type*)&p[n] - Value *vptr = emit_nthptr_addr(ctx, v, n); - return cast(tbaa_decorate(tbaa, ctx.builder.CreateLoad(type, - emit_bitcast(ctx, vptr, PointerType::get(type, 0))))); - } - static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &v, bool is_promotable=false); + static Value *emit_typeof(jl_codectx_t &ctx, Value *v, bool maybenull); static jl_cgval_t emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull) @@ -1177,8 +1154,12 @@ static Value *emit_datatype_isprimitivetype(jl_codectx_t &ctx, Value *dt) static Value *emit_datatype_name(jl_codectx_t &ctx, Value *dt) { - Value *vptr = emit_nthptr_addr(ctx, dt, (ssize_t)(offsetof(jl_datatype_t, name) / sizeof(char*))); - return tbaa_decorate(ctx.tbaa().tbaa_const, ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, vptr, Align(sizeof(void*)))); + unsigned n = offsetof(jl_datatype_t, name) / sizeof(char*); + Value *vptr = ctx.builder.CreateInBoundsGEP( + ctx.types().T_pjlvalue, + emit_bitcast(ctx, maybe_decay_tracked(ctx, dt), ctx.types().T_ppjlvalue), + ConstantInt::get(getSizeTy(ctx.builder.getContext()), n)); + return tbaa_decorate(ctx.tbaa().tbaa_const, ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, vptr, Align(sizeof(void*)))); } // --- generating various error checks --- @@ -1508,8 +1489,8 @@ static std::pair emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x, // so the isa test reduces to a comparison of the typename by pointer return std::make_pair( ctx.builder.CreateICmpEQ( - mark_callee_rooted(ctx, emit_datatype_name(ctx, emit_typeof_boxed(ctx, x))), - mark_callee_rooted(ctx, literal_pointer_val(ctx, (jl_value_t*)dt->name))), + emit_datatype_name(ctx, emit_typeof_boxed(ctx, x)), + literal_pointer_val(ctx, 
(jl_value_t*)dt->name)), false); } if (jl_is_uniontype(intersected_type) && @@ -3445,10 +3426,10 @@ static void emit_cpointercheck(jl_codectx_t &ctx, const jl_cgval_t &x, const std emit_typecheck(ctx, mark_julia_type(ctx, t, true, jl_any_type), (jl_value_t*)jl_datatype_type, msg); Value *istype = - ctx.builder.CreateICmpEQ(mark_callee_rooted(ctx, emit_datatype_name(ctx, t)), - mark_callee_rooted(ctx, literal_pointer_val(ctx, (jl_value_t*)jl_pointer_typename))); - BasicBlock *failBB = BasicBlock::Create(ctx.builder.getContext(),"fail",ctx.f); - BasicBlock *passBB = BasicBlock::Create(ctx.builder.getContext(),"pass"); + ctx.builder.CreateICmpEQ(emit_datatype_name(ctx, t), + literal_pointer_val(ctx, (jl_value_t*)jl_pointer_typename)); + BasicBlock *failBB = BasicBlock::Create(ctx.builder.getContext(), "fail", ctx.f); + BasicBlock *passBB = BasicBlock::Create(ctx.builder.getContext(), "pass"); ctx.builder.CreateCondBr(istype, passBB, failBB); ctx.builder.SetInsertPoint(failBB); @@ -3896,8 +3877,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg static void emit_signal_fence(jl_codectx_t &ctx) { - ++EmittedSignalFences; - ctx.builder.CreateFence(AtomicOrdering::SequentiallyConsistent, SyncScope::SingleThread); + emit_signal_fence(ctx.builder); } static Value *emit_defer_signal(jl_codectx_t &ctx) @@ -3910,70 +3890,6 @@ static Value *emit_defer_signal(jl_codectx_t &ctx) return ctx.builder.CreateInBoundsGEP(ctx.types().T_sigatomic, ptls, ArrayRef(offset), "jl_defer_signal"); } -static void emit_gc_safepoint(jl_codectx_t &ctx) -{ - ctx.builder.CreateCall(prepare_call(gcroot_flush_func)); - emit_signal_fence(ctx); - ctx.builder.CreateLoad(getSizeTy(ctx.builder.getContext()), get_current_signal_page(ctx), true); - emit_signal_fence(ctx); -} - -static Value *emit_gc_state_set(jl_codectx_t &ctx, Value *state, Value *old_state) -{ - Type *T_int8 = state->getType(); - Value *ptls = emit_bitcast(ctx, get_current_ptls(ctx), getInt8PtrTy(ctx.builder.getContext())); - Constant *offset = ConstantInt::getSigned(getInt32Ty(ctx.builder.getContext()), offsetof(jl_tls_states_t, gc_state)); - Value *gc_state = ctx.builder.CreateInBoundsGEP(T_int8, ptls, ArrayRef(offset), "gc_state"); - if (old_state == nullptr) { - old_state = ctx.builder.CreateLoad(T_int8, gc_state); - cast(old_state)->setOrdering(AtomicOrdering::Monotonic); - } - ctx.builder.CreateAlignedStore(state, gc_state, Align(sizeof(void*)))->setOrdering(AtomicOrdering::Release); - if (auto *C = dyn_cast(old_state)) - if (C->isZero()) - return old_state; - if (auto *C = dyn_cast(state)) - if (!C->isZero()) - return old_state; - BasicBlock *passBB = BasicBlock::Create(ctx.builder.getContext(), "safepoint", ctx.f); - BasicBlock *exitBB = BasicBlock::Create(ctx.builder.getContext(), "after_safepoint", ctx.f); - Constant *zero8 = ConstantInt::get(T_int8, 0); - ctx.builder.CreateCondBr(ctx.builder.CreateAnd(ctx.builder.CreateICmpNE(old_state, zero8), // if (old_state && !state) - ctx.builder.CreateICmpEQ(state, zero8)), - passBB, exitBB); - ctx.builder.SetInsertPoint(passBB); - emit_gc_safepoint(ctx); - ctx.builder.CreateBr(exitBB); - ctx.builder.SetInsertPoint(exitBB); - return old_state; -} - -static Value *emit_gc_unsafe_enter(jl_codectx_t &ctx) -{ - Value *state = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0); - return emit_gc_state_set(ctx, state, nullptr); -} - -static Value *emit_gc_unsafe_leave(jl_codectx_t &ctx, Value *state) -{ - Value *old_state = ConstantInt::get(state->getType(), 0); - return 
emit_gc_state_set(ctx, state, old_state); -} - -//static Value *emit_gc_safe_enter(jl_codectx_t &ctx) -//{ -// Value *state = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), JL_GC_STATE_SAFE); -// return emit_gc_state_set(ctx, state, nullptr); -//} -// -//static Value *emit_gc_safe_leave(jl_codectx_t &ctx, Value *state) -//{ -// Value *old_state = ConstantInt::get(state->getType(), JL_GC_STATE_SAFE); -// return emit_gc_state_set(ctx, state, old_state); -//} - - - #ifndef JL_NDEBUG static int compare_cgparams(const jl_cgparams_t *a, const jl_cgparams_t *b) { diff --git a/src/codegen.cpp b/src/codegen.cpp index 9c09314c9aee1..f8af6c79e7e2b 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -588,6 +588,11 @@ static const auto jlpgcstack_func = new JuliaFunction{ nullptr, }; +static const auto jladoptthread_func = new JuliaFunction{ + "julia.get_pgcstack_or_new", + jlpgcstack_func->_type, + jlpgcstack_func->_attrs, +}; // important functions @@ -1169,7 +1174,6 @@ static const auto &builtin_func_map() { { jl_f_svec_addr, new JuliaFunction{XSTR(jl_f_svec), get_func_sig, get_func_attrs} }, { jl_f_applicable_addr, new JuliaFunction{XSTR(jl_f_applicable), get_func_sig, get_func_attrs} }, { jl_f_invoke_addr, new JuliaFunction{XSTR(jl_f_invoke), get_func_sig, get_func_attrs} }, - { jl_f_invoke_kwsorter_addr, new JuliaFunction{XSTR(jl_f_invoke_kwsorter), get_func_sig, get_func_attrs} }, { jl_f_isdefined_addr, new JuliaFunction{XSTR(jl_f_isdefined), get_func_sig, get_func_attrs} }, { jl_f_getfield_addr, new JuliaFunction{XSTR(jl_f_getfield), get_func_sig, get_func_attrs} }, { jl_f_setfield_addr, new JuliaFunction{XSTR(jl_f_setfield), get_func_sig, get_func_attrs} }, @@ -1205,6 +1209,7 @@ extern "C" { 1, #endif (int) DICompileUnit::DebugEmissionKind::FullDebug, + 1, jl_rettype_inferred, NULL }; } @@ -1492,11 +1497,9 @@ static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t static jl_cgval_t emit_checked_var(jl_codectx_t &ctx, Value *bp, jl_sym_t *name, bool isvol, MDNode *tbaa); static jl_cgval_t emit_sparam(jl_codectx_t &ctx, size_t i); static Value *emit_condition(jl_codectx_t &ctx, const jl_cgval_t &condV, const std::string &msg); -static void allocate_gc_frame(jl_codectx_t &ctx, BasicBlock *b0); static Value *get_current_task(jl_codectx_t &ctx); static Value *get_current_ptls(jl_codectx_t &ctx); static Value *get_last_age_field(jl_codectx_t &ctx); -static Value *get_current_signal_page(jl_codectx_t &ctx); static void CreateTrap(IRBuilder<> &irbuilder, bool create_new_block = true); static CallInst *emit_jlcall(jl_codectx_t &ctx, Function *theFptr, Value *theF, const jl_cgval_t *args, size_t nargs, JuliaFunction *trampoline); @@ -5314,21 +5317,17 @@ JL_GCC_IGNORE_STOP // --- generate function bodies --- // gc frame emission -static void allocate_gc_frame(jl_codectx_t &ctx, BasicBlock *b0) +static void allocate_gc_frame(jl_codectx_t &ctx, BasicBlock *b0, bool or_new=false) { // allocate a placeholder gc instruction // this will require the runtime, but it gets deleted later if unused - ctx.topalloca = ctx.builder.CreateCall(prepare_call(jlpgcstack_func)); + ctx.topalloca = ctx.builder.CreateCall(prepare_call(or_new ? 
jladoptthread_func : jlpgcstack_func)); ctx.pgcstack = ctx.topalloca; } static Value *get_current_task(jl_codectx_t &ctx) { - const int ptls_offset = offsetof(jl_task_t, gcstack); - return ctx.builder.CreateInBoundsGEP( - ctx.types().T_pjlvalue, emit_bitcast(ctx, ctx.pgcstack, ctx.types().T_ppjlvalue), - ConstantInt::get(getSizeTy(ctx.builder.getContext()), -(ptls_offset / sizeof(void *))), - "current_task"); + return get_current_task_from_pgcstack(ctx.builder, ctx.pgcstack); } // Get PTLS through current task. @@ -5348,15 +5347,6 @@ static Value *get_last_age_field(jl_codectx_t &ctx) "world_age"); } -// Get signal page through current task. -static Value *get_current_signal_page(jl_codectx_t &ctx) -{ - // return ctx.builder.CreateCall(prepare_call(reuse_signal_page_func)); - Value *ptls = get_current_ptls(ctx); - int nthfield = offsetof(jl_tls_states_t, safepoint) / sizeof(void *); - return emit_nthptr_recast(ctx, ptls, nthfield, ctx.tbaa().tbaa_const, getSizePtrTy(ctx.builder.getContext())); -} - static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, Module *M, jl_codegen_params_t ¶ms) { ++EmittedToJLInvokes; @@ -5641,19 +5631,11 @@ static Function* gen_cfun_wrapper( ctx.builder.SetInsertPoint(b0); DebugLoc noDbg; ctx.builder.SetCurrentDebugLocation(noDbg); - allocate_gc_frame(ctx, b0); + allocate_gc_frame(ctx, b0, true); - Value *dummy_world = ctx.builder.CreateAlloca(getSizeTy(ctx.builder.getContext())); - Value *have_tls = ctx.builder.CreateIsNotNull(ctx.pgcstack); - // TODO: in the future, initialize a full TLS context here Value *world_age_field = get_last_age_field(ctx); - world_age_field = ctx.builder.CreateSelect(have_tls, world_age_field, dummy_world); Value *last_age = tbaa_decorate(ctx.tbaa().tbaa_gcframe, ctx.builder.CreateAlignedLoad(getSizeTy(ctx.builder.getContext()), world_age_field, Align(sizeof(size_t)))); - Value *last_gc_state = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), JL_GC_STATE_SAFE); - last_gc_state = emit_guarded_test(ctx, have_tls, last_gc_state, [&] { - return emit_gc_unsafe_enter(ctx); - }); Value *world_v = ctx.builder.CreateAlignedLoad(getSizeTy(ctx.builder.getContext()), prepare_global_in(jl_Module, jlgetworld_global), Align(sizeof(size_t))); @@ -5668,12 +5650,7 @@ static Function* gen_cfun_wrapper( emit_bitcast(ctx, literal_pointer_val(ctx, (jl_value_t*)codeinst), getSizePtrTy(ctx.builder.getContext())), offsetof(jl_code_instance_t, max_world) / sizeof(size_t)), Align(sizeof(size_t))); - // XXX: age is always OK if we don't have a TLS. This is a hack required due to `@threadcall` abuse. - // and adds quite a bit of complexity here, even though it's still wrong - // (anything that tries to interact with the runtime will fault) age_ok = ctx.builder.CreateICmpUGE(lam_max, world_v); - world_v = ctx.builder.CreateSelect(ctx.builder.CreateOr(have_tls, age_ok), world_v, lam_max); - age_ok = ctx.builder.CreateOr(ctx.builder.CreateNot(have_tls), age_ok); } ctx.builder.CreateStore(world_v, world_age_field); @@ -6030,12 +6007,6 @@ static Function* gen_cfun_wrapper( } ctx.builder.CreateStore(last_age, world_age_field); - if (!sig.retboxed) { - emit_guarded_test(ctx, have_tls, nullptr, [&] { - emit_gc_unsafe_leave(ctx, last_gc_state); - return nullptr; - }); - } ctx.builder.CreateRet(r); ctx.builder.SetCurrentDebugLocation(noDbg); @@ -7486,8 +7457,11 @@ static jl_llvm_functions_t Instruction &prologue_end = ctx.builder.GetInsertBlock()->back(); + // step 11a. 
Emit the entry safepoint + if (JL_FEAT_TEST(ctx, safepoint_on_entry)) + emit_gc_safepoint(ctx.builder, get_current_ptls(ctx), ctx.tbaa().tbaa_const); - // step 11. Do codegen in control flow order + // step 11b. Do codegen in control flow order std::vector workstack; std::map BB; std::map come_from_bb; @@ -8458,6 +8432,7 @@ static void init_jit_functions(void) add_named_global(jl_write_barrier_func, (void*)NULL); add_named_global(jl_write_barrier_binding_func, (void*)NULL); add_named_global(jldlsym_func, &jl_load_and_lookup); + add_named_global("jl_adopt_thread", &jl_adopt_thread); add_named_global(jlgetcfunctiontrampoline_func, &jl_get_cfunction_trampoline); add_named_global(jlgetnthfieldchecked_func, &jl_get_nth_field_checked); add_named_global(diff_gc_total_bytes_func, &jl_gc_diff_total_bytes); diff --git a/src/codegen_shared.h b/src/codegen_shared.h index 0e68668378f4e..329cc567e8c5f 100644 --- a/src/codegen_shared.h +++ b/src/codegen_shared.h @@ -22,6 +22,7 @@ enum AddressSpace { }; static inline auto getSizeTy(llvm::LLVMContext &ctxt) { + //return M.getDataLayout().getIntPtrType(M.getContext()); if (sizeof(size_t) > sizeof(uint32_t)) { return llvm::Type::getInt64Ty(ctxt); } else { @@ -176,26 +177,127 @@ static inline llvm::Value *emit_bitcast_with_builder(llvm::IRBuilder<> &builder, } } +// Get PTLS through current task. +static inline llvm::Value *get_current_task_from_pgcstack(llvm::IRBuilder<> &builder, llvm::Value *pgcstack) +{ + using namespace llvm; + auto T_ppjlvalue = JuliaType::get_ppjlvalue_ty(builder.getContext()); + auto T_pjlvalue = JuliaType::get_pjlvalue_ty(builder.getContext()); + const int pgcstack_offset = offsetof(jl_task_t, gcstack); + return builder.CreateInBoundsGEP( + T_pjlvalue, emit_bitcast_with_builder(builder, pgcstack, T_ppjlvalue), + ConstantInt::get(getSizeTy(builder.getContext()), -(pgcstack_offset / sizeof(void *))), + "current_task"); +} + // Get PTLS through current task. static inline llvm::Value *get_current_ptls_from_task(llvm::IRBuilder<> &builder, llvm::Value *current_task, llvm::MDNode *tbaa) { using namespace llvm; auto T_ppjlvalue = JuliaType::get_ppjlvalue_ty(builder.getContext()); auto T_pjlvalue = JuliaType::get_pjlvalue_ty(builder.getContext()); - auto T_size = builder.GetInsertBlock()->getModule()->getDataLayout().getIntPtrType(builder.getContext()); + auto T_size = getSizeTy(builder.getContext()); const int ptls_offset = offsetof(jl_task_t, ptls); llvm::Value *pptls = builder.CreateInBoundsGEP( - T_pjlvalue, current_task, - ConstantInt::get(T_size, ptls_offset / sizeof(void *)), - "ptls_field"); + T_pjlvalue, current_task, + ConstantInt::get(T_size, ptls_offset / sizeof(void *)), + "ptls_field"); LoadInst *ptls_load = builder.CreateAlignedLoad(T_pjlvalue, - emit_bitcast_with_builder(builder, pptls, T_ppjlvalue), Align(sizeof(void *)), "ptls_load"); + emit_bitcast_with_builder(builder, pptls, T_ppjlvalue), Align(sizeof(void *)), "ptls_load"); // Note: Corresponding store (`t->ptls = ptls`) happens in `ctx_switch` of tasks.c. tbaa_decorate(tbaa, ptls_load); - // Using `CastInst::Create` to get an `Instruction*` without explicit cast: - auto ptls = CastInst::Create(Instruction::BitCast, ptls_load, T_ppjlvalue, "ptls"); - builder.Insert(ptls); - return ptls; + return builder.CreateBitCast(ptls_load, T_ppjlvalue, "ptls"); +} + +// Get signal page through current task. 
+// Get signal page through current task.
+static inline llvm::Value *get_current_signal_page_from_ptls(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::MDNode *tbaa)
+{
+    using namespace llvm;
+    // return builder.CreateCall(prepare_call(reuse_signal_page_func));
+    auto T_size = getSizeTy(builder.getContext());
+    auto T_psize = T_size->getPointerTo();
+    auto T_ppsize = T_psize->getPointerTo();
+    int nthfield = offsetof(jl_tls_states_t, safepoint) / sizeof(void *);
+    ptls = emit_bitcast_with_builder(builder, ptls, T_ppsize);
+    llvm::Value *psafepoint = builder.CreateInBoundsGEP(
+        T_psize, ptls, ConstantInt::get(T_size, nthfield));
+    LoadInst *ptls_load = builder.CreateAlignedLoad(
+        T_psize, psafepoint, Align(sizeof(void *)), "safepoint");
+    tbaa_decorate(tbaa, ptls_load);
+    return ptls_load;
+}
+
+static inline void emit_signal_fence(llvm::IRBuilder<> &builder)
+{
+    using namespace llvm;
+    builder.CreateFence(AtomicOrdering::SequentiallyConsistent, SyncScope::SingleThread);
+}
+
+static inline void emit_gc_safepoint(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::MDNode *tbaa)
+{
+    emit_signal_fence(builder);
+    builder.CreateLoad(getSizeTy(builder.getContext()), get_current_signal_page_from_ptls(builder, ptls, tbaa), true);
+    emit_signal_fence(builder);
+}
+
+static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::Value *state, llvm::Value *old_state)
+{
+    using namespace llvm;
+    Type *T_int8 = state->getType();
+    ptls = emit_bitcast_with_builder(builder, ptls, builder.getInt8PtrTy());
+    Constant *offset = ConstantInt::getSigned(builder.getInt32Ty(), offsetof(jl_tls_states_t, gc_state));
+    Value *gc_state = builder.CreateInBoundsGEP(T_int8, ptls, ArrayRef<Value*>(offset), "gc_state");
+    if (old_state == nullptr) {
+        old_state = builder.CreateLoad(T_int8, gc_state);
+        cast<LoadInst>(old_state)->setOrdering(AtomicOrdering::Monotonic);
+    }
+    builder.CreateAlignedStore(state, gc_state, Align(sizeof(void*)))->setOrdering(AtomicOrdering::Release);
+    if (auto *C = dyn_cast<ConstantInt>(old_state))
+        if (C->isZero())
+            return old_state;
+    if (auto *C = dyn_cast<ConstantInt>(state))
+        if (!C->isZero())
+            return old_state;
+    BasicBlock *passBB = BasicBlock::Create(builder.getContext(), "safepoint", builder.GetInsertBlock()->getParent());
+    BasicBlock *exitBB = BasicBlock::Create(builder.getContext(), "after_safepoint", builder.GetInsertBlock()->getParent());
+    Constant *zero8 = ConstantInt::get(T_int8, 0);
+    builder.CreateCondBr(builder.CreateAnd(builder.CreateICmpNE(old_state, zero8), // if (old_state && !state)
+                                           builder.CreateICmpEQ(state, zero8)),
+                         passBB, exitBB);
+    builder.SetInsertPoint(passBB);
+    MDNode *tbaa = get_tbaa_const(builder.getContext());
+    emit_gc_safepoint(builder, ptls, tbaa);
+    builder.CreateBr(exitBB);
+    builder.SetInsertPoint(exitBB);
+    return old_state;
+}
+
+static inline llvm::Value *emit_gc_unsafe_enter(llvm::IRBuilder<> &builder, llvm::Value *ptls)
+{
+    using namespace llvm;
+    Value *state = builder.getInt8(0);
+    return emit_gc_state_set(builder, ptls, state, nullptr);
+}
+
+static inline llvm::Value *emit_gc_unsafe_leave(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::Value *state)
+{
+    using namespace llvm;
+    Value *old_state = builder.getInt8(0);
+    return emit_gc_state_set(builder, ptls, state, old_state);
+}
+
+static inline llvm::Value *emit_gc_safe_enter(llvm::IRBuilder<> &builder, llvm::Value *ptls)
+{
+    using namespace llvm;
+    Value *state = builder.getInt8(JL_GC_STATE_SAFE);
+    return emit_gc_state_set(builder, ptls, state, nullptr);
+}
+
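The helpers above encode one invariant: a thread transitioning out of a GC-safe state (old state nonzero, new state zero) must pass through a safepoint before touching the heap, so a pending collection can stop it there. A C sketch of that state machine under assumed names (`JL_GC_STATE_SAFE` is the runtime's constant; `run_safepoint` is illustrative):

    #include <stdint.h>

    #define JL_GC_STATE_SAFE 2  /* illustrative; defined in julia_threads.h */

    static void run_safepoint(void) { /* e.g. touch the signal page */ }

    /* Mirrors emit_gc_state_set: publish the new state, and when leaving
     * a safe state (nonzero) for unsafe (zero), hit a safepoint first. */
    static uint8_t gc_state_set(uint8_t *gc_state, uint8_t state, uint8_t old_state)
    {
        *gc_state = state;  /* the emitted code uses a release-ordered store */
        if (old_state != 0 && state == 0)
            run_safepoint();
        return old_state;
    }
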
+static inline llvm::Value *emit_gc_safe_leave(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::Value *state)
+{
+    using namespace llvm;
+    Value *old_state = builder.getInt8(JL_GC_STATE_SAFE);
+    return emit_gc_state_set(builder, ptls, state, old_state);
 }
 
 // Compatibility shims for LLVM attribute APIs that were renamed in LLVM 14.
@@ -327,5 +429,4 @@ inline Attribute getAttributeAtIndex(const AttributeList &L, unsigned Index, Att
     return L.getAttribute(Index, Kind);
 #endif
 }
-
 }
diff --git a/src/common_symbols1.inc b/src/common_symbols1.inc
index 7d445289e80fa..867961bc9a1d2 100644
--- a/src/common_symbols1.inc
+++ b/src/common_symbols1.inc
@@ -70,7 +70,7 @@ jl_symbol("toInt64"),
 jl_symbol("arraylen"),
 jl_symbol("typeassert"),
 jl_symbol("map"),
-jl_symbol("kwfunc"),
+jl_symbol("kwcall"),
 jl_symbol("ArgumentError"),
 jl_symbol("lshr_int"),
 jl_symbol("axes"),
diff --git a/src/datatype.c b/src/datatype.c
index a88e283e564de..fb63b67e5b830 100644
--- a/src/datatype.c
+++ b/src/datatype.c
@@ -52,7 +52,6 @@ JL_DLLEXPORT jl_methtable_t *jl_new_method_table(jl_sym_t *name, jl_module_t *mo
     jl_atomic_store_relaxed(&mt->leafcache, (jl_array_t*)jl_an_empty_vec_any);
     jl_atomic_store_relaxed(&mt->cache, jl_nothing);
     mt->max_args = 0;
-    mt->kwsorter = NULL;
     mt->backedges = NULL;
     JL_MUTEX_INIT(&mt->writelock);
     mt->offs = 0;
@@ -113,6 +112,63 @@ jl_datatype_t *jl_new_uninitialized_datatype(void)
     return t;
 }
 
+#include "support/htable.inc"
+
+static uint32_t _hash_djb2(uint32_t hash, const char *mem, size_t s) JL_NOTSAFEPOINT
+{
+    for (size_t i = 0; i < s; i++)
+        hash = ((hash << 5) + hash) + mem[i];
+    return hash;
+}
+
+static uint32_t _hash_layout_djb2(uintptr_t _layout, void *unused) JL_NOTSAFEPOINT
+{
+    (void)unused;
+    jl_datatype_layout_t* layout = (jl_datatype_layout_t *)_layout;
+    assert(layout);
+    size_t own_size = sizeof(jl_datatype_layout_t);
+    const char *fields = jl_dt_layout_fields(layout);
+    assert(fields);
+    size_t fields_size = layout->nfields * jl_fielddesc_size(layout->fielddesc_type);
+    const char *pointers = jl_dt_layout_ptrs(layout);
+    assert(pointers);
+    size_t pointers_size = (layout->npointers << layout->fielddesc_type);
+
+    uint32_t hash = 5381;
+    hash = _hash_djb2(hash, (char *)layout, own_size);
+    hash = _hash_djb2(hash, fields, fields_size);
+    hash = _hash_djb2(hash, pointers, pointers_size);
+    return hash;
+}
+
+static int layout_eq(void *_l1, void *_l2, void *unused) JL_NOTSAFEPOINT
+{
+    (void)unused;
+    jl_datatype_layout_t *l1 = (jl_datatype_layout_t *)_l1;
+    jl_datatype_layout_t *l2 = (jl_datatype_layout_t *)_l2;
+    if (memcmp(l1, l2, sizeof(jl_datatype_layout_t)))
+        return 0;
+    const char *f1 = jl_dt_layout_fields(l1);
+    const char *f2 = jl_dt_layout_fields(l2);
+    size_t fields_size = l1->nfields * jl_fielddesc_size(l1->fielddesc_type);
+    if (memcmp(f1, f2, fields_size))
+        return 0;
+    const char *p1 = jl_dt_layout_ptrs(l1);
+    const char *p2 = jl_dt_layout_ptrs(l2);
+    size_t pointers_size = (l1->npointers << l1->fielddesc_type);
+    if (memcmp(p1, p2, pointers_size))
+        return 0;
+    return 1;
+}
+
+//HTPROT(layoutcache)
+static void **layoutcache_lookup_bp_r(htable_t *h, void *key, void *ctx) JL_NOTSAFEPOINT;
+static void **layoutcache_peek_bp_r(htable_t *h, void *key, void *ctx) JL_NOTSAFEPOINT;
+HTPROT_R(layoutcache)
+HTIMPL_R(layoutcache, _hash_layout_djb2, layout_eq)
+static htable_t layoutcache;
+static int layoutcache_initialized = 0;
+
 static jl_datatype_layout_t *jl_get_layout(uint32_t nfields,
                                            uint32_t npointers,
                                            uint32_t alignment,
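`_hash_djb2` is the classic djb2 string hash (`hash = hash*33 + byte`, seeded with 5381), folded here over the three byte ranges of a layout descriptor. A self-contained C version of the core loop, for reference:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static uint32_t hash_djb2(uint32_t hash, const char *mem, size_t s)
    {
        for (size_t i = 0; i < s; i++)
            hash = ((hash << 5) + hash) + mem[i];  /* hash*33 + byte */
        return hash;
    }

    int main(void)
    {
        const char *key = "jl_datatype_layout_t";
        printf("%u\n", hash_djb2(5381, key, strlen(key)));
        return 0;
    }
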
@@ -147,12 +203,15 @@ static jl_datatype_layout_t *jl_get_layout(uint32_t nfields,
         }
     }
 
-    // allocate a new descriptor
-    // TODO: lots of these are the same--take advantage of the fact these are immutable to combine them
-    uint32_t fielddesc_size = jl_fielddesc_size(fielddesc_type);
-    jl_datatype_layout_t *flddesc = (jl_datatype_layout_t*)jl_gc_perm_alloc(
-        sizeof(jl_datatype_layout_t) + nfields * fielddesc_size + (npointers << fielddesc_type),
-        0, 4, 0);
+    // allocate a new descriptor, on the stack if possible.
+    size_t fields_size = nfields * jl_fielddesc_size(fielddesc_type);
+    size_t pointers_size = (npointers << fielddesc_type);
+    size_t flddesc_sz = sizeof(jl_datatype_layout_t) + fields_size + pointers_size;
+    int should_malloc = flddesc_sz >= jl_page_size;
+    jl_datatype_layout_t *mallocmem = (jl_datatype_layout_t *)(should_malloc ? malloc(flddesc_sz) : NULL);
+    jl_datatype_layout_t *allocamem = (jl_datatype_layout_t *)(should_malloc ? NULL : alloca(flddesc_sz));
+    jl_datatype_layout_t *flddesc = should_malloc ? mallocmem : allocamem;
+    assert(flddesc);
     flddesc->nfields = nfields;
     flddesc->alignment = alignment;
     flddesc->haspadding = haspadding;
@@ -161,9 +220,9 @@ static jl_datatype_layout_t *jl_get_layout(uint32_t nfields,
     flddesc->first_ptr = (npointers > 0 ? pointers[0] : -1);
 
     // fill out the fields of the new descriptor
-    jl_fielddesc8_t* desc8 = (jl_fielddesc8_t*)jl_dt_layout_fields(flddesc);
-    jl_fielddesc16_t* desc16 = (jl_fielddesc16_t*)jl_dt_layout_fields(flddesc);
-    jl_fielddesc32_t* desc32 = (jl_fielddesc32_t*)jl_dt_layout_fields(flddesc);
+    jl_fielddesc8_t *desc8 = (jl_fielddesc8_t *)jl_dt_layout_fields(flddesc);
+    jl_fielddesc16_t *desc16 = (jl_fielddesc16_t *)jl_dt_layout_fields(flddesc);
+    jl_fielddesc32_t *desc32 = (jl_fielddesc32_t *)jl_dt_layout_fields(flddesc);
     for (size_t i = 0; i < nfields; i++) {
         if (fielddesc_type == 0) {
             desc8[i].offset = desc[i].offset;
@@ -181,9 +240,9 @@ static jl_datatype_layout_t *jl_get_layout(uint32_t nfields,
             desc32[i].isptr = desc[i].isptr;
         }
     }
-    uint8_t* ptrs8 = (uint8_t*)jl_dt_layout_ptrs(flddesc);
-    uint16_t* ptrs16 = (uint16_t*)jl_dt_layout_ptrs(flddesc);
-    uint32_t* ptrs32 = (uint32_t*)jl_dt_layout_ptrs(flddesc);
+    uint8_t *ptrs8 = (uint8_t *)jl_dt_layout_ptrs(flddesc);
+    uint16_t *ptrs16 = (uint16_t *)jl_dt_layout_ptrs(flddesc);
+    uint32_t *ptrs32 = (uint32_t *)jl_dt_layout_ptrs(flddesc);
     for (size_t i = 0; i < npointers; i++) {
         if (fielddesc_type == 0) {
             ptrs8[i] = pointers[i];
@@ -195,7 +254,32 @@ static jl_datatype_layout_t *jl_get_layout(uint32_t nfields,
             ptrs32[i] = pointers[i];
         }
     }
-    return flddesc;
+
+    if (__unlikely(!layoutcache_initialized)) {
+        htable_new(&layoutcache, 4096);
+        layoutcache_initialized = 1;
+    }
+
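The control flow here is a hash-consing pattern: build the candidate descriptor in cheap temporary storage (alloca or malloc), look it up, and only promote it to permanent memory on a cache miss. A generic C sketch of the idea, with a toy linear-scan cache standing in for the layoutcache hash table:

    #include <stdlib.h>
    #include <string.h>

    static void *cache[256];      /* toy cache; the runtime uses an htable */
    static size_t cache_sz[256];
    static int cache_n;

    static void *intern_bytes(const void *tmp, size_t sz)
    {
        for (int i = 0; i < cache_n; i++)
            if (cache_sz[i] == sz && memcmp(cache[i], tmp, sz) == 0)
                return cache[i];   /* hit: share the identical object */
        void *perm = malloc(sz);   /* the runtime promotes via jl_gc_perm_alloc */
        memcpy(perm, tmp, sz);
        cache[cache_n] = perm;
        cache_sz[cache_n++] = sz;
        return perm;
    }
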
+    // Check the cache to see if this object already exists.
+    // Add to cache if not present, free temp buffer, return.
+    jl_datatype_layout_t *ret =
+        (jl_datatype_layout_t *)layoutcache_get_r(&layoutcache, flddesc, NULL);
+    if ((void*)ret == HT_NOTFOUND) {
+        if (!should_malloc) {
+            char *perm_mem = (char *)jl_gc_perm_alloc(flddesc_sz, 0, 4, 0);
+            assert(perm_mem);
+            ret = (jl_datatype_layout_t *)perm_mem;
+            memcpy(perm_mem, flddesc, flddesc_sz);
+        }
+        else {
+            ret = mallocmem;
+        }
+        layoutcache_put_r(&layoutcache, ret, ret, NULL);
+        return ret;
+    }
+
+    if (should_malloc)
+        free(flddesc);
+    return ret;
 }
 
 // Determine if homogeneous tuple with fields of type t will have
diff --git a/src/dlload.c b/src/dlload.c
index 717a598260b6a..57310c18b0e46 100644
--- a/src/dlload.c
+++ b/src/dlload.c
@@ -351,6 +351,10 @@ JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags,
 #ifdef _OS_WINDOWS_
             err = GetLastError();
             break; // LoadLibrary already tested the rest
+#else
+            // bail out and show the error if file actually exists
+            if (jl_stat(path, (char*)&stbuf) == 0)
+                break;
 #endif
         }
diff --git a/src/dump.c b/src/dump.c
index f267fa135b599..7631aa6d12d18 100644
--- a/src/dump.c
+++ b/src/dump.c
@@ -172,7 +172,7 @@ static jl_array_t *newly_inferred JL_GLOBALLY_ROOTED;
 static htable_t queued_method_roots;
 
 // inverse of backedges graph (caller=>callees hash)
-htable_t edges_map;
+jl_array_t *edges_map JL_GLOBALLY_ROOTED; // rooted for the duration of our uses of this
 
 // list of requested ccallable signatures
 static arraylist_t ccallable_list;
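`edges_map` becomes a GC-managed eqtable (a `jl_array_t`) instead of a raw `htable_t`, so the collector sees every caller and callee it holds; each value is a flat list of (invoke-signature-or-NULL, callee) pairs. A C sketch of that pair encoding with a plain array (illustrative types, not the runtime's):

    #include <stddef.h>

    /* Each edge occupies two slots: slot 2k is the invoke signature
     * (NULL for an ordinary call), slot 2k+1 is the callee. */
    typedef struct { void *items[64]; size_t len; } edgelist;

    static void push_edge(edgelist *e, void *invoke_types, void *callee)
    {
        e->items[e->len++] = invoke_types;
        e->items[e->len++] = callee;
    }

    static void *edge_invoke_sig(const edgelist *e, size_t k) { return e->items[2*k]; }
    static void *edge_callee(const edgelist *e, size_t k)     { return e->items[2*k + 1]; }
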
@@ -396,10 +396,11 @@ static int has_backedge_to_worklist(jl_method_instance_t *mi, htable_t *visited,
 // build, select those that are external and have at least one
 // relocatable CodeInstance and are inferred to be called from the worklist
 // or explicitly added by a precompile statement.
-static size_t queue_external_mis(jl_array_t *list)
+// Also prepares external_mis for method_instance_in_queue queries.
+static jl_array_t *queue_external_mis(jl_array_t *list)
 {
     if (list == NULL)
-        return 0;
+        return NULL;
     size_t i, n = 0;
     htable_t visited;
     assert(jl_is_array(list));
@@ -412,17 +413,16 @@ static size_t queue_external_mis(jl_array_t *list)
             jl_method_t *m = mi->def.method;
             if (!module_in_worklist(m->module)) {
                 jl_code_instance_t *ci = mi->cache;
-                int relocatable = 0;
                 while (ci) {
-                    if (ci->max_world == ~(size_t)0)
-                        relocatable |= ci->relocatability;
-                    ci = ci->next;
+                    if (ci->max_world == ~(size_t)0 && ci->relocatability && ci->inferred)
+                        break;
+                    ci = jl_atomic_load_relaxed(&ci->next);
                 }
-                if (relocatable && ptrhash_get(&external_mis, mi) == HT_NOTFOUND) {
+                if (ci && ptrhash_get(&external_mis, mi) == HT_NOTFOUND) {
                     int found = has_backedge_to_worklist(mi, &visited, 1);
                     assert(found == 0 || found == 1);
                     if (found == 1) {
-                        ptrhash_put(&external_mis, mi, mi);
+                        ptrhash_put(&external_mis, mi, ci);
                         n++;
                     }
                 }
@@ -430,7 +430,18 @@ static size_t queue_external_mis(jl_array_t *list)
         }
     }
     htable_free(&visited);
-    return n;
+    if (n == 0)
+        return NULL;
+    jl_array_t *mi_list = jl_alloc_vec_any(n);
+    n = 0;
+    for (size_t i = 0; i < external_mis.size; i += 2) {
+        void *ci = external_mis.table[i+1];
+        if (ci != HT_NOTFOUND) {
+            jl_array_ptr_set(mi_list, n++, (jl_value_t*)ci);
+        }
+    }
+    assert(n == jl_array_len(mi_list));
+    return mi_list;
 }
 
 static void jl_serialize_datatype(jl_serializer_state *s, jl_datatype_t *dt) JL_GC_DISABLED
@@ -464,40 +475,6 @@ static void jl_serialize_datatype(jl_serializer_state *s, jl_datatype_t *dt) JL_
         tag = 12;
     }
 
-    char *dtname = jl_symbol_name(dt->name->name);
-    size_t dtnl = strlen(dtname);
-    if (dtnl > 4 && strcmp(&dtname[dtnl - 4], "##kw") == 0 && !internal && tag != 0) {
-        /* XXX: yuck, this is horrible, but the auto-generated kw types from the serializer isn't a real type, so we *must* be very careful */
-        assert(tag == 6); // other struct types should never exist
-        tag = 9;
-        if (jl_type_type_mt->kwsorter != NULL && dt == (jl_datatype_t*)jl_typeof(jl_type_type_mt->kwsorter)) {
-            dt = jl_datatype_type; // any representative member with this MethodTable
-        }
-        else if (jl_nonfunction_mt->kwsorter != NULL && dt == (jl_datatype_t*)jl_typeof(jl_nonfunction_mt->kwsorter)) {
-            dt = jl_symbol_type; // any representative member with this MethodTable
-        }
-        else {
-            // search for the representative member of this MethodTable
-            jl_methtable_t *mt = dt->name->mt;
-            size_t l = strlen(jl_symbol_name(mt->name));
-            char *prefixed;
-            prefixed = (char*)malloc_s(l + 2);
-            prefixed[0] = '#';
-            strcpy(&prefixed[1], jl_symbol_name(mt->name));
-            // remove ##kw suffix
-            prefixed[l-3] = 0;
-            jl_sym_t *tname = jl_symbol(prefixed);
-            free(prefixed);
-            jl_value_t *primarydt = jl_get_global(mt->module, tname);
-            if (!primarydt)
-                primarydt = jl_get_global(mt->module, mt->name);
-            primarydt = jl_unwrap_unionall(primarydt);
-            assert(jl_is_datatype(primarydt));
-            assert(primarydt == (jl_value_t*)jl_any_type || jl_typeof(((jl_datatype_t*)primarydt)->name->mt->kwsorter) == (jl_value_t*)dt);
-            dt = (jl_datatype_t*)primarydt;
-        }
-    }
-
     write_uint8(s->s, TAG_DATATYPE);
     write_uint8(s->s, tag);
     if (tag == 6 || tag == 7) {
@@ -506,10 +483,6 @@ static void jl_serialize_datatype(jl_serializer_state *s, jl_datatype_t *dt) JL_
         jl_serialize_value(s, dt->parameters);
         return;
     }
-    if (tag == 9) {
-        jl_serialize_value(s, dt);
-        return;
-    }
 
     write_int32(s->s, dt->size);
     int has_instance = (dt->instance != NULL);
@@ -699,20 +672,16 @@ static int jl_serialize_generic(jl_serializer_state *s, jl_value_t *v) JL_GC_DIS
 }
 
 static void jl_serialize_code_instance(jl_serializer_state *s, jl_code_instance_t *codeinst,
-                                       int skip_partial_opaque, int internal,
-                                       int force) JL_GC_DISABLED
+                                       int skip_partial_opaque, int force) JL_GC_DISABLED
 {
-    if (internal > 2) {
-        while (codeinst && !codeinst->relocatability)
-            codeinst = codeinst->next;
-    }
     if (!force && jl_serialize_generic(s, (jl_value_t*)codeinst)) {
         return;
     }
     assert(codeinst != NULL); // handled by jl_serialize_generic, but this makes clang-sa happy
 
     int validate = 0;
-    if (codeinst->max_world == ~(size_t)0)
+    if (codeinst->max_world == ~(size_t)0 && codeinst->inferred)
+        // TODO: also check if this object is part of the codeinst cache and in edges_map
         validate = 1; // can check on deserialize if this cache entry is still valid
     int flags = validate << 0;
     if (codeinst->invoke == jl_fptr_const_return)
@@ -727,7 +696,7 @@ static void jl_serialize_code_instance(jl_serializer_state *s, jl_code_instance_
     if (write_ret_type && codeinst->rettype_const &&
             jl_typeis(codeinst->rettype_const, jl_partial_opaque_type)) {
         if (skip_partial_opaque) {
-            jl_serialize_code_instance(s, codeinst->next, skip_partial_opaque, internal, 0);
+            jl_serialize_code_instance(s, codeinst->next, skip_partial_opaque, 0);
             return;
         }
         else {
@@ -754,7 +723,7 @@ static void jl_serialize_code_instance(jl_serializer_state *s, jl_code_instance_
         jl_serialize_value(s, jl_nothing);
     }
     write_uint8(s->s, codeinst->relocatability);
-    jl_serialize_code_instance(s, codeinst->next, skip_partial_opaque, internal, 0);
+    jl_serialize_code_instance(s, codeinst->next, skip_partial_opaque, 0);
 }
 
 enum METHOD_SERIALIZATION_MODE {
@@ -976,8 +945,6 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_li
             internal = 1;
         else if (module_in_worklist(mi->def.method->module))
             internal = 2;
-        else if (ptrhash_get(&external_mis, (void*)mi) != HT_NOTFOUND)
-            internal = 3;
         write_uint8(s->s, internal);
         if (!internal) {
             // also flag this in the backref table as special
@@ -1015,10 +982,10 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_li
         }
         jl_serialize_value(s, (jl_value_t*)backedges);
         jl_serialize_value(s, (jl_value_t*)NULL); //callbacks
-        jl_serialize_code_instance(s, mi->cache, 1, internal, 0);
+        jl_serialize_code_instance(s, mi->cache, 1, 0);
     }
     else if (jl_is_code_instance(v)) {
-        jl_serialize_code_instance(s, (jl_code_instance_t*)v, 0, 2, 1);
+        jl_serialize_code_instance(s, (jl_code_instance_t*)v, 0, 1);
     }
     else if (jl_typeis(v, jl_module_type)) {
         jl_serialize_module(s, (jl_module_t*)v);
@@ -1188,26 +1155,10 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_li
     }
 }
 
-// Used to serialize the external method instances queued in queued_method_roots (from newly_inferred)
-static void serialize_htable_keys(jl_serializer_state *s, htable_t *ht, int nitems)
-{
-    write_int32(s->s, nitems);
-    void **table = ht->table;
-    size_t i, n = 0, sz = ht->size;
-    (void)n;
-    for (i = 0; i < sz; i += 2) {
-        if (table[i+1] != HT_NOTFOUND) {
-            jl_serialize_value(s, (jl_value_t*)table[i]);
-            n += 1;
-        }
-    }
-    assert(n == nitems);
-}
 
 // Create the forward-edge map (caller => callees)
 // the intent of these functions is to invert the backedges tree
 // for anything that points to a method not part of the worklist
-// or method instances not in the queue
 //
 // from MethodTables
 static void jl_collect_missing_backedges(jl_methtable_t *mt)
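`jl_serialize_code_instance` and `queue_external_mis` now use the same filter: an entry is worth keeping only if it is world-open (`max_world == ~0`), relocatable, and actually carries inferred code. A C sketch of that linked-list scan, with the struct reduced to the fields the test reads:

    #include <stddef.h>
    #include <stdint.h>

    typedef struct ci_sketch {
        size_t max_world;
        void *inferred;            /* non-NULL when inferred source is kept */
        uint8_t relocatability;
        struct ci_sketch *next;    /* the runtime loads this with relaxed atomics */
    } ci_sketch;

    /* Return the first cache entry valid in all future worlds that can be
     * relocated into another session and has code to serialize. */
    static ci_sketch *first_serializable(ci_sketch *ci)
    {
        for (; ci != NULL; ci = ci->next)
            if (ci->max_world == ~(size_t)0 && ci->relocatability && ci->inferred)
                return ci;
        return NULL;
    }
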
@@ -1218,30 +1169,39 @@
         for (i = 1; i < l; i += 2) {
             jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(backedges, i);
             jl_value_t *missing_callee = jl_array_ptr_ref(backedges, i - 1);  // signature of abstract callee
-            jl_array_t **edges = (jl_array_t**)ptrhash_bp(&edges_map, (void*)caller);
-            if (*edges == HT_NOTFOUND)
-                *edges = jl_alloc_vec_any(0);
-            // To stay synchronized with the format from MethodInstances (specifically for `invoke`d calls),
-            // we have to push a pair of values. But in this case the callee is unknown, so we leave it NULL.
-            push_edge(*edges, missing_callee, NULL);
+            jl_array_t *edges = (jl_array_t*)jl_eqtable_get(edges_map, (jl_value_t*)caller, NULL);
+            if (edges == NULL) {
+                edges = jl_alloc_vec_any(0);
+                JL_GC_PUSH1(&edges);
+                edges_map = jl_eqtable_put(edges_map, (jl_value_t*)caller, (jl_value_t*)edges, NULL);
+                JL_GC_POP();
+            }
+            jl_array_ptr_1d_push(edges, NULL);
+            jl_array_ptr_1d_push(edges, missing_callee);
         }
     }
 }
 
+
 // from MethodInstances
-static void collect_backedges(jl_method_instance_t *callee) JL_GC_DISABLED
+static void collect_backedges(jl_method_instance_t *callee, int internal) JL_GC_DISABLED
 {
     jl_array_t *backedges = callee->backedges;
     if (backedges) {
         size_t i = 0, l = jl_array_len(backedges);
-        jl_value_t *invokeTypes;
-        jl_method_instance_t *caller;
         while (i < l) {
+            jl_value_t *invokeTypes;
+            jl_method_instance_t *caller;
             i = get_next_edge(backedges, i, &invokeTypes, &caller);
-            jl_array_t **edges = (jl_array_t**)ptrhash_bp(&edges_map, caller);
-            if (*edges == HT_NOTFOUND)
-                *edges = jl_alloc_vec_any(0);
-            push_edge(*edges, invokeTypes, callee);
+            jl_array_t *edges = (jl_array_t*)jl_eqtable_get(edges_map, (jl_value_t*)caller, NULL);
+            if (edges == NULL) {
+                edges = jl_alloc_vec_any(0);
+                JL_GC_PUSH1(&edges);
+                edges_map = jl_eqtable_put(edges_map, (jl_value_t*)caller, (jl_value_t*)edges, NULL);
+                JL_GC_POP();
+            }
+            jl_array_ptr_1d_push(edges, invokeTypes);
+            jl_array_ptr_1d_push(edges, (jl_value_t*)callee);
         }
     }
 }
@@ -1250,24 +1210,21 @@ static void collect_backedges(jl_method_instance_t *callee) JL_GC_DISABLED
 // For functions owned by modules not on the worklist, call this on each method.
 // - if the method is owned by a worklist module, add it to the list of things to be
 //   fully serialized
-// - otherwise (i.e., if it's an external method), check all of its specializations.
-//   Collect all external backedges (may be needed later when we invert this list).
+// - Collect all backedges (may be needed later when we invert this list).
 static int jl_collect_methcache_from_mod(jl_typemap_entry_t *ml, void *closure) JL_GC_DISABLED
 {
     jl_array_t *s = (jl_array_t*)closure;
     jl_method_t *m = ml->func.method;
-    if (module_in_worklist(m->module)) {
+    if (s && module_in_worklist(m->module)) {
         jl_array_ptr_1d_push(s, (jl_value_t*)m);
         jl_array_ptr_1d_push(s, (jl_value_t*)ml->simplesig);
     }
-    else {
-        jl_svec_t *specializations = m->specializations;
-        size_t i, l = jl_svec_len(specializations);
-        for (i = 0; i < l; i++) {
-            jl_method_instance_t *callee = (jl_method_instance_t*)jl_svecref(specializations, i);
-            if ((jl_value_t*)callee != jl_nothing)
-                collect_backedges(callee);
-        }
+    jl_svec_t *specializations = m->specializations;
+    size_t i, l = jl_svec_len(specializations);
+    for (i = 0; i < l; i++) {
+        jl_method_instance_t *callee = (jl_method_instance_t*)jl_svecref(specializations, i);
+        if ((jl_value_t*)callee != jl_nothing)
+            collect_backedges(callee, !s);
     }
     return 1;
 }
@@ -1282,8 +1239,8 @@ static void jl_collect_methtable_from_mod(jl_array_t *s, jl_methtable_t *mt) JL_
 // Also collect relevant backedges
 static void jl_collect_extext_methods_from_mod(jl_array_t *s, jl_module_t *m) JL_GC_DISABLED
 {
-    if (module_in_worklist(m))
-        return;
+    if (s && module_in_worklist(m))
+        s = NULL; // do not collect any methods
     size_t i;
     void **table = m->bindings.table;
     for (i = 1; i < m->bindings.size; i += 2) {
@@ -1298,8 +1255,10 @@ static void jl_collect_extext_methods_from_mod(jl_array_t *s, jl_module_t *m) JL
                 if (mt != NULL &&
                         (jl_value_t*)mt != jl_nothing &&
                         (mt != jl_type_type_mt && mt != jl_nonfunction_mt)) {
+                    assert(mt->module == tn->module);
                     jl_collect_methtable_from_mod(s, mt);
-                    jl_collect_missing_backedges(mt);
+                    if (s)
+                        jl_collect_missing_backedges(mt);
                 }
             }
         }
@@ -1325,134 +1284,154 @@ static void jl_collect_extext_methods_from_mod(jl_array_t *s, jl_module_t *m) JL
     }
 }
 
-static void register_backedge(htable_t *all_callees, jl_value_t *invokeTypes, jl_value_t *c)
+static void jl_record_edges(jl_method_instance_t *caller, arraylist_t *wq, jl_array_t *edges) JL_GC_DISABLED
 {
-    if (invokeTypes)
-        ptrhash_put(all_callees, invokeTypes, c);
-    else
-        ptrhash_put(all_callees, c, c);
-
-}
-
-// flatten the backedge map reachable from caller into callees
-static void jl_collect_backedges_to(jl_method_instance_t *caller, htable_t *all_callees) JL_GC_DISABLED
-{
-    if (module_in_worklist(caller->def.method->module) || method_instance_in_queue(caller))
-        return;
-    if (ptrhash_has(&edges_map, caller)) {
-        jl_array_t **pcallees = (jl_array_t**)ptrhash_bp(&edges_map, (void*)caller),
-                    *callees = *pcallees;
-        assert(callees != HT_NOTFOUND);
-        *pcallees = (jl_array_t*) HT_NOTFOUND;
-        size_t i = 0, l = jl_array_len(callees);
-        jl_method_instance_t *c;
-        jl_value_t *invokeTypes;
-        while (i < l) {
-            i = get_next_edge(callees, i, &invokeTypes, &c);
-            register_backedge(all_callees, invokeTypes, (jl_value_t*)c);
+    jl_array_t *callees = (jl_array_t*)jl_eqtable_pop(edges_map, (jl_value_t*)caller, NULL, NULL);
+    if (callees != NULL) {
+        jl_array_ptr_1d_push(edges, (jl_value_t*)caller);
+        jl_array_ptr_1d_push(edges, (jl_value_t*)callees);
+        size_t i, l = jl_array_len(callees);
+        for (i = 1; i < l; i += 2) {
+            jl_method_instance_t *c = (jl_method_instance_t*)jl_array_ptr_ref(callees, i);
             if (c && jl_is_method_instance(c)) {
-                jl_collect_backedges_to((jl_method_instance_t*)c, all_callees);
+                arraylist_push(wq, c);
             }
         }
     }
 }
 
+
 // Extract `edges` and `ext_targets` from `edges_map`
-// This identifies internal->external edges in the call graph, pulling them out for
special treatment. -static void jl_collect_backedges(jl_array_t *edges, jl_array_t *ext_targets) -{ - htable_t all_targets; // target => tgtindex mapping - htable_t all_callees; // MIs called by worklist methods (eff. Set{MethodInstance}) - htable_new(&all_targets, 0); - htable_new(&all_callees, 0); - jl_value_t *invokeTypes; - jl_method_instance_t *c; - size_t i; - size_t world = jl_get_world_counter(); - void **table = edges_map.table; // edges is caller => callees - size_t table_size = edges_map.size; - for (i = 0; i < table_size; i += 2) { - assert(table == edges_map.table && table_size == edges_map.size && +// `edges` = [caller1, targets_indexes1, ...], the list of methods and their edges +// `ext_targets` is [invokesig1, callee1, matches1, ...], the edges for each target +static void jl_collect_edges(jl_array_t *edges, jl_array_t *ext_targets) +{ + size_t world = jl_atomic_load_acquire(&jl_world_counter); + arraylist_t wq; + arraylist_new(&wq, 0); + void **table = (void**)jl_array_data(edges_map); // edges is caller => callees + size_t table_size = jl_array_len(edges_map); + for (size_t i = 0; i < table_size; i += 2) { + assert(table == jl_array_data(edges_map) && table_size == jl_array_len(edges_map) && "edges_map changed during iteration"); jl_method_instance_t *caller = (jl_method_instance_t*)table[i]; jl_array_t *callees = (jl_array_t*)table[i + 1]; - if (callees == HT_NOTFOUND) + if (callees == NULL) continue; assert(jl_is_method_instance(caller) && jl_is_method(caller->def.method)); - if (module_in_worklist(caller->def.method->module) || method_instance_in_queue(caller)) { - size_t i = 0, l = jl_array_len(callees); - while (i < l) { - i = get_next_edge(callees, i, &invokeTypes, &c); - register_backedge(&all_callees, invokeTypes, (jl_value_t*)c); - if (c && jl_is_method_instance(c)) { - jl_collect_backedges_to((jl_method_instance_t*)c, &all_callees); - } + if (module_in_worklist(caller->def.method->module) || + method_instance_in_queue(caller)) { + jl_record_edges(caller, &wq, edges); + } + } + while (wq.len) { + jl_method_instance_t *caller = (jl_method_instance_t*)arraylist_pop(&wq); + jl_record_edges(caller, &wq, edges); + } + arraylist_free(&wq); + edges_map = NULL; + htable_t edges_map2; + htable_new(&edges_map2, 0); + htable_t edges_ids; + size_t l = jl_array_len(edges); + htable_new(&edges_ids, l); + for (size_t i = 0; i < l / 2; i++) { + jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, i * 2); + void *target = (void*)((char*)HT_NOTFOUND + i + 1); + ptrhash_put(&edges_ids, (void*)caller, target); + } + // process target list to turn it into a memoized validity table + // and compute the old methods list, ready for serialization + jl_value_t *matches = NULL; + jl_array_t *callee_ids = NULL; + JL_GC_PUSH2(&matches, &callee_ids); + for (size_t i = 0; i < l; i += 2) { + jl_array_t *callees = (jl_array_t*)jl_array_ptr_ref(edges, i + 1); + size_t l = jl_array_len(callees); + callee_ids = jl_alloc_array_1d(jl_array_int32_type, l + 1); + int32_t *idxs = (int32_t*)jl_array_data(callee_ids); + idxs[0] = 0; + size_t nt = 0; + for (size_t j = 0; j < l; j += 2) { + jl_value_t *invokeTypes = jl_array_ptr_ref(callees, j); + jl_value_t *callee = jl_array_ptr_ref(callees, j + 1); + assert(callee && "unsupported edge"); + + if (jl_is_method_instance(callee)) { + jl_methtable_t *mt = jl_method_get_table(((jl_method_instance_t*)callee)->def.method); + if (module_in_worklist(mt->module)) + continue; } - callees = jl_alloc_array_1d(jl_array_int32_type, 0); - void 
**pc = all_callees.table; - size_t j; - int valid = 1; - int mode; - for (j = 0; valid && j < all_callees.size; j += 2) { - if (pc[j + 1] != HT_NOTFOUND) { - jl_value_t *callee = (jl_value_t*)pc[j]; - void *target = ptrhash_get(&all_targets, (void*)callee); - if (target == HT_NOTFOUND) { - jl_value_t *sig; - if (jl_is_method_instance(callee)) { - sig = ((jl_method_instance_t*)callee)->specTypes; - mode = 1; - } - else { - sig = callee; - callee = (jl_value_t*)pc[j+1]; - mode = 2; - } - size_t min_valid = 0; - size_t max_valid = ~(size_t)0; - int ambig = 0; - jl_value_t *matches; - if (mode == 2 && callee && jl_is_method_instance(callee) && jl_is_type(sig)) { - // invoke, use subtyping - jl_methtable_t *mt = jl_method_get_table(((jl_method_instance_t*)callee)->def.method); - size_t min_world, max_world; - matches = jl_gf_invoke_lookup_worlds(sig, (jl_value_t*)mt, world, &min_world, &max_world); - if (matches == jl_nothing) { - valid = 0; - break; - } - matches = (jl_value_t*)((jl_method_match_t*)matches)->method; - } else { - matches = jl_matching_methods((jl_tupletype_t*)sig, jl_nothing, -1, 0, jl_atomic_load_acquire(&jl_world_counter), &min_valid, &max_valid, &ambig); - if (matches == jl_false) { - valid = 0; - break; - } - size_t k; - for (k = 0; k < jl_array_len(matches); k++) { - jl_method_match_t *match = (jl_method_match_t *)jl_array_ptr_ref(matches, k); - jl_array_ptr_set(matches, k, match->method); - } + + // (nullptr, c) => call + // (invokeTypes, c) => invoke + // (nullptr, invokeTypes) => missing call + // (invokeTypes, nullptr) => missing invoke (unused--inferred as Any) + void *target = ptrhash_get(&edges_map2, invokeTypes ? (void*)invokeTypes : (void*)callee); + if (target == HT_NOTFOUND) { + size_t min_valid = 0; + size_t max_valid = ~(size_t)0; + if (invokeTypes) { + jl_methtable_t *mt = jl_method_get_table(((jl_method_instance_t*)callee)->def.method); + if ((jl_value_t*)mt == jl_nothing) { + callee_ids = NULL; // invalid + break; + } + else { + matches = jl_gf_invoke_lookup_worlds(invokeTypes, (jl_value_t*)mt, world, &min_valid, &max_valid); + if (matches == jl_nothing) { + callee_ids = NULL; // invalid + break; } - jl_array_ptr_1d_push(ext_targets, mode == 1 ? 
NULL : sig); - jl_array_ptr_1d_push(ext_targets, callee); - jl_array_ptr_1d_push(ext_targets, matches); - target = (char*)HT_NOTFOUND + jl_array_len(ext_targets) / 3; - ptrhash_put(&all_targets, (void*)callee, target); + matches = (jl_value_t*)((jl_method_match_t*)matches)->method; + } + } + else { + jl_value_t *sig; + if (jl_is_method_instance(callee)) + sig = ((jl_method_instance_t*)callee)->specTypes; + else + sig = callee; + int ambig = 0; + matches = jl_matching_methods((jl_tupletype_t*)sig, jl_nothing, + -1, 0, world, &min_valid, &max_valid, &ambig); + if (matches == jl_false) { + callee_ids = NULL; // invalid + break; + } + size_t k; + for (k = 0; k < jl_array_len(matches); k++) { + jl_method_match_t *match = (jl_method_match_t *)jl_array_ptr_ref(matches, k); + jl_array_ptr_set(matches, k, match->method); } - jl_array_grow_end(callees, 1); - ((int32_t*)jl_array_data(callees))[jl_array_len(callees) - 1] = (char*)target - (char*)HT_NOTFOUND - 1; } + jl_array_ptr_1d_push(ext_targets, invokeTypes); + jl_array_ptr_1d_push(ext_targets, callee); + jl_array_ptr_1d_push(ext_targets, matches); + target = (void*)((char*)HT_NOTFOUND + jl_array_len(ext_targets) / 3); + ptrhash_put(&edges_map2, (void*)callee, target); } - htable_reset(&all_callees, 100); - if (valid) { - jl_array_ptr_1d_push(edges, (jl_value_t*)caller); - jl_array_ptr_1d_push(edges, (jl_value_t*)callees); + idxs[++nt] = (char*)target - (char*)HT_NOTFOUND - 1; + } + jl_array_ptr_set(edges, i + 1, callee_ids); // swap callees for ids + if (!callee_ids) + continue; + idxs[0] = nt; + // record place of every method in edges + // add method edges to the callee_ids list + for (size_t j = 0; j < l; j += 2) { + jl_value_t *callee = jl_array_ptr_ref(callees, j + 1); + if (callee && jl_is_method_instance(callee)) { + void *target = ptrhash_get(&edges_ids, (void*)callee); + if (target != HT_NOTFOUND) { + idxs[++nt] = (char*)target - (char*)HT_NOTFOUND - 1; + } } } + jl_array_del_end(callee_ids, l - nt); } - htable_free(&all_targets); - htable_free(&all_callees); + JL_GC_POP(); + htable_free(&edges_map2); } // serialize information about all loaded modules @@ -1658,12 +1637,6 @@ static jl_value_t *jl_deserialize_datatype(jl_serializer_state *s, int pos, jl_v backref_list.items[pos] = dtv; return dtv; } - if (tag == 9) { - jl_datatype_t *primarydt = (jl_datatype_t*)jl_deserialize_value(s, NULL); - jl_value_t *dtv = jl_typeof(jl_get_kwsorter((jl_value_t*)primarydt)); - backref_list.items[pos] = dtv; - return dtv; - } if (!(tag == 0 || tag == 5 || tag == 10 || tag == 11 || tag == 12)) { assert(0 && "corrupt deserialization state"); abort(); @@ -2381,344 +2354,384 @@ static void jl_insert_methods(jl_array_t *list) } } -void remove_code_instance_from_validation(jl_code_instance_t *codeinst) +int remove_code_instance_from_validation(jl_code_instance_t *codeinst) { - ptrhash_remove(&new_code_instance_validate, codeinst); + return ptrhash_remove(&new_code_instance_validate, codeinst); } -static int do_selective_invoke_backedge_invalidation(jl_methtable_t *mt, jl_value_t *mworld, jl_method_instance_t *mi, size_t world) -{ - jl_value_t *invokeTypes; - jl_method_instance_t *caller; - size_t jins = 0, j0, j = 0, nbe = jl_array_len(mi->backedges); - while (j < nbe) { - j0 = j; - j = get_next_edge(mi->backedges, j, &invokeTypes, &caller); - if (invokeTypes) { - struct jl_typemap_assoc search = {invokeTypes, world, NULL, 0, ~(size_t)0}; - jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(mt->defs, &search, /*offs*/0, /*subtype*/0); - if (entry) { - 
jl_value_t *imworld = entry->func.value; - if (jl_is_method(imworld) && mi->def.method == (jl_method_t*)imworld) { - // this one is OK - // in case we deleted some earlier ones, move this earlier - for (; j0 < j; jins++, j0++) { - jl_array_ptr_set(mi->backedges, jins, jl_array_ptr_ref(mi->backedges, j0)); - } - continue; - } - } - } - invalidate_backedges(&remove_code_instance_from_validation, caller, world, "jl_insert_method_instance caller"); - // The codeinst of this mi haven't yet been removed - jl_code_instance_t *codeinst = caller->cache; - while (codeinst) { - remove_code_instance_from_validation(codeinst); - codeinst = codeinst->next; - } - } - jl_array_del_end(mi->backedges, j - jins); - if (jins == 0) { - return 0; - } - return 1; -} - -static void jl_insert_method_instances(jl_array_t *list) JL_GC_DISABLED +// verify that these edges intersect with the same methods as before +static jl_array_t *jl_verify_edges(jl_array_t *targets) { - size_t i, l = jl_array_len(list); - // Validate the MethodInstances + size_t world = jl_atomic_load_acquire(&jl_world_counter); + size_t i, l = jl_array_len(targets) / 3; jl_array_t *valids = jl_alloc_array_1d(jl_array_uint8_type, l); memset(jl_array_data(valids), 1, l); - size_t world = jl_atomic_load_acquire(&jl_world_counter); + jl_value_t *loctag = NULL; + jl_value_t *matches = NULL; + JL_GC_PUSH3(&valids, &matches, &loctag); for (i = 0; i < l; i++) { - jl_method_instance_t *mi = (jl_method_instance_t*)jl_array_ptr_ref(list, i); + jl_value_t *invokesig = jl_array_ptr_ref(targets, i * 3); + jl_value_t *callee = jl_array_ptr_ref(targets, i * 3 + 1); + jl_value_t *expected = jl_array_ptr_ref(targets, i * 3 + 2); int valid = 1; - assert(jl_is_method_instance(mi)); - if (jl_is_method(mi->def.method)) { - jl_method_t *m = mi->def.method; - if (m->deleted_world != ~(size_t)0) { - // The method we depended on has been deleted, invalidate + size_t min_valid = 0; + size_t max_valid = ~(size_t)0; + if (invokesig) { + assert(callee && "unsupported edge"); + jl_methtable_t *mt = jl_method_get_table(((jl_method_instance_t*)callee)->def.method); + if ((jl_value_t*)mt == jl_nothing) { valid = 0; - } else { - // Is this still the method we'd be calling? - jl_methtable_t *mt = jl_method_table_for(mi->specTypes); - struct jl_typemap_assoc search = {(jl_value_t*)mi->specTypes, world, NULL, 0, ~(size_t)0}; - jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(mt->defs, &search, /*offs*/0, /*subtype*/0); - if (entry) { - jl_value_t *mworld = entry->func.value; - if (jl_is_method(mworld) && mi->def.method != (jl_method_t*)mworld && jl_type_morespecific(((jl_method_t*)mworld)->sig, mi->def.method->sig)) { - if (!mi->backedges) { - valid = 0; - } else { - // There's still a chance this is valid, if any caller made this via `invoke` and the invoke-signature is still valid. - // Selectively go through all the backedges, invalidating those not made via `invoke` and validating those that are. 
- if (!do_selective_invoke_backedge_invalidation(mt, mworld, mi, world)) { - m = (jl_method_t*)mworld; - valid = 0; - } - } - } - } - } - if (!valid) { - // None of the callers were valid, so invalidate `mi` too - jl_array_uint8_set(valids, i, 0); - invalidate_backedges(&remove_code_instance_from_validation, mi, world, "jl_insert_method_instance"); - jl_code_instance_t *codeinst = mi->cache; - while (codeinst) { - remove_code_instance_from_validation(codeinst); - codeinst = codeinst->next; - } - if (_jl_debug_method_invalidation) { - jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)m); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, jl_cstr_to_string("jl_method_table_insert")); // GC disabled - } } - } - } - // While it's tempting to just remove the invalidated MIs altogether, - // this hurts the ability of SnoopCompile to diagnose problems. - for (i = 0; i < l; i++) { - jl_method_instance_t *mi = (jl_method_instance_t*)jl_array_ptr_ref(list, i); - jl_method_instance_t *milive = jl_specializations_get_or_insert(mi); - ptrhash_put(&uniquing_table, mi, milive); // store the association for the 2nd pass - } - // We may need to fix up the backedges for the ones that didn't "go live" - for (i = 0; i < l; i++) { - jl_method_instance_t *mi = (jl_method_instance_t*)jl_array_ptr_ref(list, i); - jl_method_instance_t *milive = (jl_method_instance_t*)ptrhash_get(&uniquing_table, mi); - if (milive != mi) { - // A previously-loaded module compiled this method, so the one we deserialized will be dropped. - // But make sure the backedges are copied over. - jl_value_t *invokeTypes; - jl_method_instance_t *be, *belive; - if (mi->backedges) { - if (!milive->backedges) { - // Copy all the backedges (after looking up the live ones) - size_t j = 0, jlive = 0, n = jl_array_len(mi->backedges); - milive->backedges = jl_alloc_vec_any(n); - jl_gc_wb(milive, milive->backedges); - while (j < n) { - j = get_next_edge(mi->backedges, j, &invokeTypes, &be); - belive = (jl_method_instance_t*)ptrhash_get(&uniquing_table, be); - if (belive == HT_NOTFOUND) - belive = be; - jlive = set_next_edge(milive->backedges, jlive, invokeTypes, belive); - } - } else { - // Copy the missing backedges (this is an O(N^2) algorithm, but many methods have few MethodInstances) - size_t j = 0, k, n = jl_array_len(mi->backedges), nlive = jl_array_len(milive->backedges); - jl_value_t *invokeTypes2; - jl_method_instance_t *belive2; - while (j < n) { - j = get_next_edge(mi->backedges, j, &invokeTypes, &be); - belive = (jl_method_instance_t*)ptrhash_get(&uniquing_table, be); - if (belive == HT_NOTFOUND) - belive = be; - int found = 0; - k = 0; - while (k < nlive) { - k = get_next_edge(milive->backedges, k, &invokeTypes2, &belive2); - if (belive == belive2 && ((invokeTypes == NULL && invokeTypes2 == NULL) || - (invokeTypes && invokeTypes2 && jl_egal(invokeTypes, invokeTypes2)))) { - found = 1; - break; - } - } - if (!found) - push_edge(milive->backedges, invokeTypes, belive); - } + else { + matches = jl_gf_invoke_lookup_worlds(invokesig, (jl_value_t*)mt, world, &min_valid, &max_valid); + if (matches == jl_nothing) { + valid = 0; } - } - // Additionally, if we have CodeInstance(s) and the running CodeInstance is world-limited, transfer it - if (mi->cache && jl_array_uint8_ref(valids, i)) { - if (!milive->cache || milive->cache->max_world < ~(size_t)0) { - jl_code_instance_t *cilive = milive->cache, *ci; - milive->cache = mi->cache; - jl_gc_wb(milive, milive->cache); - ci = mi->cache; - ci->def = milive; - while (ci->next) { - ci 
= ci->next; ci->def = milive; } ci->next = cilive; jl_gc_wb(ci, ci->next); } } } } -} - -// verify that these edges intersect with the same methods as before -static void jl_verify_edges(jl_array_t *targets, jl_array_t **pvalids) -{ - size_t i, l = jl_array_len(targets) / 3; - jl_array_t *valids = jl_alloc_array_1d(jl_array_uint8_type, l); - memset(jl_array_data(valids), 1, l); - jl_value_t *loctag = NULL, *matches = NULL; - jl_methtable_t *mt = NULL; - JL_GC_PUSH3(&loctag, &matches, &mt); - *pvalids = valids; - size_t world = jl_get_world_counter(); - for (i = 0; i < l; i++) { - jl_value_t *invokesig = jl_array_ptr_ref(targets, i * 3); - jl_value_t *callee = jl_array_ptr_ref(targets, i * 3 + 1); - jl_method_instance_t *callee_mi = (jl_method_instance_t*)callee; - jl_value_t *sig; - if (callee && jl_is_method_instance(callee)) { - sig = invokesig == NULL ? callee_mi->specTypes : invokesig; - } else { - sig = callee == NULL ? invokesig : callee; - } - jl_value_t *expected = jl_array_ptr_ref(targets, i * 3 + 2); - int valid = 1; - size_t min_valid = 0; - size_t max_valid = ~(size_t)0; - int ambig = 0; - int use_invoke = invokesig == NULL || callee == NULL ? 0 : 1; - if (!use_invoke) { + jl_value_t *sig; + if (jl_is_method_instance(callee)) + sig = ((jl_method_instance_t*)callee)->specTypes; + else + sig = callee; + assert(jl_is_array(expected)); + int ambig = 0; // TODO: possibly need to include ambiguities too (for the optimizer correctness)? - matches = jl_matching_methods((jl_tupletype_t*)sig, jl_nothing, -1, 0, jl_atomic_load_acquire(&jl_world_counter), &min_valid, &max_valid, &ambig); - if (matches == jl_false || jl_array_len(matches) != jl_array_len(expected)) { + matches = jl_matching_methods((jl_tupletype_t*)sig, jl_nothing, + -1, 0, world, &min_valid, &max_valid, &ambig); + if (matches == jl_false) { valid = 0; } else { - assert(jl_is_array(expected)); - size_t j, k, l = jl_array_len(expected); + // setdiff!(matches, expected) + size_t j, k, ins = 0; + if (jl_array_len(matches) != jl_array_len(expected)) { + valid = 0; + } for (k = 0; k < jl_array_len(matches); k++) { - jl_method_match_t *match = (jl_method_match_t*)jl_array_ptr_ref(matches, k); - jl_method_t *m = match->method; - for (j = 0; j < l; j++) { - if (m == (jl_method_t*)jl_array_ptr_ref(expected, j)) + jl_method_t *match = ((jl_method_match_t*)jl_array_ptr_ref(matches, k))->method; + size_t l = jl_array_len(expected); + for (j = 0; j < l; j++) + if (match == (jl_method_t*)jl_array_ptr_ref(expected, j)) break; - } if (j == l) { // intersection has a new method or a method was // deleted--this is now probably no good, just invalidate // everything about it now valid = 0; - break; + if (!_jl_debug_method_invalidation) + break; + jl_array_ptr_set(matches, ins++, match); } } - } - } else { - mt = jl_method_get_table(((jl_method_instance_t*)callee)->def.method); - size_t min_world, max_world; - matches = jl_gf_invoke_lookup_worlds(invokesig, (jl_value_t*)mt, world, &min_world, &max_world); - if (matches == jl_nothing || expected != (jl_value_t*)((jl_method_match_t*)matches)->method) { - valid = 0; + if (!valid && _jl_debug_method_invalidation) + jl_array_del_end((jl_array_t*)matches, jl_array_len(matches) - ins); } } jl_array_uint8_set(valids, i, valid); if (!valid && _jl_debug_method_invalidation) { - jl_array_ptr_1d_push(_jl_debug_method_invalidation, callee ?
(jl_value_t*)callee : sig); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, invokesig ? (jl_value_t*)invokesig : callee); loctag = jl_cstr_to_string("insert_backedges_callee"); jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); loctag = jl_box_int32((int32_t)i); jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); - loctag = jl_box_uint64(jl_worklist_key(serializer_worklist)); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); - if (!use_invoke && matches != jl_false) { - // setdiff!(matches, expected) - size_t j, k, ins = 0; - for (j = 0; j < jl_array_len(matches); j++) { - int found = 0; - jl_method_t *match = ((jl_method_match_t*)jl_array_ptr_ref(matches, j))->method; - for (k = 0; !found && k < jl_array_len(expected); k++) - found |= jl_egal((jl_value_t*)match, jl_array_ptr_ref(expected, k)); - if (!found) - jl_array_ptr_set(matches, ins++, match); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, matches); + } + //jl_static_show((JL_STREAM*)ios_stderr, (jl_value_t*)invokesig); + //jl_static_show((JL_STREAM*)ios_stderr, (jl_value_t*)callee); + //ios_puts(valid ? "valid\n" : "INVALID\n", ios_stderr); + } + JL_GC_POP(); + return valids; +} + +// Combine all edges relevant to a method into the visited table +void jl_verify_methods(jl_array_t *edges, jl_array_t *valids, htable_t *visited) +{ + jl_value_t *loctag = NULL; + JL_GC_PUSH1(&loctag); + size_t i, l = jl_array_len(edges) / 2; + htable_new(visited, l); + for (i = 0; i < l; i++) { + jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * i); + assert(jl_is_method_instance(caller) && jl_is_method(caller->def.method)); + jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, 2 * i + 1); + assert(jl_typeis((jl_value_t*)callee_ids, jl_array_int32_type)); + int valid = 1; + if (callee_ids == NULL) { + // serializing the edges had failed + valid = 0; + } + else { + int32_t *idxs = (int32_t*)jl_array_data(callee_ids); + size_t j; + for (j = 0; valid && j < idxs[0]; j++) { + int32_t idx = idxs[j + 1]; + valid = jl_array_uint8_ref(valids, idx); + if (!valid && _jl_debug_method_invalidation) { + jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)caller); + loctag = jl_cstr_to_string("verify_methods"); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); + loctag = jl_box_int32((int32_t)idx); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); } - jl_array_del_end((jl_array_t*)matches, jl_array_len(matches) - ins); } - jl_array_ptr_1d_push(_jl_debug_method_invalidation, matches); } + ptrhash_put(visited, caller, (void*)(((char*)HT_NOTFOUND) + valid + 1)); + //jl_static_show((JL_STREAM*)ios_stderr, (jl_value_t*)caller); + //ios_puts(valid ? 
"valid\n" : "INVALID\n", ios_stderr); + // HT_NOTFOUND: valid (no invalid edges) + // HT_NOTFOUND + 1: invalid + // HT_NOTFOUND + 2: need to scan + // HT_NOTFOUND + 3 + depth: in-progress } JL_GC_POP(); } + +// Propagate the result of cycle-resolution to all edges (recursively) +static int mark_edges_in_worklist(jl_array_t *edges, int idx, jl_method_instance_t *cycle, htable_t *visited, int found) +{ + jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, idx * 2); + int oldfound = (char*)ptrhash_get(visited, caller) - (char*)HT_NOTFOUND; + if (oldfound < 3) + return 0; // not in-progress + if (!found) { + ptrhash_remove(visited, (void*)caller); + } + else { + ptrhash_put(visited, (void*)caller, (void*)((char*)HT_NOTFOUND + 1 + found)); + } + jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, idx * 2 + 1); + assert(jl_typeis((jl_value_t*)callee_ids, jl_array_int32_type)); + int32_t *idxs = (int32_t*)jl_array_data(callee_ids); + size_t i, badidx = 0, n = jl_array_len(callee_ids); + for (i = idxs[0] + 1; i < n; i++) { + if (mark_edges_in_worklist(edges, idxs[i], cycle, visited, found) && badidx == 0) + badidx = i - idxs[0]; + } + if (_jl_debug_method_invalidation) { + jl_value_t *loctag = NULL; + JL_GC_PUSH1(&loctag); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)caller); + loctag = jl_cstr_to_string("verify_methods"); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); + jl_method_instance_t *callee = cycle; + if (badidx--) + callee = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * badidx); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)callee); + JL_GC_POP(); + } + return 1; +} + + +// Visit the entire call graph, starting from edges[idx] to determine if that method is valid +static int jl_verify_graph_edge(jl_array_t *edges, int idx, htable_t *visited, int depth) +{ + jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, idx * 2); + assert(jl_is_method_instance(caller) && jl_is_method(caller->def.method)); + int found = (char*)ptrhash_get(visited, (void*)caller) - (char*)HT_NOTFOUND; + if (found == 0) + return 1; // valid + if (found == 1) + return 0; // invalid + if (found != 2) + return found - 1; // depth + found = 0; + ptrhash_put(visited, (void*)caller, (void*)((char*)HT_NOTFOUND + 3 + depth)); // change 2 to in-progress at depth + jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, idx * 2 + 1); + assert(jl_typeis((jl_value_t*)callee_ids, jl_array_int32_type)); + int32_t *idxs = (int32_t*)jl_array_data(callee_ids); + int cycle = 0; + size_t i, n = jl_array_len(callee_ids); + for (i = idxs[0] + 1; i < n; i++) { + int32_t idx = idxs[i]; + int child_found = jl_verify_graph_edge(edges, idx, visited, depth + 1); + if (child_found == 0) { + found = 1; + if (_jl_debug_method_invalidation) { + jl_value_t *loctag = NULL; + JL_GC_PUSH1(&loctag); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)caller); + loctag = jl_cstr_to_string("verify_methods"); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, jl_array_ptr_ref(edges, idx * 2)); + JL_GC_POP(); + } + break; + } + else if (child_found >= 2 && child_found - 2 < cycle) { + // record the cycle will resolve at depth "cycle" + cycle = child_found - 2; + assert(cycle); + } + } + if (!found) { + if (cycle && cycle != depth) + return cycle + 2; + ptrhash_remove(visited, (void*)caller); + } + else { // found invalid + ptrhash_put(visited, 
(void*)caller, (void*)((char*)HT_NOTFOUND + 1 + found)); } if (cycle) { // If we are the top of the current cycle, now mark all other parts of // our cycle by re-walking the backedges graph and marking all WIP // items as found. // Be careful to only re-walk as far as we had originally scanned above. // Or if we found a backedge, also mark all of the other parts of the // cycle as also having a backedge. n = i; for (i = idxs[0] + 1; i < n; i++) { mark_edges_in_worklist(edges, idxs[i], caller, visited, found); } } return found ? 0 : 1; } + +// Visit all entries in edges, verify if they are valid +static jl_array_t *jl_verify_graph(jl_array_t *edges, htable_t *visited) +{ + size_t i, n = jl_array_len(edges) / 2; + jl_array_t *valids = jl_alloc_array_1d(jl_array_uint8_type, n); + JL_GC_PUSH1(&valids); + int8_t *valids_data = (int8_t*)jl_array_data(valids); + for (i = 0; i < n; i++) { + valids_data[i] = jl_verify_graph_edge(edges, i, visited, 1); + } + JL_GC_POP(); + return valids; +} + // Restore backedges to external targets // `edges` = [caller1, targets_indexes1, ...], the list of worklist-owned methods calling external methods. // `ext_targets` is [invokesig1, callee1, matches1, ...], the global set of non-worklist callees of worklist-owned methods. -static void jl_insert_backedges(jl_array_t *edges, jl_array_t *ext_targets) +static void jl_insert_backedges(jl_array_t *edges, jl_array_t *ext_targets, jl_array_t *mi_list) { - // foreach(enable, ((edges[2i-1] => ext_targets[edges[2i] .* 3]) for i in 1:length(edges)÷2 if all(valids[edges[2i]]))) - size_t i, l = jl_array_len(edges); + // determine which CodeInstance objects are still valid in our image size_t world = jl_atomic_load_acquire(&jl_world_counter); - jl_array_t *valids = NULL; - jl_value_t *targetidx = NULL; - JL_GC_PUSH2(&valids, &targetidx); - jl_verify_edges(ext_targets, &valids); - for (i = 0; i < l; i += 2) { - jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, i); + jl_array_t *valids = jl_verify_edges(ext_targets); + JL_GC_PUSH1(&valids); + htable_t visited; + htable_new(&visited, 0); + jl_verify_methods(edges, valids, &visited); + valids = jl_verify_graph(edges, &visited); + size_t i, l = jl_array_len(edges) / 2; + + // next build a map from external_mis to their CodeInstance for insertion + if (mi_list == NULL) { + htable_reset(&visited, 0); + } + else { + size_t i, l = jl_array_len(mi_list); + htable_reset(&visited, l); + for (i = 0; i < l; i++) { + jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(mi_list, i); + ptrhash_put(&visited, (void*)ci->def, (void*)ci); + } + } + + // next disable any invalid codes, so we do not try to enable them + for (i = 0; i < l; i++) { + jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * i); assert(jl_is_method_instance(caller) && jl_is_method(caller->def.method)); - jl_array_t *idxs_array = (jl_array_t*)jl_array_ptr_ref(edges, i + 1); - assert(jl_isa((jl_value_t*)idxs_array, jl_array_int32_type)); - int32_t *idxs = (int32_t*)jl_array_data(idxs_array); - int valid = 1; - size_t j, idxbad = -1; - for (j = 0; valid && j < jl_array_len(idxs_array); j++) { - int32_t idx = idxs[j]; - valid = jl_array_uint8_ref(valids, idx); - if (!valid) - idxbad = idx; - } - if (valid) { - // if this callee is still valid, add all the backedges - for (j = 0; j < jl_array_len(idxs_array); j++) { - int32_t idx = idxs[j]; - jl_value_t *callee = jl_array_ptr_ref(ext_targets, idx * 3 + 1); - if (callee &&
jl_is_method_instance(callee)) { - jl_value_t *invokesig = jl_array_ptr_ref(ext_targets, idx * 3); - jl_method_instance_add_backedge((jl_method_instance_t*)callee, invokesig, caller); - } - else { - jl_value_t *sig = callee == NULL ? jl_array_ptr_ref(ext_targets, idx * 3) : callee; - jl_methtable_t *mt = jl_method_table_for(sig); - // FIXME: rarely, `callee` has an unexpected `Union` signature, - // see https://github.com/JuliaLang/julia/pull/43990#issuecomment-1030329344 - // Fix the issue and turn this back into an `assert((jl_value_t*)mt != jl_nothing)` - // This workaround exposes us to (rare) 265-violations. - if ((jl_value_t*)mt != jl_nothing) - jl_method_table_add_backedge(mt, sig, (jl_value_t*)caller); - } - } - // then enable it + int valid = jl_array_uint8_ref(valids, i); + if (valid) + continue; + void *ci = ptrhash_get(&visited, (void*)caller); + if (ci != HT_NOTFOUND) { + assert(jl_is_code_instance(ci)); + remove_code_instance_from_validation((jl_code_instance_t*)ci); // mark it as handled + } + else { jl_code_instance_t *codeinst = caller->cache; while (codeinst) { - if (ptrhash_get(&new_code_instance_validate, codeinst) != HT_NOTFOUND && codeinst->min_world > 0) - codeinst->max_world = ~(size_t)0; - ptrhash_remove(&new_code_instance_validate, codeinst); // mark it as handled + remove_code_instance_from_validation(codeinst); // should be left invalid codeinst = jl_atomic_load_relaxed(&codeinst->next); } } + } + + // finally enable any applicable new codes + for (i = 0; i < l; i++) { + jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * i); + int valid = jl_array_uint8_ref(valids, i); + if (!valid) + continue; + // if this callee is still valid, add all the backedges + jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, 2 * i + 1); + int32_t *idxs = (int32_t*)jl_array_data(callee_ids); + for (size_t j = 0; j < idxs[0]; j++) { + int32_t idx = idxs[j + 1]; + jl_value_t *invokesig = jl_array_ptr_ref(ext_targets, idx * 3); + jl_value_t *callee = jl_array_ptr_ref(ext_targets, idx * 3 + 1); + if (callee && jl_is_method_instance(callee)) { + jl_method_instance_add_backedge((jl_method_instance_t*)callee, invokesig, caller); + } + else { + jl_value_t *sig = callee == NULL ? invokesig : callee; + jl_methtable_t *mt = jl_method_table_for(sig); + // FIXME: rarely, `callee` has an unexpected `Union` signature, + // see https://github.com/JuliaLang/julia/pull/43990#issuecomment-1030329344 + // Fix the issue and turn this back into an `assert((jl_value_t*)mt != jl_nothing)` + // This workaround exposes us to (rare) 265-violations. 
+ if ((jl_value_t*)mt != jl_nothing) + jl_method_table_add_backedge(mt, sig, (jl_value_t*)caller); + } + } + // then enable it + void *ci = ptrhash_get(&visited, (void*)caller); + if (ci != HT_NOTFOUND) { + // have some new external code to use + assert(jl_is_code_instance(ci)); + jl_code_instance_t *codeinst = (jl_code_instance_t*)ci; + remove_code_instance_from_validation(codeinst); // mark it as handled + assert(codeinst->min_world >= world && codeinst->inferred); + codeinst->max_world = ~(size_t)0; + if (jl_rettype_inferred(caller, world, ~(size_t)0) == jl_nothing) { + jl_mi_cache_insert(caller, codeinst); + } + } else { jl_code_instance_t *codeinst = caller->cache; while (codeinst) { - ptrhash_remove(&new_code_instance_validate, codeinst); // should be left invalid + if (remove_code_instance_from_validation(codeinst)) { // mark it as handled + assert(codeinst->min_world >= world && codeinst->inferred); + codeinst->max_world = ~(size_t)0; + } codeinst = jl_atomic_load_relaxed(&codeinst->next); } - invalidate_backedges(&remove_code_instance_from_validation, caller, world, "insert_backedges"); - if (_jl_debug_method_invalidation) { - targetidx = jl_box_int32((int32_t)idxbad); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, targetidx); - targetidx = jl_box_uint64(jl_worklist_key(serializer_worklist)); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, targetidx); - } } } + + htable_free(&visited); JL_GC_POP(); } static void validate_new_code_instances(void) { + size_t world = jl_atomic_load_acquire(&jl_world_counter); size_t i; for (i = 0; i < new_code_instance_validate.size; i += 2) { if (new_code_instance_validate.table[i+1] != HT_NOTFOUND) { - ((jl_code_instance_t*)new_code_instance_validate.table[i])->max_world = ~(size_t)0; + jl_code_instance_t *ci = (jl_code_instance_t*)new_code_instance_validate.table[i]; + JL_GC_PROMISE_ROOTED(ci); // TODO: this needs a root (or restructuring to avoid it) + assert(ci->min_world >= world && ci->inferred); + ci->max_world = ~(size_t)0; + jl_method_instance_t *caller = ci->def; + if (jl_rettype_inferred(caller, world, ~(size_t)0) == jl_nothing) { + jl_mi_cache_insert(caller, ci); + } + //jl_static_show((JL_STREAM*)ios_stderr, (jl_value_t*)caller); + //ios_puts("FREE\n", ios_stderr); } } } @@ -2893,13 +2906,18 @@ JL_DLLEXPORT void jl_set_newly_inferred(jl_value_t* _newly_inferred) JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist) { JL_TIMING(SAVE_MODULE); + jl_task_t *ct = jl_current_task; ios_t f; - jl_array_t *mod_array = NULL, *udeps = NULL; if (ios_file(&f, fname, 1, 1, 1, 1) == NULL) { jl_printf(JL_STDERR, "Cannot open cache file \"%s\" for writing.\n", fname); return 1; } - JL_GC_PUSH2(&mod_array, &udeps); + + jl_array_t *mod_array = NULL, *udeps = NULL; + jl_array_t *extext_methods = NULL, *mi_list = NULL; + jl_array_t *ext_targets = NULL, *edges = NULL; + JL_GC_PUSH7(&mod_array, &udeps, &extext_methods, &mi_list, &ext_targets, &edges, &edges_map); + mod_array = jl_get_loaded_modules(); // __toplevel__ modules loaded in this session (from Base.loaded_modules_array) assert(jl_precompile_toplevel_module == NULL); jl_precompile_toplevel_module = (jl_module_t*)jl_array_ptr_ref(worklist, jl_array_len(worklist)-1); @@ -2918,7 +2936,6 @@ JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist) write_mod_list(&f, mod_array); arraylist_new(&reinit_list, 0); - htable_new(&edges_map, 0); htable_new(&backref_table, 5000); htable_new(&external_mis, newly_inferred ? 
jl_array_len(newly_inferred) : 0); ptrhash_put(&backref_table, jl_main_module, (char*)HT_NOTFOUND + 1); @@ -2931,18 +2948,14 @@ JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist) jl_symbol("BITS_PER_LIMB"))) / 8; } - int en = jl_gc_enable(0); // edges map is not gc-safe - jl_array_t *extext_methods = jl_alloc_vec_any(0); // [method1, simplesig1, ...], worklist-owned "extending external" methods added to functions owned by modules outside the worklist - jl_array_t *ext_targets = jl_alloc_vec_any(0); // [invokesig1, callee1, matches1, ...] non-worklist callees of worklist-owned methods - // ordinary dispatch: invokesig=NULL, callee is MethodInstance - // `invoke` dispatch: invokesig is signature, callee is MethodInstance - // abstract call: callee is signature - jl_array_t *edges = jl_alloc_vec_any(0); // [caller1, ext_targets_indexes1, ...] for worklist-owned methods calling external methods + jl_gc_enable_finalizers(ct, 0); // make sure we don't run any Julia code concurrently after this point - int n_ext_mis = queue_external_mis(newly_inferred); + // Save the inferred code from newly inferred, external methods + mi_list = queue_external_mis(newly_inferred); - size_t i; - size_t len = jl_array_len(mod_array); + edges_map = jl_alloc_vec_any(0); + extext_methods = jl_alloc_vec_any(0); // [method1, simplesig1, ...], worklist-owned "extending external" methods added to functions owned by modules outside the worklist + size_t i, len = jl_array_len(mod_array); for (i = 0; i < len; i++) { jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(mod_array, i); assert(jl_is_module(m)); @@ -2953,10 +2966,14 @@ JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist) jl_collect_missing_backedges(jl_type_type_mt); jl_collect_methtable_from_mod(extext_methods, jl_nonfunction_mt); jl_collect_missing_backedges(jl_nonfunction_mt); - - // jl_collect_extext_methods_from_mod and jl_collect_missing_backedges accumulate data in edges_map. + // jl_collect_extext_methods_from_mod and jl_collect_missing_backedges also accumulate data in edges_map. // Process this to extract `edges` and `ext_targets`. - jl_collect_backedges(edges, ext_targets); + ext_targets = jl_alloc_vec_any(0); // [invokesig1, callee1, matches1, ...] non-worklist callees of worklist-owned methods + // ordinary dispatch: invokesig=NULL, callee is MethodInstance + // `invoke` dispatch: invokesig is signature, callee is MethodInstance + // abstract call: callee is signature + edges = jl_alloc_vec_any(0); // [caller1, ext_targets_indexes1, ...] 
for worklist-owned methods calling external methods + jl_collect_edges(edges, ext_targets); jl_serializer_state s = { &f, @@ -2965,21 +2982,20 @@ JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist) }; jl_serialize_value(&s, worklist); // serialize module-owned items (those accessible from the bindings table) jl_serialize_value(&s, extext_methods); // serialize new worklist-owned methods for external functions - serialize_htable_keys(&s, &external_mis, n_ext_mis); // serialize external MethodInstances - // The next two allow us to restore backedges from external "unserialized" (stub-serialized) MethodInstances - // to the ones we serialize here + // The next three allow us to restore code instances, if still valid + jl_serialize_value(&s, mi_list); jl_serialize_value(&s, edges); jl_serialize_value(&s, ext_targets); jl_finalize_serializer(&s); serializer_worklist = NULL; - jl_gc_enable(en); - htable_reset(&edges_map, 0); - htable_reset(&backref_table, 0); - htable_reset(&external_mis, 0); + htable_free(&backref_table); + htable_free(&external_mis); arraylist_free(&reinit_list); + jl_gc_enable_finalizers(ct, 1); // make sure we don't run any Julia code concurrently before this point + // Write the source-text for the dependent files if (udeps) { // Go back and update the source-text position to point to the current position @@ -3359,15 +3375,11 @@ static jl_value_t *_jl_restore_incremental(ios_t *f, jl_array_t *mod_array) }; jl_array_t *restored = (jl_array_t*)jl_deserialize_value(&s, (jl_value_t**)&restored); serializer_worklist = restored; - assert(jl_isa((jl_value_t*)restored, jl_array_any_type)); + assert(jl_typeis((jl_value_t*)restored, jl_array_any_type)); // See explanation in jl_save_incremental for variables of the same names jl_value_t *extext_methods = jl_deserialize_value(&s, &extext_methods); - int i, n_ext_mis = read_int32(s.s); - jl_array_t *mi_list = jl_alloc_vec_any(n_ext_mis); // reload MIs stored by serialize_htable_keys - jl_value_t **midata = (jl_value_t**)jl_array_data(mi_list); - for (i = 0; i < n_ext_mis; i++) - midata[i] = jl_deserialize_value(&s, &(midata[i])); + jl_value_t *mi_list = jl_deserialize_value(&s, &mi_list); // reload MIs stored by queue_external_mis jl_value_t *edges = jl_deserialize_value(&s, &edges); jl_value_t *ext_targets = jl_deserialize_value(&s, &ext_targets); @@ -3381,19 +3393,16 @@ static jl_value_t *_jl_restore_incremental(ios_t *f, jl_array_t *mod_array) jl_insert_methods((jl_array_t*)extext_methods); // hook up extension methods for external generic functions (needs to be after recache types) jl_recache_other(); // make all of the other objects identities correct (needs to be after insert methods) jl_copy_roots(); // copying new roots of external methods (must wait until recaching is complete) - // At this point, the novel specializations in mi_list reference the real method, but they haven't been cached in its specializations - jl_insert_method_instances(mi_list); // insert novel specializations htable_free(&uniquing_table); jl_array_t *init_order = jl_finalize_deserializer(&s, tracee_list); // done with f and s (needs to be after recache) if (init_order == NULL) init_order = (jl_array_t*)jl_an_empty_vec_any; - assert(jl_isa((jl_value_t*)init_order, jl_array_any_type)); + assert(jl_typeis((jl_value_t*)init_order, jl_array_any_type)); - JL_GC_PUSH4(&init_order, &restored, &edges, &ext_targets); + JL_GC_PUSH5(&init_order, &restored, &edges, &ext_targets, &mi_list); jl_gc_enable(en); // subtyping can allocate a lot, 
not valid before recache-other - jl_insert_backedges((jl_array_t*)edges, (jl_array_t*)ext_targets); // restore external backedges (needs to be last) - + jl_insert_backedges((jl_array_t*)edges, (jl_array_t*)ext_targets, (jl_array_t*)mi_list); // restore external backedges (needs to be last) // check new CodeInstances and validate any that lack external backedges validate_new_code_instances(); diff --git a/src/gc-alloc-profiler.cpp b/src/gc-alloc-profiler.cpp index 818d6e803c9df..1bcbeb2189f5f 100644 --- a/src/gc-alloc-profiler.cpp +++ b/src/gc-alloc-profiler.cpp @@ -80,7 +80,8 @@ extern "C" { // Needed since these functions doesn't take any arguments. JL_DLLEXPORT void jl_start_alloc_profile(double sample_rate) { // We only need to do this once, the first time this is called. - while (g_alloc_profile.per_thread_profiles.size() < (size_t)jl_n_threads) { + size_t nthreads = jl_atomic_load_acquire(&jl_n_threads); + while (g_alloc_profile.per_thread_profiles.size() < nthreads) { g_alloc_profile.per_thread_profiles.push_back(jl_per_thread_alloc_profile_t{}); } @@ -131,7 +132,10 @@ JL_DLLEXPORT void jl_free_alloc_profile() { void _maybe_record_alloc_to_profile(jl_value_t *val, size_t size, jl_datatype_t *type) JL_NOTSAFEPOINT { auto& global_profile = g_alloc_profile; - auto thread_id = jl_atomic_load_relaxed(&jl_current_task->tid); + size_t thread_id = jl_atomic_load_relaxed(&jl_current_task->tid); + if (thread_id >= global_profile.per_thread_profiles.size()) + return; // ignore allocations on threads started after the alloc-profile started + auto& profile = global_profile.per_thread_profiles[thread_id]; auto sample_val = double(rand()) / double(RAND_MAX); diff --git a/src/gc-debug.c b/src/gc-debug.c index 5d42da196ccf8..aa9dd5abda01b 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -99,7 +99,7 @@ static arraylist_t bits_save[4]; static void gc_clear_mark_page(jl_gc_pagemeta_t *pg, int bits) { - jl_ptls_t ptls2 = jl_all_tls_states[pg->thread_n]; + jl_ptls_t ptls2 = gc_all_tls_states[pg->thread_n]; jl_gc_pool_t *pool = &ptls2->heap.norm_pools[pg->pool_n]; jl_taggedvalue_t *pv = (jl_taggedvalue_t*)(pg->data + GC_PAGE_OFFSET); char *lim = (char*)pv + GC_PAGE_SZ - GC_PAGE_OFFSET - pool->osize; @@ -164,8 +164,8 @@ static void clear_mark(int bits) } } bigval_t *v; - for (int i = 0;i < jl_n_threads;i++) { - v = jl_all_tls_states[i]->heap.big_objects; + for (int i = 0; i < gc_n_threads; i++) { + v = gc_all_tls_states[i]->heap.big_objects; while (v != NULL) { void *gcv = &v->header; if (!gc_verifying) @@ -207,8 +207,8 @@ static void gc_verify_track(jl_ptls_t ptls) clear_mark(GC_CLEAN); gc_mark_queue_all_roots(ptls, &sp); gc_mark_queue_finlist(gc_cache, &sp, &to_finalize, 0); - for (int i = 0;i < jl_n_threads;i++) { - jl_ptls_t ptls2 = jl_all_tls_states[i]; + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; gc_mark_queue_finlist(gc_cache, &sp, &ptls2->finalizers, 0); } gc_mark_queue_finlist(gc_cache, &sp, &finalizer_list_marked, 0); @@ -256,8 +256,8 @@ void gc_verify(jl_ptls_t ptls) gc_verifying = 1; gc_mark_queue_all_roots(ptls, &sp); gc_mark_queue_finlist(gc_cache, &sp, &to_finalize, 0); - for (int i = 0;i < jl_n_threads;i++) { - jl_ptls_t ptls2 = jl_all_tls_states[i]; + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; gc_mark_queue_finlist(gc_cache, &sp, &ptls2->finalizers, 0); } gc_mark_queue_finlist(gc_cache, &sp, &finalizer_list_marked, 0); @@ -297,7 +297,7 @@ static void gc_verify_tags_page(jl_gc_pagemeta_t *pg) // for all pages 
in use int p_n = pg->pool_n; int t_n = pg->thread_n; - jl_ptls_t ptls2 = jl_all_tls_states[t_n]; + jl_ptls_t ptls2 = gc_all_tls_states[t_n]; jl_gc_pool_t *p = &ptls2->heap.norm_pools[p_n]; int osize = pg->osize; char *data = pg->data; @@ -401,8 +401,8 @@ static void gc_verify_tags_pagetable(void) void gc_verify_tags(void) { // verify the freelist chains look valid - for (int t_i = 0; t_i < jl_n_threads; t_i++) { - jl_ptls_t ptls2 = jl_all_tls_states[t_i]; + for (int t_i = 0; t_i < gc_n_threads; t_i++) { + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; for (int i = 0; i < JL_GC_N_POOLS; i++) { // for all pools, iterate its freelist jl_gc_pool_t *p = &ptls2->heap.norm_pools[i]; @@ -467,7 +467,7 @@ static void gc_debug_alloc_init(jl_alloc_num_t *num, const char *name) return; if (*env == 'r') { env++; - for (int i = 0;i < 3;i++) { + for (int i = 0; i < 3; i++) { while (num->random[i] == 0) { num->random[i] = jl_rand(); } @@ -577,7 +577,7 @@ static void gc_scrub_task(jl_task_t *ta) jl_ptls_t ptls = jl_current_task->ptls; jl_ptls_t ptls2 = NULL; if (tid != -1) - ptls2 = jl_all_tls_states[tid]; + ptls2 = gc_all_tls_states[tid]; char *low; char *high; @@ -946,8 +946,8 @@ void gc_time_mark_pause(int64_t t0, int64_t scanned_bytes, { int64_t last_remset_len = 0; int64_t remset_nptr = 0; - for (int t_i = 0;t_i < jl_n_threads;t_i++) { - jl_ptls_t ptls2 = jl_all_tls_states[t_i]; + for (int t_i = 0; t_i < gc_n_threads; t_i++) { + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; last_remset_len += ptls2->heap.last_remset->len; remset_nptr = ptls2->heap.remset_nptr; } @@ -1023,7 +1023,7 @@ void jl_gc_debug_init(void) #endif #ifdef OBJPROFILE - for (int g = 0;g < 3;g++) { + for (int g = 0; g < 3; g++) { htable_new(&obj_counts[g], 0); htable_new(&obj_sizes[g], 0); } @@ -1085,8 +1085,8 @@ void gc_stats_all_pool(void) { size_t nb=0, w, tw=0, no=0, tp=0, nold=0, noldbytes=0, np, nol; for (int i = 0; i < JL_GC_N_POOLS; i++) { - for (int t_i = 0; t_i < jl_n_threads; t_i++) { - jl_ptls_t ptls2 = jl_all_tls_states[t_i]; + for (int t_i = 0; t_i < gc_n_threads; t_i++) { + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; size_t b = pool_stats(&ptls2->heap.norm_pools[i], &w, &np, &nol); nb += b; no += (b / ptls2->heap.norm_pools[i].osize); @@ -1110,8 +1110,8 @@ void gc_stats_all_pool(void) void gc_stats_big_obj(void) { size_t nused=0, nbytes=0, nused_old=0, nbytes_old=0; - for (int t_i = 0; t_i < jl_n_threads; t_i++) { - jl_ptls_t ptls2 = jl_all_tls_states[t_i]; + for (int t_i = 0; t_i < gc_n_threads; t_i++) { + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; bigval_t *v = ptls2->heap.big_objects; while (v != NULL) { if (gc_marked(v->bits.gc)) { @@ -1219,7 +1219,7 @@ void gc_count_pool(void) empty_pages = 0; gc_count_pool_pagetable(); jl_safe_printf("****** Pool stat: ******\n"); - for (int i = 0;i < 4;i++) + for (int i = 0; i < 4; i++) jl_safe_printf("bits(%d): %" PRId64 "\n", i, poolobj_sizes[i]); // empty_pages is inaccurate after the sweep since young objects are // also GC_CLEAN @@ -1227,20 +1227,17 @@ void gc_count_pool(void) jl_safe_printf("************************\n"); } -int gc_slot_to_fieldidx(void *obj, void *slot) +int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT { - jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); int nf = (int)jl_datatype_nfields(vt); - for (int i = 0; i < nf; i++) { - void *fieldaddr = (char*)obj + jl_field_offset(vt, i); - if (fieldaddr >= slot) { - return i; - } + for (int i = 1; i < nf; i++) { + if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) + return i - 1; } - 
return -1; + return nf - 1; } -int gc_slot_to_arrayidx(void *obj, void *_slot) +int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT { char *slot = (char*)_slot; jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); @@ -1258,8 +1255,6 @@ int gc_slot_to_arrayidx(void *obj, void *_slot) } else if (vt->name == jl_array_typename) { jl_array_t *a = (jl_array_t*)obj; - if (!a->flags.ptrarray) - return -1; start = (char*)a->data; len = jl_array_len(a); elsize = a->elsize; diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp new file mode 100644 index 0000000000000..c898e27a48ea7 --- /dev/null +++ b/src/gc-heap-snapshot.cpp @@ -0,0 +1,506 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +#include "gc-heap-snapshot.h" + +#include "julia_internal.h" +#include "gc.h" + +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/DenseMap.h" + +#include <vector> +#include <string> +#include <sstream> + +using std::vector; +using std::string; +using std::ostringstream; +using std::pair; +using std::make_pair; +using llvm::StringMap; +using llvm::DenseMap; +using llvm::StringRef; + +// https://stackoverflow.com/a/33799784/751061 +void print_str_escape_json(ios_t *stream, StringRef s) +{ + ios_putc('"', stream); + for (auto c = s.begin(); c != s.end(); c++) { + switch (*c) { + case '"': ios_write(stream, "\\\"", 2); break; + case '\\': ios_write(stream, "\\\\", 2); break; + case '\b': ios_write(stream, "\\b", 2); break; + case '\f': ios_write(stream, "\\f", 2); break; + case '\n': ios_write(stream, "\\n", 2); break; + case '\r': ios_write(stream, "\\r", 2); break; + case '\t': ios_write(stream, "\\t", 2); break; + default: + if (('\x00' <= *c) & (*c <= '\x1f')) { + ios_printf(stream, "\\u%04x", (int)*c); + } + else { + ios_putc(*c, stream); + } + } + } + ios_putc('"', stream); +} + + +// Edges +// "edge_fields": +// [ "type", "name_or_index", "to_node" ] +// mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2598-L2601 + +struct Edge { + size_t type; // These *must* match the Enums on the JS side; control interpretation of name_or_index. + size_t name_or_index; // name of the field (for objects/modules) or index of array + size_t to_node; +}; + +// Nodes +// "node_fields": +// [ "type", "name", "id", "self_size", "edge_count", "trace_node_id", "detachedness" ] +// mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2568-L2575 + +const int k_node_number_of_fields = 7; +struct Node { + size_t type; // index into snapshot->node_types + size_t name; + size_t id; // This should be a globally-unique counter, but we use the memory address + size_t self_size; + size_t trace_node_id; // This is ALWAYS 0 in Javascript heap-snapshots.
+ // whether the from_node is attached or detached from the main application state + // https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/include/v8-profiler.h#L739-L745 + int detachedness; // 0 - unknown, 1 - attached, 2 - detached + vector<Edge> edges; + + ~Node() JL_NOTSAFEPOINT = default; +}; + +struct StringTable { + StringMap<size_t> map; + vector<StringRef> strings; + + size_t find_or_create_string_id(StringRef key) JL_NOTSAFEPOINT { + auto val = map.insert(make_pair(key, map.size())); + if (val.second) + strings.push_back(val.first->first()); + return val.first->second; + } + + void print_json_array(ios_t *stream, bool newlines) { + ios_printf(stream, "["); + bool first = true; + for (const auto &str : strings) { + if (first) { + first = false; + } + else { + ios_printf(stream, newlines ? ",\n" : ","); + } + print_str_escape_json(stream, str); + } + ios_printf(stream, "]"); + } +}; + +struct HeapSnapshot { + vector<Node> nodes; + // edges are stored on each from_node + + StringTable names; + StringTable node_types; + StringTable edge_types; + DenseMap<void *, size_t> node_ptr_to_index_map; + + size_t num_edges = 0; // For metadata, updated as you add each edge. Needed because edges are owned by nodes. +}; + +// global heap snapshot, mutated by garbage collector +// when snapshotting is on. +int gc_heap_snapshot_enabled = 0; +HeapSnapshot *g_snapshot = nullptr; +extern jl_mutex_t heapsnapshot_lock; + +void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot, char all_one); +static inline void _record_gc_edge(const char *node_type, const char *edge_type, + jl_value_t *a, jl_value_t *b, size_t name_or_index) JL_NOTSAFEPOINT; +void _record_gc_just_edge(const char *edge_type, Node &from_node, size_t to_idx, size_t name_or_idx) JL_NOTSAFEPOINT; +void _add_internal_root(HeapSnapshot *snapshot); + + +JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream, char all_one) +{ + HeapSnapshot snapshot; + _add_internal_root(&snapshot); + + jl_mutex_lock(&heapsnapshot_lock); + + // Enable snapshotting + g_snapshot = &snapshot; + gc_heap_snapshot_enabled = true; + + // Do a full GC mark (and incremental sweep), which will invoke our callbacks on `g_snapshot` + jl_gc_collect(JL_GC_FULL); + + // Disable snapshotting + gc_heap_snapshot_enabled = false; + g_snapshot = nullptr; + + jl_mutex_unlock(&heapsnapshot_lock); + + // When we return, the snapshot is full + // Dump the snapshot + serialize_heap_snapshot((ios_t*)stream, snapshot, all_one); +} + +// adds a node at id 0 which is the "uber root": +// a synthetic node which points to all the GC roots.
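A minimal usage sketch for the `jl_gc_take_heap_snapshot` entry point above, assuming an embedding context where Julia's internal `ios.h` API is available (error handling elided; the output file name is arbitrary):

```c
// Dump a heap snapshot in V8 .heapsnapshot format to a file.
ios_t f;
ios_file(&f, "julia.heapsnapshot", 1, 1, 1, 1); // rd, wr, create, trunc
jl_gc_take_heap_snapshot(&f, /* all_one = */ 0); // 0: record real self sizes
ios_close(&f);
```

Passing `all_one = 1` makes the serializer record every node with self-size 1 (see `serialize_heap_snapshot` below), which effectively turns the viewer's retained-size numbers into retained-object counts.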
+void _add_internal_root(HeapSnapshot *snapshot) +{ + Node internal_root{ + snapshot->node_types.find_or_create_string_id("synthetic"), + snapshot->names.find_or_create_string_id(""), // name + 0, // id + 0, // size + 0, // size_t trace_node_id (unused) + 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached + vector<Edge>() // outgoing edges + }; + snapshot->nodes.push_back(internal_root); +} + +// mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L597-L597 +// returns the index of the new node +size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT +{ + auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(a, g_snapshot->nodes.size())); + if (!val.second) { + return val.first->second; + } + + ios_t str_; + bool ios_need_close = 0; + + // Insert a new Node + size_t self_size = 0; + StringRef name = ""; + StringRef node_type = "object"; + + jl_datatype_t *type = (jl_datatype_t*)jl_typeof(a); + + if (jl_is_string(a)) { + node_type = "string"; + name = jl_string_data(a); + self_size = jl_string_len(a); + } + else if (jl_is_symbol(a)) { + node_type = "symbol"; + name = jl_symbol_name((jl_sym_t*)a); + self_size = name.size(); + } + else if (jl_is_simplevector(a)) { + node_type = "array"; + name = "SimpleVector"; + self_size = sizeof(jl_svec_t) + sizeof(void*) * jl_svec_len(a); + } + else if (jl_is_module(a)) { + name = "Module"; + self_size = sizeof(jl_module_t); + } + else if (jl_is_task(a)) { + name = "Task"; + self_size = sizeof(jl_task_t); + } + else { + self_size = jl_is_array_type(type) + ? sizeof(jl_array_t) + : (size_t)jl_datatype_size(type); + + // print full type into ios buffer and get StringRef to it. + // The ios is cleaned up below. + ios_need_close = 1; + ios_mem(&str_, 0); + JL_STREAM* str = (JL_STREAM*)&str_; + jl_static_show(str, (jl_value_t*)type); + + name = StringRef((const char*)str_.buf, str_.size); + } + + g_snapshot->nodes.push_back(Node{ + g_snapshot->node_types.find_or_create_string_id(node_type), // size_t type; + g_snapshot->names.find_or_create_string_id(name), // size_t name; + (size_t)a, // size_t id; + // We add 1 to self-size for the type tag that all heap-allocated objects have. + // Also because the Chrome Snapshot viewer ignores size-0 leaves!
+ sizeof(void*) + self_size, // size_t self_size; + 0, // size_t trace_node_id (unused) + 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached + vector<Edge>() // outgoing edges + }); + + if (ios_need_close) + ios_close(&str_); + + return val.first->second; +} + +static size_t record_pointer_to_gc_snapshot(void *a, size_t bytes, StringRef name) JL_NOTSAFEPOINT +{ + auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(a, g_snapshot->nodes.size())); + if (!val.second) { + return val.first->second; + } + + g_snapshot->nodes.push_back(Node{ + g_snapshot->node_types.find_or_create_string_id("object"), // size_t type; + g_snapshot->names.find_or_create_string_id(name), // size_t name; + (size_t)a, // size_t id; + bytes, // size_t self_size; + 0, // size_t trace_node_id (unused) + 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached + vector<Edge>() // outgoing edges + }); + + return val.first->second; +} + +static string _fieldpath_for_slot(void *obj, void *slot) JL_NOTSAFEPOINT +{ + string res; + jl_datatype_t *objtype = (jl_datatype_t*)jl_typeof(obj); + + while (1) { + int i = gc_slot_to_fieldidx(obj, slot, objtype); + + if (jl_is_tuple_type(objtype) || jl_is_namedtuple_type(objtype)) { + ostringstream ss; + ss << "[" << i << "]"; + res += ss.str(); + } + else { + jl_svec_t *field_names = jl_field_names(objtype); + jl_sym_t *name = (jl_sym_t*)jl_svecref(field_names, i); + res += jl_symbol_name(name); + } + + if (!jl_field_isptr(objtype, i)) { + // Tail recurse + res += "."; + obj = (void*)((char*)obj + jl_field_offset(objtype, i)); + objtype = (jl_datatype_t*)jl_field_type_concrete(objtype, i); + } + else { + return res; + } + } +} + + +void _gc_heap_snapshot_record_root(jl_value_t *root, char *name) JL_NOTSAFEPOINT +{ + record_node_to_gc_snapshot(root); + + auto &internal_root = g_snapshot->nodes.front(); + auto to_node_idx = g_snapshot->node_ptr_to_index_map[root]; + auto edge_label = g_snapshot->names.find_or_create_string_id(name); + + _record_gc_just_edge("internal", internal_root, to_node_idx, edge_label); +} + +// Add a node to the heap snapshot representing a Julia stack frame. +// Each task points at a stack frame, which points at the stack frame of +// the function it's currently calling, forming a linked list. +// Stack frame nodes point at the objects they have as local variables.
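To make the slot-to-field mapping behind `_fieldpath_for_slot` concrete, here is a self-contained model of the reworked `gc_slot_to_fieldidx` search from the `gc-debug.c` hunk earlier (a sketch with a hypothetical layout; the real code reads offsets from `jl_datatype_t`): it returns the index of the field whose storage contains the slot, so interior pointers map to the nearest preceding field, and `_fieldpath_for_slot` then joins the field names along the descent with `.` (e.g. a slot at the pointer field `x` inside an inline struct field `b` yields the path `b.x`).

```c
#include <assert.h>
#include <stddef.h>

// Model: find the field containing byte offset `slot_off`, given `nf`
// field offsets sorted ascending, mirroring gc_slot_to_fieldidx above.
static int slot_to_fieldidx(const size_t *offsets, int nf, size_t slot_off)
{
    for (int i = 1; i < nf; i++)
        if (slot_off < offsets[i])
            return i - 1;
    return nf - 1; // past the last offset: the slot lies in the final field
}

int main(void)
{
    const size_t offsets[] = {0, 8, 24}; // hypothetical 3-field layout
    assert(slot_to_fieldidx(offsets, 3, 8) == 1);
    assert(slot_to_fieldidx(offsets, 3, 12) == 1); // interior of field 1
    assert(slot_to_fieldidx(offsets, 3, 40) == 2);
    return 0;
}
```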
+size_t _record_stack_frame_node(HeapSnapshot *snapshot, void *frame) JL_NOTSAFEPOINT +{ + auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(frame, g_snapshot->nodes.size())); + if (!val.second) { + return val.first->second; + } + + snapshot->nodes.push_back(Node{ + snapshot->node_types.find_or_create_string_id("synthetic"), + snapshot->names.find_or_create_string_id("(stack frame)"), // name + (size_t)frame, // id + 1, // size + 0, // size_t trace_node_id (unused) + 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached + vector<Edge>() // outgoing edges + }); + + return val.first->second; +} + +void _gc_heap_snapshot_record_frame_to_object_edge(void *from, jl_value_t *to) JL_NOTSAFEPOINT +{ + auto from_node_idx = _record_stack_frame_node(g_snapshot, (jl_gcframe_t*)from); + auto to_idx = record_node_to_gc_snapshot(to); + Node &from_node = g_snapshot->nodes[from_node_idx]; + + auto name_idx = g_snapshot->names.find_or_create_string_id("local var"); + _record_gc_just_edge("internal", from_node, to_idx, name_idx); +} + +void _gc_heap_snapshot_record_task_to_frame_edge(jl_task_t *from, void *to) JL_NOTSAFEPOINT +{ + auto from_node_idx = record_node_to_gc_snapshot((jl_value_t*)from); + auto to_node_idx = _record_stack_frame_node(g_snapshot, to); + Node &from_node = g_snapshot->nodes[from_node_idx]; + + auto name_idx = g_snapshot->names.find_or_create_string_id("stack"); + _record_gc_just_edge("internal", from_node, to_node_idx, name_idx); +} + +void _gc_heap_snapshot_record_frame_to_frame_edge(jl_gcframe_t *from, jl_gcframe_t *to) JL_NOTSAFEPOINT +{ + auto from_node_idx = _record_stack_frame_node(g_snapshot, from); + auto to_node_idx = _record_stack_frame_node(g_snapshot, to); + Node &from_node = g_snapshot->nodes[from_node_idx]; + + auto name_idx = g_snapshot->names.find_or_create_string_id("next frame"); + _record_gc_just_edge("internal", from_node, to_node_idx, name_idx); +} + +void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT +{ + _record_gc_edge("array", "element", from, to, index); +} + +void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, void *slot) JL_NOTSAFEPOINT +{ + string path = _fieldpath_for_slot(from, slot); + _record_gc_edge("object", "property", from, to, + g_snapshot->names.find_or_create_string_id(path)); +} + +void _gc_heap_snapshot_record_module_to_binding(jl_module_t* module, jl_binding_t* binding) JL_NOTSAFEPOINT +{ + auto from_node_idx = record_node_to_gc_snapshot((jl_value_t*)module); + auto to_node_idx = record_pointer_to_gc_snapshot(binding, sizeof(jl_binding_t), jl_symbol_name(binding->name)); + + jl_value_t *value = jl_atomic_load_relaxed(&binding->value); + auto value_idx = value ? record_node_to_gc_snapshot(value) : 0; + jl_value_t *ty = jl_atomic_load_relaxed(&binding->ty); + auto ty_idx = ty ? record_node_to_gc_snapshot(ty) : 0; + jl_value_t *globalref = jl_atomic_load_relaxed(&binding->globalref); + auto globalref_idx = globalref ?
record_node_to_gc_snapshot(globalref) : 0; + + auto &from_node = g_snapshot->nodes[from_node_idx]; + auto &to_node = g_snapshot->nodes[to_node_idx]; + from_node.type = g_snapshot->node_types.find_or_create_string_id("object"); + + _record_gc_just_edge("property", from_node, to_node_idx, g_snapshot->names.find_or_create_string_id("")); + if (value_idx) _record_gc_just_edge("internal", to_node, value_idx, g_snapshot->names.find_or_create_string_id("value")); + if (ty_idx) _record_gc_just_edge("internal", to_node, ty_idx, g_snapshot->names.find_or_create_string_id("ty")); + if (globalref_idx) _record_gc_just_edge("internal", to_node, globalref_idx, g_snapshot->names.find_or_create_string_id("globalref")); +} + +void _gc_heap_snapshot_record_internal_array_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT +{ + _record_gc_edge("object", "internal", from, to, + g_snapshot->names.find_or_create_string_id("")); +} + +void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, void* to, size_t bytes) JL_NOTSAFEPOINT +{ + size_t name_or_idx = g_snapshot->names.find_or_create_string_id(""); + + auto from_node_idx = record_node_to_gc_snapshot(from); + auto to_node_idx = record_pointer_to_gc_snapshot(to, bytes, ""); + + auto &from_node = g_snapshot->nodes[from_node_idx]; + from_node.type = g_snapshot->node_types.find_or_create_string_id("native"); + + _record_gc_just_edge("hidden", from_node, to_node_idx, name_or_idx); +} + +static inline void _record_gc_edge(const char *node_type, const char *edge_type, + jl_value_t *a, jl_value_t *b, size_t name_or_idx) JL_NOTSAFEPOINT +{ + auto from_node_idx = record_node_to_gc_snapshot(a); + auto to_node_idx = record_node_to_gc_snapshot(b); + + auto &from_node = g_snapshot->nodes[from_node_idx]; + from_node.type = g_snapshot->node_types.find_or_create_string_id(node_type); + + _record_gc_just_edge(edge_type, from_node, to_node_idx, name_or_idx); +} + +void _record_gc_just_edge(const char *edge_type, Node &from_node, size_t to_idx, size_t name_or_idx) JL_NOTSAFEPOINT +{ + from_node.edges.push_back(Edge{ + g_snapshot->edge_types.find_or_create_string_id(edge_type), + name_or_idx, // edge label + to_idx // to + }); + + g_snapshot->num_edges += 1; +} + +void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot, char all_one) +{ + // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2567-L2567 + ios_printf(stream, "{\"snapshot\":{"); + ios_printf(stream, "\"meta\":{"); + ios_printf(stream, "\"node_fields\":[\"type\",\"name\",\"id\",\"self_size\",\"edge_count\",\"trace_node_id\",\"detachedness\"],"); + ios_printf(stream, "\"node_types\":["); + snapshot.node_types.print_json_array(stream, false); + ios_printf(stream, ","); + ios_printf(stream, "\"string\", \"number\", \"number\", \"number\", \"number\", \"number\"],"); + ios_printf(stream, "\"edge_fields\":[\"type\",\"name_or_index\",\"to_node\"],"); + ios_printf(stream, "\"edge_types\":["); + snapshot.edge_types.print_json_array(stream, false); + ios_printf(stream, ","); + ios_printf(stream, "\"string_or_number\",\"from_node\"]"); + ios_printf(stream, "},\n"); // end "meta" + ios_printf(stream, "\"node_count\":%zu,", snapshot.nodes.size()); + ios_printf(stream, "\"edge_count\":%zu", snapshot.num_edges); + ios_printf(stream, "},\n"); // end "snapshot" + + ios_printf(stream, "\"nodes\":["); + bool first_node = true; + for (const auto &from_node : snapshot.nodes) { + if (first_node) { + first_node = false; + } + else 
{ + ios_printf(stream, ","); + } + // ["type","name","id","self_size","edge_count","trace_node_id","detachedness"] + ios_printf(stream, "%zu,%zu,%zu,%zu,%zu,%zu,%d\n", + from_node.type, + from_node.name, + from_node.id, + all_one ? (size_t)1 : from_node.self_size, + from_node.edges.size(), + from_node.trace_node_id, + from_node.detachedness); + } + ios_printf(stream, "],\n"); + + ios_printf(stream, "\"edges\":["); + bool first_edge = true; + for (const auto &from_node : snapshot.nodes) { + for (const auto &edge : from_node.edges) { + if (first_edge) { + first_edge = false; + } + else { + ios_printf(stream, ","); + } + ios_printf(stream, "%zu,%zu,%zu\n", + edge.type, + edge.name_or_index, + edge.to_node * k_node_number_of_fields); + } + } + ios_printf(stream, "],\n"); // end "edges" + + ios_printf(stream, "\"strings\":"); + + snapshot.names.print_json_array(stream, true); + + ios_printf(stream, "}"); +} diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h new file mode 100644 index 0000000000000..96cdaf6a9a866 --- /dev/null +++ b/src/gc-heap-snapshot.h @@ -0,0 +1,108 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +#ifndef JL_GC_HEAP_SNAPSHOT_H +#define JL_GC_HEAP_SNAPSHOT_H + +#include "julia.h" +#include "ios.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +// --------------------------------------------------------------------- +// Functions to call from GC when heap snapshot is enabled +// --------------------------------------------------------------------- +void _gc_heap_snapshot_record_root(jl_value_t *root, char *name) JL_NOTSAFEPOINT; +void _gc_heap_snapshot_record_frame_to_object_edge(void *from, jl_value_t *to) JL_NOTSAFEPOINT; +void _gc_heap_snapshot_record_task_to_frame_edge(jl_task_t *from, void *to) JL_NOTSAFEPOINT; +void _gc_heap_snapshot_record_frame_to_frame_edge(jl_gcframe_t *from, jl_gcframe_t *to) JL_NOTSAFEPOINT; +void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT; +void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, void* slot) JL_NOTSAFEPOINT; +void _gc_heap_snapshot_record_module_to_binding(jl_module_t* module, jl_binding_t* binding) JL_NOTSAFEPOINT; +// Used for objects managed by GC, but which aren't exposed in the julia object, so have no +// field or index. i.e. they're not reachable from julia code, but we _will_ hit them in +// the GC mark phase (so we can check their type tag to get the size). +void _gc_heap_snapshot_record_internal_array_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT; +// Used for objects manually allocated in C (outside julia GC), to still tell the heap snapshot about the +// size of the object, even though we're never going to mark that object.
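One encoding detail of `serialize_heap_snapshot` above that is easy to miss: in the V8 snapshot format, `nodes` is one flat numeric array with `k_node_number_of_fields` (7) entries per node, and an edge's third field is an offset into that flat array rather than a node number, which is why the writer emits `edge.to_node * k_node_number_of_fields`. A consumer reverses it like this (sketch, not part of the patch):

```c
// Recover a node index from the third field of a serialized edge.
enum { NODE_FIELDS = 7 }; // k_node_number_of_fields in gc-heap-snapshot.cpp
static size_t node_index_of(size_t to_node_field)
{
    return to_node_field / NODE_FIELDS; // e.g. 21 -> node 3
}
```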
+void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, void* to, size_t bytes) JL_NOTSAFEPOINT; + + +extern int gc_heap_snapshot_enabled; +extern int prev_sweep_full; + +int gc_slot_to_fieldidx(void *_obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT; +int gc_slot_to_arrayidx(void *_obj, void *begin) JL_NOTSAFEPOINT; + +static inline void gc_heap_snapshot_record_frame_to_object_edge(void *from, jl_value_t *to) JL_NOTSAFEPOINT +{ + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { + _gc_heap_snapshot_record_frame_to_object_edge(from, to); + } +} +static inline void gc_heap_snapshot_record_task_to_frame_edge(jl_task_t *from, void *to) JL_NOTSAFEPOINT +{ + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { + _gc_heap_snapshot_record_task_to_frame_edge(from, to); + } +} +static inline void gc_heap_snapshot_record_frame_to_frame_edge(jl_gcframe_t *from, jl_gcframe_t *to) JL_NOTSAFEPOINT +{ + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { + _gc_heap_snapshot_record_frame_to_frame_edge(from, to); + } +} +static inline void gc_heap_snapshot_record_root(jl_value_t *root, char *name) JL_NOTSAFEPOINT +{ + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { + _gc_heap_snapshot_record_root(root, name); + } +} +static inline void gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t **to) JL_NOTSAFEPOINT +{ + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { + _gc_heap_snapshot_record_array_edge(from, *to, gc_slot_to_arrayidx(from, to)); + } +} +static inline void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t **to) JL_NOTSAFEPOINT +{ + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { + _gc_heap_snapshot_record_object_edge(from, *to, to); + } +} + +static inline void gc_heap_snapshot_record_module_to_binding(jl_module_t* module, jl_binding_t* binding) JL_NOTSAFEPOINT +{ + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { + _gc_heap_snapshot_record_module_to_binding(module, binding); + } +} + +static inline void gc_heap_snapshot_record_internal_array_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT +{ + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { + _gc_heap_snapshot_record_internal_array_edge(from, to); + } +} + +static inline void gc_heap_snapshot_record_hidden_edge(jl_value_t *from, void* to, size_t bytes) JL_NOTSAFEPOINT +{ + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { + _gc_heap_snapshot_record_hidden_edge(from, to, bytes); + } +} + +// --------------------------------------------------------------------- +// Functions to call from Julia to take heap snapshot +// --------------------------------------------------------------------- +JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream, char all_one); + + +#ifdef __cplusplus +} +#endif + + +#endif // JL_GC_HEAP_SNAPSHOT_H diff --git a/src/gc-stacks.c b/src/gc-stacks.c index 8617e773efc67..40292cf472037 100644 --- a/src/gc-stacks.c +++ b/src/gc-stacks.c @@ -194,8 +194,9 @@ void sweep_stack_pools(void) // bufsz = t->bufsz // if (stkbuf) // push(free_stacks[sz], stkbuf) - for (int i = 0; i < jl_n_threads; i++) { - jl_ptls_t ptls2 = jl_all_tls_states[i]; + assert(gc_n_threads); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; // free half of stacks that remain unused since last sweep for (int p = 0; p < JL_N_STACK_POOLS; p++) { diff --git a/src/gc.c b/src/gc.c index 238a10bcc35dc..b5ddd8ffbcbbd 100644 --- a/src/gc.c +++ b/src/gc.c @@ -130,6 +130,9 
@@ STATIC_INLINE void import_gc_state(jl_ptls_t ptls, jl_gc_mark_sp_t *sp) { static jl_mutex_t finalizers_lock; static uv_mutex_t gc_cache_lock; +// mutex for gc-heap-snapshot. +jl_mutex_t heapsnapshot_lock; + // Flag that tells us whether we need to support conservative marking // of objects. static _Atomic(int) support_conservative_marking = 0; @@ -168,16 +171,19 @@ static _Atomic(int) support_conservative_marking = 0; jl_gc_num_t gc_num = {0}; static size_t last_long_collect_interval; +int gc_n_threads; +jl_ptls_t* gc_all_tls_states; pagetable_t memory_map; // List of marked big objects. Not per-thread. Accessed only by master thread. bigval_t *big_objects_marked = NULL; -// finalization +// -- Finalization -- // `ptls->finalizers` and `finalizer_list_marked` might have tagged pointers. -// If an object pointer has the lowest bit set, the next pointer is an unboxed -// c function pointer. +// If an object pointer has the lowest bit set, the next pointer is an unboxed c function pointer. +// If an object pointer has the second lowest bit set, the current pointer is a c object pointer. +// It must be aligned at least 4, and it is finalized immediately (at "quiescence"). // `to_finalize` should not have tagged pointers. arraylist_t finalizer_list_marked; arraylist_t to_finalize; @@ -190,12 +196,15 @@ NOINLINE uintptr_t gc_get_stack_ptr(void) #define should_timeout() 0 -static void jl_gc_wait_for_the_world(void) +void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads) { - if (jl_n_threads > 1) + assert(gc_n_threads); + if (gc_n_threads > 1) jl_wake_libuv(); - for (int i = 0; i < jl_n_threads; i++) { - jl_ptls_t ptls2 = jl_all_tls_states[i]; + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + if (ptls2 == NULL) + continue; // This acquire load pairs with the release stores // in the signal handler of safepoint so we are sure that // all the stores on those threads are visible. @@ -207,6 +216,9 @@ static void jl_gc_wait_for_the_world(void) } } + +void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads); + // malloc wrappers, aligned allocation #if defined(_OS_WINDOWS_) @@ -267,17 +279,18 @@ static void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 1); } -static void run_finalizer(jl_task_t *ct, jl_value_t *o, jl_value_t *ff) +static void run_finalizer(jl_task_t *ct, void *o, void *ff) { - if (gc_ptr_tag(o, 1)) { - ((void (*)(void*))ff)(gc_ptr_clear_tag(o, 1)); + int ptr_finalizer = gc_ptr_tag(o, 1); + o = gc_ptr_clear_tag(o, 3); + if (ptr_finalizer) { + ((void (*)(void*))ff)((void*)o); return; } - jl_value_t *args[2] = {ff,o}; JL_TRY { size_t last_age = ct->world_age; ct->world_age = jl_atomic_load_acquire(&jl_world_counter); - jl_apply(args, 2); + jl_apply_generic((jl_value_t*)ff, (jl_value_t**)&o, 1); ct->world_age = last_age; } JL_CATCH { @@ -362,7 +375,7 @@ static void jl_gc_run_finalizers_in_list(jl_task_t *ct, arraylist_t *list) arraylist_push(list, list->items[0]); arraylist_push(list, list->items[1]); jl_gc_push_arraylist(ct, list); - jl_value_t **items = (jl_value_t**)list->items; + void **items = list->items; size_t len = list->len; JL_UNLOCK_NOGC(&finalizers_lock); // run finalizers in reverse order they were added, so lower-level finalizers run last @@ -411,7 +424,10 @@ static void run_finalizers(jl_task_t *ct) jl_rng_split(ct->rngState, finalizer_rngState); // This releases the finalizers lock.
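The two tag bits described above give three kinds of entries in the `(value, fn)` pairs of a finalizer list. A sketch of the classification (mirroring `gc_ptr_tag`/`gc_ptr_clear_tag`; `finalizer_kind` is an illustrative helper, not part of the patch):

```c
#include <stdint.h>

// Kinds: 0 = plain Julia finalizer (jl_apply_generic(fn, &obj, 1)),
//        1 = C finalizer on a Julia object (((void (*)(void*))fn)(obj)),
//        3 = quiescent C callback (see jl_gc_add_quiescent below),
//            where obj is a raw C pointer that is never GC-marked.
static int finalizer_kind(void *v, void **obj_out)
{
    uintptr_t tag = (uintptr_t)v & 3;                 // gc_ptr_tag(v, 3)
    *obj_out = (void*)((uintptr_t)v & ~(uintptr_t)3); // gc_ptr_clear_tag(v, 3)
    return (int)tag;
}
```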
+ int8_t was_in_finalizer = ct->ptls->in_finalizer; + ct->ptls->in_finalizer = 1; jl_gc_run_finalizers_in_list(ct, &copied_list); + ct->ptls->in_finalizer = was_in_finalizer; arraylist_free(&copied_list); memcpy(&ct->rngState[0], &save_rngState[0], sizeof(save_rngState)); @@ -423,9 +439,7 @@ JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_task_t *ct) ct = jl_current_task; jl_ptls_t ptls = ct->ptls; if (!ptls->in_finalizer && ptls->locks.len == 0 && ptls->finalizers_inhibited == 0) { - ptls->in_finalizer = 1; run_finalizers(ct); - ptls->in_finalizer = 0; } } @@ -496,13 +510,18 @@ static void schedule_all_finalizers(arraylist_t *flist) JL_NOTSAFEPOINT void jl_gc_run_all_finalizers(jl_task_t *ct) { + int gc_n_threads; + jl_ptls_t* gc_all_tls_states; + gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); + gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); schedule_all_finalizers(&finalizer_list_marked); - // This could be run before we had a chance to setup all threads - for (int i = 0;i < jl_n_threads;i++) { - jl_ptls_t ptls2 = jl_all_tls_states[i]; + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; if (ptls2) schedule_all_finalizers(&ptls2->finalizers); } + gc_n_threads = 0; + gc_all_tls_states = NULL; run_finalizers(ct); } @@ -539,6 +558,13 @@ JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 1), f); } +// schedule f(v) to be called at the next quiescent interval (aka after the next safepoint/region on all threads) +JL_DLLEXPORT void jl_gc_add_quiescent(jl_ptls_t ptls, void **v, void *f) JL_NOTSAFEPOINT +{ + assert(!gc_ptr_tag(v, 3)); + jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 3), f); +} + JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT { if (__unlikely(jl_typeis(f, jl_voidpointer_type))) { @@ -559,11 +585,18 @@ JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o) arraylist_new(&copied_list, 0); // No need to check the to_finalize list since the user is apparently // still holding a reference to the object - for (int i = 0; i < jl_n_threads; i++) { - jl_ptls_t ptls2 = jl_all_tls_states[i]; - finalize_object(&ptls2->finalizers, o, &copied_list, jl_atomic_load_relaxed(&ct->tid) != i); + int gc_n_threads; + jl_ptls_t* gc_all_tls_states; + gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); + gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + if (ptls2) + finalize_object(&ptls2->finalizers, o, &copied_list, jl_atomic_load_relaxed(&ct->tid) != i); } finalize_object(&finalizer_list_marked, o, &copied_list, 0); + gc_n_threads = 0; + gc_all_tls_states = NULL; if (copied_list.len > 0) { // This releases the finalizers lock. jl_gc_run_finalizers_in_list(ct, &copied_list); @@ -595,9 +628,11 @@ static void gc_sweep_foreign_objs_in_list(arraylist_t *objs) static void gc_sweep_foreign_objs(void) { - for (int i = 0;i < jl_n_threads; i++) { - jl_ptls_t ptls2 = jl_all_tls_states[i]; - gc_sweep_foreign_objs_in_list(&ptls2->sweep_objs); + assert(gc_n_threads); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + if (ptls2) + gc_sweep_foreign_objs_in_list(&ptls2->sweep_objs); } } @@ -608,18 +643,19 @@ static int64_t last_gc_total_bytes = 0; // under this limit, but we will go above it rather than halting.
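A hypothetical use of the `jl_gc_add_quiescent` hook added above: deferring the release of a C-side buffer until every thread has passed a safepoint. `free_slot` and `pending_buf` are illustrative; the requirements implied by the code above are that the registered slot be at least 4-byte aligned (its low two bits carry the tag) and that the callback treat it as a raw C pointer, never a Julia value:

```c
#include <stdlib.h>

static void *pending_buf; // pointer-aligned, so the low tag bits are free

static void free_slot(void *slot)
{
    free(*(void**)slot); // release the buffer recorded in the slot
    *(void**)slot = NULL;
}

// Later, on a thread with a Julia task context (sketch):
//   pending_buf = old_buffer;
//   jl_gc_add_quiescent(jl_current_task->ptls, &pending_buf, (void*)free_slot);
```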
#ifdef _P64 typedef uint64_t memsize_t; -#define default_collect_interval (5600*1024*sizeof(void*)) -static size_t max_collect_interval = 1250000000UL; -// Eventually we can expose this to the user/ci. -memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024 * 1024 * 1024; +static const size_t default_collect_interval = 5600 * 1024 * sizeof(void*); +static const size_t max_collect_interval = 1250000000UL; +static size_t total_mem; +// We expose this to the user/ci as jl_gc_set_max_memory +static memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024 * 1024 * 1024; #else typedef uint32_t memsize_t; -#define default_collect_interval (3200*1024*sizeof(void*)) -static size_t max_collect_interval = 500000000UL; +static const size_t default_collect_interval = 3200 * 1024 * sizeof(void*); +static const size_t max_collect_interval = 500000000UL; // Work really hard to stay within 2GB // Alternative is to risk running out of address space // on 32 bit architectures. -memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024; +static memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024; #endif // global variables for GC stats @@ -679,7 +715,7 @@ static int mark_reset_age = 0; static int64_t scanned_bytes; // young bytes scanned while marking static int64_t perm_scanned_bytes; // old bytes scanned while marking -static int prev_sweep_full = 1; +int prev_sweep_full = 1; #define inc_sat(v,s) v = (v) >= s ? s : (v)+1 @@ -725,9 +761,11 @@ static void gc_sync_cache(jl_ptls_t ptls) JL_NOTSAFEPOINT // No other threads can be running marking at the same time static void gc_sync_all_caches_nolock(jl_ptls_t ptls) { - for (int t_i = 0; t_i < jl_n_threads; t_i++) { - jl_ptls_t ptls2 = jl_all_tls_states[t_i]; - gc_sync_cache_nolock(ptls, &ptls2->gc_cache); + assert(gc_n_threads); + for (int t_i = 0; t_i < gc_n_threads; t_i++) { + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; + if (ptls2) + gc_sync_cache_nolock(ptls, &ptls2->gc_cache); } } @@ -934,8 +972,11 @@ JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, static void clear_weak_refs(void) { - for (int i = 0; i < jl_n_threads; i++) { - jl_ptls_t ptls2 = jl_all_tls_states[i]; + assert(gc_n_threads); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + if (ptls2 == NULL) + continue; size_t n, l = ptls2->heap.weak_refs.len; void **lst = ptls2->heap.weak_refs.items; for (n = 0; n < l; n++) { @@ -948,8 +989,11 @@ static void clear_weak_refs(void) static void sweep_weak_refs(void) { - for (int i = 0; i < jl_n_threads; i++) { - jl_ptls_t ptls2 = jl_all_tls_states[i]; + assert(gc_n_threads); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + if (ptls2 == NULL) + continue; size_t n = 0; size_t ndel = 0; size_t l = ptls2->heap.weak_refs.len; @@ -1066,11 +1110,16 @@ static bigval_t **sweep_big_list(int sweep_full, bigval_t **pv) JL_NOTSAFEPOINT static void sweep_big(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT { gc_time_big_start(); - for (int i = 0;i < jl_n_threads;i++) - sweep_big_list(sweep_full, &jl_all_tls_states[i]->heap.big_objects); + assert(gc_n_threads); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + if (ptls2 == NULL) + continue; + sweep_big_list(sweep_full, &ptls2->heap.big_objects); + } if (sweep_full) { bigval_t **last_next = sweep_big_list(sweep_full, &big_objects_marked); - // Move all survivors from big_objects_marked list to big_objects list. 
+ // Move all survivors from big_objects_marked list to the big_objects list of this thread. if (ptls->heap.big_objects) ptls->heap.big_objects->prev = last_next; *last_next = ptls->heap.big_objects; @@ -1109,8 +1158,12 @@ void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT static void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT { - for (int i = 0; i < jl_n_threads; i++) { - jl_ptls_t ptls = jl_all_tls_states[i]; + int gc_n_threads; + jl_ptls_t* gc_all_tls_states; + gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); + gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls = gc_all_tls_states[i]; if (ptls) { dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_num.allocd) + gc_num.interval); dest->freed += jl_atomic_load_relaxed(&ptls->gc_num.freed); @@ -1125,8 +1178,12 @@ static void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT static void reset_thread_gc_counts(void) JL_NOTSAFEPOINT { - for (int i = 0; i < jl_n_threads; i++) { - jl_ptls_t ptls = jl_all_tls_states[i]; + int gc_n_threads; + jl_ptls_t* gc_all_tls_states; + gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); + gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls = gc_all_tls_states[i]; if (ptls) { memset(&ptls->gc_num, 0, sizeof(ptls->gc_num)); jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); @@ -1173,8 +1230,11 @@ static void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT static void sweep_malloced_arrays(void) JL_NOTSAFEPOINT { gc_time_mallocd_array_start(); - for (int t_i = 0;t_i < jl_n_threads;t_i++) { - jl_ptls_t ptls2 = jl_all_tls_states[t_i]; + assert(gc_n_threads); + for (int t_i = 0; t_i < gc_n_threads; t_i++) { + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; + if (ptls2 == NULL) + continue; mallocarray_t *ma = ptls2->heap.mallocarrays; mallocarray_t **pma = &ptls2->heap.mallocarrays; while (ma != NULL) { @@ -1198,11 +1258,10 @@ static void sweep_malloced_arrays(void) JL_NOTSAFEPOINT } // pool allocation -static inline jl_taggedvalue_t *reset_page(const jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_taggedvalue_t *fl) JL_NOTSAFEPOINT +static inline jl_taggedvalue_t *reset_page(jl_ptls_t ptls2, const jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_taggedvalue_t *fl) JL_NOTSAFEPOINT { assert(GC_PAGE_OFFSET >= sizeof(void*)); pg->nfree = (GC_PAGE_SZ - GC_PAGE_OFFSET) / p->osize; - jl_ptls_t ptls2 = jl_all_tls_states[pg->thread_n]; pg->pool_n = p - ptls2->heap.norm_pools; memset(pg->ages, 0, GC_PAGE_SZ / 8 / p->osize + 1); jl_taggedvalue_t *beg = (jl_taggedvalue_t*)(pg->data + GC_PAGE_OFFSET); @@ -1240,7 +1299,7 @@ static NOINLINE jl_taggedvalue_t *add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT pg->osize = p->osize; pg->ages = (uint8_t*)malloc_s(GC_PAGE_SZ / 8 / p->osize + 1); pg->thread_n = ptls->tid; - jl_taggedvalue_t *fl = reset_page(p, pg, NULL); + jl_taggedvalue_t *fl = reset_page(ptls, p, pg, NULL); p->newpages = fl; return fl; } @@ -1354,7 +1413,8 @@ static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_t // FIXME - need to do accounting on a per-thread basis // on quick sweeps, keep a few pages empty but allocated for performance if (!sweep_full && lazy_freed_pages <= default_collect_interval / GC_PAGE_SZ) { - jl_taggedvalue_t *begin = reset_page(p, pg, p->newpages); + jl_ptls_t ptls2 = gc_all_tls_states[pg->thread_n]; + jl_taggedvalue_t *begin = reset_page(ptls2, p, pg, p->newpages); p->newpages = begin; 
begin->next = (jl_taggedvalue_t*)0; lazy_freed_pages++; @@ -1456,7 +1516,7 @@ static inline void sweep_pool_page(jl_taggedvalue_t ***pfl, jl_gc_pagemeta_t *pg { int p_n = pg->pool_n; int t_n = pg->thread_n; - jl_ptls_t ptls2 = jl_all_tls_states[t_n]; + jl_ptls_t ptls2 = gc_all_tls_states[t_n]; jl_gc_pool_t *p = &ptls2->heap.norm_pools[p_n]; int osize = pg->osize; pfl[t_n * JL_GC_N_POOLS + p_n] = sweep_page(p, pg, pfl[t_n * JL_GC_N_POOLS + p_n], sweep_full, osize); @@ -1568,9 +1628,9 @@ static void gc_sweep_pool(int sweep_full) gc_time_pool_start(); lazy_freed_pages = 0; - // For the benfit of the analyzer, which doesn't know that jl_n_threads + // For the benefit of the analyzer, which doesn't know that gc_n_threads // doesn't change over the course of this function - size_t n_threads = jl_n_threads; + size_t n_threads = gc_n_threads; // allocate enough space to hold the end of the free list chain // for every thread and pool size @@ -1579,7 +1639,13 @@ static void gc_sweep_pool(int sweep_full) // update metadata of pages that were pointed to by freelist or newpages from a pool // i.e. pages being the current allocation target for (int t_i = 0; t_i < n_threads; t_i++) { - jl_ptls_t ptls2 = jl_all_tls_states[t_i]; + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; + if (ptls2 == NULL) { + for (int i = 0; i < JL_GC_N_POOLS; i++) { + pfl[t_i * JL_GC_N_POOLS + i] = NULL; + } + continue; + } for (int i = 0; i < JL_GC_N_POOLS; i++) { jl_gc_pool_t *p = &ptls2->heap.norm_pools[i]; jl_taggedvalue_t *last = p->freelist; @@ -1608,6 +1674,9 @@ static void gc_sweep_pool(int sweep_full) // null out terminal pointers of free lists for (int t_i = 0; t_i < n_threads; t_i++) { + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; + if (ptls2 == NULL) + continue; for (int i = 0; i < JL_GC_N_POOLS; i++) { *pfl[t_i * JL_GC_N_POOLS + i] = NULL; } @@ -1886,9 +1955,11 @@ STATIC_INLINE int gc_mark_scan_objarray(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, (void)jl_assume(objary == (gc_mark_objarray_t*)sp->data); for (; begin < end; begin += objary->step) { *pnew_obj = *begin; - if (*pnew_obj) + if (*pnew_obj) { verify_parent2("obj array", objary->parent, begin, "elem(%d)", gc_slot_to_arrayidx(objary->parent, begin)); + gc_heap_snapshot_record_array_edge(objary->parent, begin); + } if (!gc_try_setmark(*pnew_obj, &objary->nptr, ptag, pbits)) continue; begin += objary->step; @@ -1922,9 +1993,11 @@ STATIC_INLINE int gc_mark_scan_array8(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, for (; elem_begin < elem_end; elem_begin++) { jl_value_t **slot = &begin[*elem_begin]; *pnew_obj = *slot; - if (*pnew_obj) + if (*pnew_obj) { verify_parent2("array", ary8->elem.parent, slot, "elem(%d)", gc_slot_to_arrayidx(ary8->elem.parent, begin)); + gc_heap_snapshot_record_array_edge(ary8->elem.parent, slot); + } if (!gc_try_setmark(*pnew_obj, &ary8->elem.nptr, ptag, pbits)) continue; elem_begin++; @@ -1970,9 +2043,11 @@ STATIC_INLINE int gc_mark_scan_array16(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, for (; elem_begin < elem_end; elem_begin++) { jl_value_t **slot = &begin[*elem_begin]; *pnew_obj = *slot; - if (*pnew_obj) + if (*pnew_obj) { verify_parent2("array", ary16->elem.parent, slot, "elem(%d)", gc_slot_to_arrayidx(ary16->elem.parent, begin)); + gc_heap_snapshot_record_array_edge(ary16->elem.parent, slot); + } if (!gc_try_setmark(*pnew_obj, &ary16->elem.nptr, ptag, pbits)) continue; elem_begin++; @@ -2016,9 +2091,11 @@ STATIC_INLINE int gc_mark_scan_obj8(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, gc_mark for (; begin < end; begin++) { jl_value_t **slot = 
&((jl_value_t**)parent)[*begin]; *pnew_obj = *slot; - if (*pnew_obj) + if (*pnew_obj) { verify_parent2("object", parent, slot, "field(%d)", - gc_slot_to_fieldidx(parent, slot)); + gc_slot_to_fieldidx(parent, slot, (jl_datatype_t*)jl_typeof(parent))); + gc_heap_snapshot_record_object_edge((jl_value_t*)parent, slot); + } if (!gc_try_setmark(*pnew_obj, &obj8->nptr, ptag, pbits)) continue; begin++; @@ -2049,9 +2126,11 @@ STATIC_INLINE int gc_mark_scan_obj16(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, gc_mar for (; begin < end; begin++) { jl_value_t **slot = &((jl_value_t**)parent)[*begin]; *pnew_obj = *slot; - if (*pnew_obj) + if (*pnew_obj) { verify_parent2("object", parent, slot, "field(%d)", - gc_slot_to_fieldidx(parent, slot)); + gc_slot_to_fieldidx(parent, slot, (jl_datatype_t*)jl_typeof(parent))); + gc_heap_snapshot_record_object_edge((jl_value_t*)parent, slot); + } if (!gc_try_setmark(*pnew_obj, &obj16->nptr, ptag, pbits)) continue; begin++; @@ -2082,9 +2161,11 @@ STATIC_INLINE int gc_mark_scan_obj32(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, gc_mar for (; begin < end; begin++) { jl_value_t **slot = &((jl_value_t**)parent)[*begin]; *pnew_obj = *slot; - if (*pnew_obj) + if (*pnew_obj) { verify_parent2("object", parent, slot, "field(%d)", - gc_slot_to_fieldidx(parent, slot)); + gc_slot_to_fieldidx(parent, slot, (jl_datatype_t*)jl_typeof(parent))); + gc_heap_snapshot_record_object_edge((jl_value_t*)parent, slot); + } if (!gc_try_setmark(*pnew_obj, &obj32->nptr, ptag, pbits)) continue; begin++; @@ -2366,18 +2447,24 @@ stack: { if (gc_ptr_tag(new_obj, 1)) { // handle tagged pointers in finalizer list new_obj = gc_ptr_clear_tag(new_obj, 1); + // skip over the finalizer fptr i++; } + if (gc_ptr_tag(new_obj, 2)) + continue; } if (!gc_try_setmark(new_obj, &nptr, &tag, &bits)) continue; + gc_heap_snapshot_record_frame_to_object_edge(s, new_obj); i++; if (i < nr) { // Haven't done with this one yet. 
Update the content and push it back stack->i = i; gc_repush_markdata(&sp, gc_mark_stackframe_t); } + // TODO stack addresses need copy stack handling else if ((s = (jl_gcframe_t*)gc_read_stack(&s->prev, offset, lb, ub))) { + gc_heap_snapshot_record_frame_to_frame_edge(stack->s, s); stack->s = s; stack->i = 0; uintptr_t new_nroots = gc_read_stack(&s->nroots, offset, lb, ub); @@ -2388,7 +2475,9 @@ stack: { goto mark; } s = (jl_gcframe_t*)gc_read_stack(&s->prev, offset, lb, ub); + // walk up one stack frame if (s != 0) { + gc_heap_snapshot_record_frame_to_frame_edge(stack->s, s); stack->s = s; i = 0; uintptr_t new_nroots = gc_read_stack(&s->nroots, offset, lb, ub); @@ -2420,6 +2509,7 @@ excstack: { size_t njlvals = jl_bt_num_jlvals(bt_entry); while (jlval_index < njlvals) { new_obj = jl_bt_entry_jlvalue(bt_entry, jlval_index); + gc_heap_snapshot_record_frame_to_object_edge(bt_entry, new_obj); uintptr_t nptr = 0; jlval_index += 1; if (gc_try_setmark(new_obj, &nptr, &tag, &bits)) { @@ -2434,6 +2524,7 @@ excstack: { } // The exception comes last - mark it new_obj = jl_excstack_exception(excstack, itr); + gc_heap_snapshot_record_frame_to_object_edge(excstack, new_obj); itr = jl_excstack_next(excstack, itr); bt_index = 0; jlval_index = 0; @@ -2472,7 +2563,19 @@ module_binding: { } void *vb = jl_astaggedvalue(b); verify_parent1("module", binding->parent, &vb, "binding_buff"); + // Record the size used for the box for non-const bindings + gc_heap_snapshot_record_module_to_binding(binding->parent, b); (void)vb; + jl_value_t *ty = jl_atomic_load_relaxed(&b->ty); + if (ty && ty != (jl_value_t*)jl_any_type) { + verify_parent2("module", binding->parent, + &b->ty, "binding(%s)", jl_symbol_name(b->name)); + if (gc_try_setmark(ty, &binding->nptr, &tag, &bits)) { + new_obj = ty; + gc_repush_markdata(&sp, gc_mark_binding_t); + goto mark; + } + } jl_value_t *value = jl_atomic_load_relaxed(&b->value); jl_value_t *globalref = jl_atomic_load_relaxed(&b->globalref); if (value) { @@ -2545,6 +2648,8 @@ finlist: { begin++; assert(begin < end); } + if (gc_ptr_tag(new_obj, 2)) + continue; uintptr_t nptr = 0; if (!gc_try_setmark(new_obj, &nptr, &tag, &bits)) continue; @@ -2609,6 +2714,7 @@ mark: { if (flags.how == 1) { void *val_buf = jl_astaggedvalue((char*)a->data - a->offset * a->elsize); verify_parent1("array", new_obj, &val_buf, "buffer ('loc' addr is meaningless)"); + gc_heap_snapshot_record_hidden_edge(new_obj, jl_valueof(val_buf), jl_array_nbytes(a)); (void)val_buf; gc_setmark_buf_(ptls, (char*)a->data - a->offset * a->elsize, bits, jl_array_nbytes(a)); @@ -2617,6 +2723,7 @@ mark: { if (update_meta || foreign_alloc) { objprofile_count(jl_malloc_tag, bits == GC_OLD_MARKED, jl_array_nbytes(a)); + gc_heap_snapshot_record_hidden_edge(new_obj, a->data, jl_array_nbytes(a)); if (bits == GC_OLD_MARKED) { ptls->gc_cache.perm_scanned_bytes += jl_array_nbytes(a); } @@ -2628,6 +2735,7 @@ mark: { else if (flags.how == 3) { jl_value_t *owner = jl_array_data_owner(a); uintptr_t nptr = (1 << 2) | (bits & GC_OLD); + gc_heap_snapshot_record_internal_array_edge(new_obj, owner); int markowner = gc_try_setmark(owner, &nptr, &tag, &bits); gc_mark_push_remset(ptls, new_obj, nptr); if (markowner) { @@ -2719,13 +2827,18 @@ mark: { int16_t tid = jl_atomic_load_relaxed(&ta->tid); gc_invoke_callbacks(jl_gc_cb_task_scanner_t, gc_cblist_task_scanner, - (ta, tid != -1 && ta == jl_all_tls_states[tid]->root_task)); + (ta, tid != -1 && ta == gc_all_tls_states[tid]->root_task)); import_gc_state(ptls, &sp); } #ifdef COPY_STACKS void *stkbuf =
             void *stkbuf = ta->stkbuf;
-            if (stkbuf && ta->copy_stack)
+            if (stkbuf && ta->copy_stack) {
                 gc_setmark_buf_(ptls, stkbuf, bits, ta->bufsz);
+                // For gc_heap_snapshot_record:
+                // TODO: attribute size of stack
+                // TODO: edge to stack data
+                // TODO: synthetic node for stack data (how big is it?)
+            }
 #endif
             jl_gcframe_t *s = ta->gcstack;
             size_t nroots;
@@ -2736,7 +2849,7 @@ mark: {
             if (stkbuf && ta->copy_stack && ta->ptls == NULL) {
                 int16_t tid = jl_atomic_load_relaxed(&ta->tid);
                 assert(tid >= 0);
-                jl_ptls_t ptls2 = jl_all_tls_states[tid];
+                jl_ptls_t ptls2 = gc_all_tls_states[tid];
                 ub = (uintptr_t)ptls2->stackbase;
                 lb = ub - ta->copy_stack;
                 offset = (uintptr_t)stkbuf - lb;
@@ -2744,12 +2857,15 @@ mark: {
 #endif
             if (s) {
                 nroots = gc_read_stack(&s->nroots, offset, lb, ub);
+                gc_heap_snapshot_record_task_to_frame_edge(ta, s);
+                assert(nroots <= UINT32_MAX);
                 gc_mark_stackframe_t stackdata = {s, 0, (uint32_t)nroots, offset, lb, ub};
                 gc_mark_stack_push(&ptls->gc_cache, &sp, gc_mark_laddr(stack),
                                    &stackdata, sizeof(stackdata), 1);
             }
             if (ta->excstack) {
+                gc_heap_snapshot_record_task_to_frame_edge(ta, ta->excstack);
                 gc_setmark_buf_(ptls, ta->excstack, bits, sizeof(jl_excstack_t) +
                                 sizeof(uintptr_t)*ta->excstack->reserved_size);
                 gc_mark_excstack_t stackdata = {ta->excstack, ta->excstack->top, 0, 0};
@@ -2846,14 +2962,31 @@ mark: {
 static void jl_gc_queue_thread_local(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp,
                                      jl_ptls_t ptls2)
 {
-    gc_mark_queue_obj(gc_cache, sp, jl_atomic_load_relaxed(&ptls2->current_task));
-    gc_mark_queue_obj(gc_cache, sp, ptls2->root_task);
-    if (ptls2->next_task)
-        gc_mark_queue_obj(gc_cache, sp, ptls2->next_task);
-    if (ptls2->previous_task) // shouldn't be necessary, but no reason not to
-        gc_mark_queue_obj(gc_cache, sp, ptls2->previous_task);
-    if (ptls2->previous_exception)
+    jl_task_t *task;
+    task = ptls2->root_task;
+    if (task) {
+        gc_mark_queue_obj(gc_cache, sp, task);
+        gc_heap_snapshot_record_root((jl_value_t*)task, "root task");
+    }
+    task = jl_atomic_load_relaxed(&ptls2->current_task);
+    if (task) {
+        gc_mark_queue_obj(gc_cache, sp, task);
+        gc_heap_snapshot_record_root((jl_value_t*)task, "current task");
+    }
+    task = ptls2->next_task;
+    if (task) {
+        gc_mark_queue_obj(gc_cache, sp, task);
+        gc_heap_snapshot_record_root((jl_value_t*)task, "next task");
+    }
+    task = ptls2->previous_task;
+    if (task) { // shouldn't be necessary, but no reason not to
+        gc_mark_queue_obj(gc_cache, sp, task);
+        gc_heap_snapshot_record_root((jl_value_t*)task, "previous task");
+    }
+    if (ptls2->previous_exception) {
         gc_mark_queue_obj(gc_cache, sp, ptls2->previous_exception);
+        gc_heap_snapshot_record_root((jl_value_t*)ptls2->previous_exception, "previous exception");
+    }
 }
 
 extern jl_value_t *cmpswap_names JL_GLOBALLY_ROOTED;
@@ -2863,6 +2996,7 @@ static void mark_roots(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp)
 {
     // modules
     gc_mark_queue_obj(gc_cache, sp, jl_main_module);
+    gc_heap_snapshot_record_root((jl_value_t*)jl_main_module, "main_module");
 
     // invisible builtin values
     if (jl_an_empty_vec_any != NULL)
@@ -2872,16 +3006,19 @@ static void mark_roots(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp)
     for (size_t i = 0; i < jl_current_modules.size; i += 2) {
         if (jl_current_modules.table[i + 1] != HT_NOTFOUND) {
             gc_mark_queue_obj(gc_cache, sp, jl_current_modules.table[i]);
+            gc_heap_snapshot_record_root((jl_value_t*)jl_current_modules.table[i], "top level module");
         }
     }
     gc_mark_queue_obj(gc_cache, sp, jl_anytuple_type_type);
     for (size_t i = 0; i < N_CALL_CACHE; i++) {
         jl_typemap_entry_t *v = jl_atomic_load_relaxed(&call_cache[i]);
-        if (v != NULL)
+        if (v != NULL) {
             gc_mark_queue_obj(gc_cache, sp, v);
+        }
     }
-    if (jl_all_methods != NULL)
+    if (jl_all_methods != NULL) {
         gc_mark_queue_obj(gc_cache, sp, jl_all_methods);
+    }
 
     if (_jl_debug_method_invalidation != NULL)
         gc_mark_queue_obj(gc_cache, sp, _jl_debug_method_invalidation);
@@ -2901,17 +3038,25 @@ static void sweep_finalizer_list(arraylist_t *list)
     size_t j = 0;
     for (size_t i=0; i < len; i+=2) {
         void *v0 = items[i];
-        void *v = gc_ptr_clear_tag(v0, 1);
+        void *v = gc_ptr_clear_tag(v0, 3);
         if (__unlikely(!v0)) {
             // remove from this list
             continue;
         }
 
         void *fin = items[i+1];
-        int isfreed = !gc_marked(jl_astaggedvalue(v)->bits.gc);
-        int isold = (list != &finalizer_list_marked &&
+        int isfreed;
+        int isold;
+        if (gc_ptr_tag(v0, 2)) {
+            isfreed = 1;
+            isold = 0;
+        }
+        else {
+            isfreed = !gc_marked(jl_astaggedvalue(v)->bits.gc);
+            isold = (list != &finalizer_list_marked &&
                  jl_astaggedvalue(v)->bits.gc == GC_OLD_MARKED &&
                  jl_astaggedvalue(fin)->bits.gc == GC_OLD_MARKED);
+        }
         if (isfreed || isold) {
             // remove from this list
         }
@@ -3094,16 +3239,24 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     uint64_t start_mark_time = jl_hrtime();
 
     // 1. fix GC bits of objects in the remset.
-    for (int t_i = 0; t_i < jl_n_threads; t_i++)
-        jl_gc_premark(jl_all_tls_states[t_i]);
+    assert(gc_n_threads);
+    for (int t_i = 0; t_i < gc_n_threads; t_i++) {
+        jl_ptls_t ptls2 = gc_all_tls_states[t_i];
+        if (ptls2 != NULL)
+            jl_gc_premark(ptls2);
+    }
 
-    for (int t_i = 0; t_i < jl_n_threads; t_i++) {
-        jl_ptls_t ptls2 = jl_all_tls_states[t_i];
+    assert(gc_n_threads);
+    for (int t_i = 0; t_i < gc_n_threads; t_i++) {
+        jl_ptls_t ptls2 = gc_all_tls_states[t_i];
+        if (ptls2 == NULL)
+            continue;
         // 2.1. mark every object in the `last_remsets` and `rem_binding`
         jl_gc_queue_remset(gc_cache, &sp, ptls2);
         // 2.2. mark every thread local root
         jl_gc_queue_thread_local(gc_cache, &sp, ptls2);
         // 2.3. mark any managed objects in the backtrace buffer
+        // TODO: treat these as roots for gc_heap_snapshot_record
         jl_gc_queue_bt_buf(gc_cache, &sp, ptls2);
     }
@@ -3134,16 +3287,22 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     // mark the object moved to the marked list from the
     // `finalizer_list` by `sweep_finalizer_list`
     size_t orig_marked_len = finalizer_list_marked.len;
-    for (int i = 0;i < jl_n_threads;i++) {
-        jl_ptls_t ptls2 = jl_all_tls_states[i];
+    assert(gc_n_threads);
+    for (int i = 0; i < gc_n_threads; i++) {
+        jl_ptls_t ptls2 = gc_all_tls_states[i];
+        if (ptls2 == NULL)
+            continue;
         sweep_finalizer_list(&ptls2->finalizers);
     }
     if (prev_sweep_full) {
         sweep_finalizer_list(&finalizer_list_marked);
         orig_marked_len = 0;
     }
-    for (int i = 0;i < jl_n_threads;i++) {
-        jl_ptls_t ptls2 = jl_all_tls_states[i];
+    assert(gc_n_threads);
+    for (int i = 0; i < gc_n_threads; i++) {
+        jl_ptls_t ptls2 = gc_all_tls_states[i];
+        if (ptls2 == NULL)
+            continue;
         gc_mark_queue_finlist(gc_cache, &sp, &ptls2->finalizers, 0);
     }
     gc_mark_queue_finlist(gc_cache, &sp, &finalizer_list_marked, orig_marked_len);
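sweep_finalizer_list now strips two tag bits instead of one. The scheme relies on GC allocations being at least 8-byte aligned, so the low pointer bits are free for flags: bit 0 already marked entries whose finalizer is a raw C function pointer, and bit 1 — set for entries registered through the new jl_gc_add_quiescent — marks payloads that are not Julia objects, which is why the mark loop above skips them and the sweep treats them as already freed. A self-contained sketch of the bit manipulation, mirroring the gc_ptr_tag/gc_ptr_clear_tag helpers in src/gc.h under hypothetical names:

    // Sketch only (not part of the patch).
    #include <stdint.h>

    static inline uintptr_t ptr_tag(void *v, uintptr_t mask)
    {
        return (uintptr_t)v & mask;           // nonzero if any masked flag bit is set
    }
    static inline void *ptr_clear_tag(void *v, uintptr_t mask)
    {
        return (void*)((uintptr_t)v & ~mask); // recover the aligned pointer
    }
    // usage as in the hunk above:
    //   void *v = ptr_clear_tag(v0, 3);      // strip both flag bits before dereferencing
    //   if (ptr_tag(v0, 2)) { /* quiescent entry: payload is not a Julia object */ }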
@@ -3182,8 +3341,13 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     // 5. next collection decision
     int not_freed_enough = (collection == JL_GC_AUTO) && estimate_freed < (7*(actual_allocd/10));
     int nptr = 0;
-    for (int i = 0;i < jl_n_threads;i++)
-        nptr += jl_all_tls_states[i]->heap.remset_nptr;
+    assert(gc_n_threads);
+    for (int i = 0; i < gc_n_threads; i++) {
+        jl_ptls_t ptls2 = gc_all_tls_states[i];
+        if (ptls2 == NULL)
+            continue;
+        nptr += ptls2->heap.remset_nptr;
+    }
 
     // many pointers in the intergen frontier => "quick" mark is not quick
     int large_frontier = nptr*sizeof(void*) >= default_collect_interval;
@@ -3198,9 +3362,16 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
         if (large_frontier) {
             sweep_full = 1;
         }
-        if (gc_num.interval > max_collect_interval) {
+        size_t maxmem = 0;
+#ifdef _P64
+        // on a big memory machine, increase max_collect_interval to totalmem / nthreads / 2
+        maxmem = total_mem / gc_n_threads / 2;
+#endif
+        if (maxmem < max_collect_interval)
+            maxmem = max_collect_interval;
+        if (gc_num.interval > maxmem) {
             sweep_full = 1;
-            gc_num.interval = max_collect_interval;
+            gc_num.interval = maxmem;
         }
     }
@@ -3214,7 +3385,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     if (gc_sweep_always_full) {
         sweep_full = 1;
     }
-    if (collection == JL_GC_FULL) {
+    if (collection == JL_GC_FULL && !prev_sweep_full) {
         sweep_full = 1;
         recollect = 1;
     }
@@ -3248,8 +3419,11 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     // sweeping is over
     // 6. if it is a quick sweep, put back the remembered objects in queued state
     // so that we don't trigger the barrier again on them.
-    for (int t_i = 0;t_i < jl_n_threads;t_i++) {
-        jl_ptls_t ptls2 = jl_all_tls_states[t_i];
+    assert(gc_n_threads);
+    for (int t_i = 0; t_i < gc_n_threads; t_i++) {
+        jl_ptls_t ptls2 = gc_all_tls_states[t_i];
+        if (ptls2 == NULL)
+            continue;
         if (!sweep_full) {
             for (int i = 0; i < ptls2->heap.remset->len; i++) {
                 jl_astaggedvalue(ptls2->heap.remset->items[i])->bits.gc = GC_MARKED;
@@ -3361,9 +3535,17 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
 #endif
     // Now we are ready to wait for other threads to hit the safepoint,
     // we can do a few things that doesn't require synchronization.
-    // TODO (concurrently queue objects)
-    // no-op for non-threading
-    jl_gc_wait_for_the_world();
+    //
+    // We must sync here with the tls_lock operations, so that we have a
+    // seq-cst order between these events: now we know that either the new
+    // thread must run into our safepoint flag or we must observe the
+    // existence of the thread in the jl_n_threads count.
+    //
+    // TODO: concurrently queue objects
+    jl_fence();
+    gc_n_threads = jl_atomic_load_acquire(&jl_n_threads);
+    gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
+    jl_gc_wait_for_the_world(gc_all_tls_states, gc_n_threads);
     JL_PROBE_GC_STOP_THE_WORLD();
 
     uint64_t t1 = jl_hrtime();
@@ -3386,7 +3568,8 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
         JL_UNLOCK_NOGC(&finalizers_lock);
     }
 
-    // no-op for non-threading
+    gc_n_threads = 0;
+    gc_all_tls_states = NULL;
     jl_safepoint_end_gc();
     jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING);
     JL_PROBE_GC_END();
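A worked instance of the new per-collection clamp (numbers are illustrative): on a 64 GiB machine with gc_n_threads == 8, maxmem = 64 GiB / 8 / 2 = 4 GiB, so an interval that has grown past 4 GiB forces a full sweep and is reset to 4 GiB. Computing this inside the collection — rather than once at startup, as the jl_gc_init code removed further down did — matters now that the thread count can grow at runtime. Factored out for clarity:

    // Sketch of the clamp above, not part of the patch.
    static size_t clamp_collect_interval(uint64_t total_mem, int nthreads, size_t floor_interval)
    {
        size_t maxmem = 0;
    #ifdef _P64
        maxmem = total_mem / nthreads / 2;        // e.g. 64 GiB / 8 / 2 = 4 GiB
    #endif
        // never drop below the static max_collect_interval
        return maxmem < floor_interval ? floor_interval : maxmem;
    }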
@@ -3395,10 +3578,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
 
     // Doing this on all threads is racy (it's impossible to check
     // or wait for finalizers on other threads without dead lock).
     if (!ptls->finalizers_inhibited && ptls->locks.len == 0) {
-        int8_t was_in_finalizer = ptls->in_finalizer;
-        ptls->in_finalizer = 1;
         run_finalizers(ct);
-        ptls->in_finalizer = was_in_finalizer;
     }
     JL_PROBE_GC_FINALIZER();
 
@@ -3413,8 +3593,12 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
 void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_mark_sp_t *sp)
 {
     jl_gc_mark_cache_t *gc_cache = &ptls->gc_cache;
-    for (size_t i = 0; i < jl_n_threads; i++)
-        jl_gc_queue_thread_local(gc_cache, sp, jl_all_tls_states[i]);
+    assert(gc_n_threads);
+    for (size_t i = 0; i < gc_n_threads; i++) {
+        jl_ptls_t ptls2 = gc_all_tls_states[i];
+        if (ptls2)
+            jl_gc_queue_thread_local(gc_cache, sp, ptls2);
+    }
     mark_roots(gc_cache, sp);
 }
 
@@ -3428,8 +3612,6 @@ JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty)
 // Per-thread initialization
 void jl_init_thread_heap(jl_ptls_t ptls)
 {
-    if (ptls->tid == 0)
-        ptls->disable_gc = 1;
     jl_thread_heap_t *heap = &ptls->heap;
     jl_gc_pool_t *p = heap->norm_pools;
     for (int i = 0; i < JL_GC_N_POOLS; i++) {
@@ -3460,13 +3642,13 @@ void jl_init_thread_heap(jl_ptls_t ptls)
     gc_cache->data_stack = (jl_gc_mark_data_t *)malloc_s(init_size * sizeof(jl_gc_mark_data_t));
 
     memset(&ptls->gc_num, 0, sizeof(ptls->gc_num));
-    assert(gc_num.interval == default_collect_interval);
     jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval);
 }
 
 // System-wide initializations
 void jl_gc_init(void)
 {
+    JL_MUTEX_INIT(&heapsnapshot_lock);
     JL_MUTEX_INIT(&finalizers_lock);
     uv_mutex_init(&gc_cache_lock);
     uv_mutex_init(&gc_perm_lock);
@@ -3484,14 +3666,10 @@ void jl_gc_init(void)
     gc_num.max_memory = 0;
 
 #ifdef _P64
-    // on a big memory machine, set max_collect_interval to totalmem / nthreads / 2
-    uint64_t total_mem = uv_get_total_memory();
+    total_mem = uv_get_total_memory();
     uint64_t constrained_mem = uv_get_constrained_memory();
     if (constrained_mem != 0)
         total_mem = constrained_mem;
-    size_t maxmem = total_mem / jl_n_threads / 2;
-    if (maxmem > max_collect_interval)
-        max_collect_interval = maxmem;
 #endif
 
     // We allocate with abandon until we get close to the free memory on the machine.
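jl_adopt_thread, exported later in this patch, is what allows a thread the runtime never started to participate in all of the above. A minimal embedding sketch, assuming jl_init has succeeded and a POSIX host (error handling omitted); entry through a cfunction/@ccallable pointer adopts automatically, so the explicit call is only needed when driving the C API directly:

    // Sketch only, not part of the patch.
    #include <julia.h>
    #include <pthread.h>

    static void *foreign_thread(void *arg)
    {
        (void)arg;
        jl_adopt_thread();   // returns jl_gcframe_t**; this thread may now call into Julia
        jl_eval_string("println(\"hello from an adopted thread\")");
        return NULL;
    }

    int main(void)
    {
        jl_init();
        pthread_t t;
        pthread_create(&t, NULL, foreign_thread, NULL);
        pthread_join(t, NULL);
        jl_atexit_hook(0);
        return 0;
    }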
@@ -3946,7 +4124,7 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p)
             goto valid_object;
         }
         jl_gc_pool_t *pool =
-            jl_all_tls_states[meta->thread_n]->heap.norm_pools +
+            gc_all_tls_states[meta->thread_n]->heap.norm_pools +
             meta->pool_n;
         if (meta->fl_begin_offset == (uint16_t) -1) {
             // case 2: this is a page on the newpages list
diff --git a/src/gc.h b/src/gc.h
index 00c3d48b52935..7b02df69abbc1 100644
--- a/src/gc.h
+++ b/src/gc.h
@@ -24,6 +24,7 @@
 #endif
 #endif
 #include "julia_assert.h"
+#include "gc-heap-snapshot.h"
 #include "gc-alloc-profiler.h"
 
 #ifdef __cplusplus
@@ -393,6 +394,8 @@ extern bigval_t *big_objects_marked;
 extern arraylist_t finalizer_list_marked;
 extern arraylist_t to_finalize;
 extern int64_t lazy_freed_pages;
+extern int gc_n_threads;
+extern jl_ptls_t* gc_all_tls_states;
 
 STATIC_INLINE bigval_t *bigval_header(jl_taggedvalue_t *o) JL_NOTSAFEPOINT
 {
@@ -646,8 +649,10 @@ extern int gc_verifying;
 #define verify_parent2(ty,obj,slot,arg1,arg2) do {} while (0)
 #define gc_verifying (0)
 #endif
-int gc_slot_to_fieldidx(void *_obj, void *slot);
-int gc_slot_to_arrayidx(void *_obj, void *begin);
+
+
+int gc_slot_to_fieldidx(void *_obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT;
+int gc_slot_to_arrayidx(void *_obj, void *begin) JL_NOTSAFEPOINT;
 NOINLINE void gc_mark_loop_unwind(jl_ptls_t ptls, jl_gc_mark_sp_t sp, int pc_offset);
 
 #ifdef GC_DEBUG_ENV
diff --git a/src/gf.c b/src/gf.c
index 138092ab9c93e..1f896921d45f5 100644
--- a/src/gf.c
+++ b/src/gf.c
@@ -222,8 +222,6 @@ JL_DLLEXPORT jl_code_instance_t* jl_new_codeinst(
         int32_t const_flags, size_t min_world, size_t max_world,
         uint32_t ipo_effects, uint32_t effects, jl_value_t *argescapes,
         uint8_t relocatability);
-JL_DLLEXPORT void jl_mi_cache_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMENT,
-                                     jl_code_instance_t *ci JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED);
 
 jl_datatype_t *jl_mk_builtin_func(jl_datatype_t *dt, const char *name, jl_fptr_args_t fptr) JL_GC_DISABLED
 {
@@ -436,7 +434,8 @@ JL_DLLEXPORT void jl_mi_cache_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMEN
     JL_LOCK(&mi->def.method->writelock);
     jl_code_instance_t *oldci = jl_atomic_load_relaxed(&mi->cache);
     jl_atomic_store_relaxed(&ci->next, oldci);
-    jl_gc_wb(ci, oldci); // likely older, but just being careful
+    if (oldci)
+        jl_gc_wb(ci, oldci);
     jl_atomic_store_release(&mi->cache, ci);
     jl_gc_wb(mi, ci);
     if (jl_is_method(mi->def.method))
@@ -872,10 +871,11 @@ JL_DLLEXPORT int jl_isa_compileable_sig(
     unsigned nspec_min = nargs + 1; // min number of non-vararg values before vararg
     unsigned nspec_max = INT32_MAX; // max number of non-vararg values before vararg
     jl_methtable_t *mt = jl_method_table_for(decl);
+    jl_methtable_t *kwmt = mt == jl_kwcall_mt ? jl_kwmethod_table_for(decl) : mt;
     if ((jl_value_t*)mt != jl_nothing) {
         // try to refine estimate of min and max
-        if (mt != jl_type_type_mt && mt != jl_nonfunction_mt)
-            nspec_min = mt->max_args + 2;
+        if (kwmt && kwmt != jl_type_type_mt && kwmt != jl_nonfunction_mt && kwmt != jl_kwcall_mt)
+            nspec_min = kwmt->max_args + 2 + 2 * (mt == jl_kwcall_mt);
         else
             nspec_max = nspec_min;
     }
@@ -1081,7 +1081,8 @@ static jl_method_instance_t *cache_method(
 
     int cache_with_orig = 1;
     jl_tupletype_t *compilationsig = tt;
-    intptr_t nspec = (mt == NULL || mt == jl_type_type_mt || mt == jl_nonfunction_mt ? definition->nargs + 1 : mt->max_args + 2);
+    jl_methtable_t *kwmt = mt == jl_kwcall_mt ? jl_kwmethod_table_for(definition->sig) : mt;
+    intptr_t nspec = (kwmt == NULL || kwmt == jl_type_type_mt || kwmt == jl_nonfunction_mt || kwmt == jl_kwcall_mt ? definition->nargs + 1 : kwmt->max_args + 2 + 2 * (mt == jl_kwcall_mt));
     jl_compilation_sig(tt, sparams, definition, nspec, &newparams);
     if (newparams) {
         compilationsig = jl_apply_tuple_type(newparams);
@@ -1334,7 +1335,9 @@ static void method_overwrite(jl_typemap_entry_t *newentry, jl_method_t *oldvalue
     jl_method_t *method = (jl_method_t*)newentry->func.method;
     jl_module_t *newmod = method->module;
     jl_module_t *oldmod = oldvalue->module;
-    jl_datatype_t *dt = jl_first_argument_datatype(oldvalue->sig);
+    jl_datatype_t *dt = jl_nth_argument_datatype(oldvalue->sig, 1);
+    if (dt == (jl_datatype_t*)jl_typeof(jl_kwcall_func))
+        dt = jl_nth_argument_datatype(oldvalue->sig, 3);
     int anon = dt && is_anonfn_typename(jl_symbol_name(dt->name->name));
     if ((jl_options.warn_overwrite == JL_OPTIONS_WARN_OVERWRITE_ON) ||
         (jl_options.incremental && jl_generating_output()) || anon) {
@@ -1359,7 +1362,7 @@ static void method_overwrite(jl_typemap_entry_t *newentry, jl_method_t *oldvalue
 
 static void update_max_args(jl_methtable_t *mt, jl_value_t *type)
 {
-    if (mt == jl_type_type_mt || mt == jl_nonfunction_mt)
+    if (mt == jl_type_type_mt || mt == jl_nonfunction_mt || mt == jl_kwcall_mt)
         return;
     type = jl_unwrap_unionall(type);
     assert(jl_is_datatype(type));
@@ -1434,6 +1437,7 @@ static void invalidate_method_instance(void (*f)(jl_code_instance_t*), jl_method
         jl_array_ptr_1d_push(_jl_debug_method_invalidation, boxeddepth);
         JL_GC_POP();
     }
+    //jl_static_show(JL_STDERR, (jl_value_t*)replaced);
     if (!jl_is_method(replaced->def.method))
         return; // shouldn't happen, but better to be safe
     JL_LOCK(&replaced->def.method->writelock);
@@ -1466,10 +1470,11 @@ static void invalidate_method_instance(void (*f)(jl_code_instance_t*), jl_method
 }
 
 // invalidate cached methods that overlap this definition
-void invalidate_backedges(void (*f)(jl_code_instance_t*), jl_method_instance_t *replaced_mi, size_t max_world, const char *why)
+static void invalidate_backedges(void (*f)(jl_code_instance_t*), jl_method_instance_t *replaced_mi, size_t max_world, const char *why)
 {
     JL_LOCK(&replaced_mi->def.method->writelock);
     jl_array_t *backedges = replaced_mi->backedges;
+    //jl_static_show(JL_STDERR, (jl_value_t*)replaced_mi);
     if (backedges) {
         // invalidate callers (if any)
         replaced_mi->backedges = NULL;
@@ -2069,8 +2074,7 @@ static void record_precompile_statement(jl_method_instance_t *mi)
     if (!jl_is_method(def))
         return;
 
-    if (jl_n_threads > 1)
-        JL_LOCK(&precomp_statement_out_lock);
+    JL_LOCK(&precomp_statement_out_lock);
     if (s_precompile == NULL) {
         const char *t = jl_options.trace_compile;
         if (!strncmp(t, "stderr", 6)) {
@@ -2089,8 +2093,7 @@ static void record_precompile_statement(jl_method_instance_t *mi)
         if (s_precompile != JL_STDERR)
             ios_flush(&f_precompile);
     }
-    if (jl_n_threads > 1)
-        JL_UNLOCK(&precomp_statement_out_lock);
+    JL_UNLOCK(&precomp_statement_out_lock);
 }
 
 jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t world)
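The removed jl_n_threads > 1 guards around precomp_statement_out_lock follow from the same change: the thread count can now rise between the check and the critical section, so the "single-threaded" fast path could skip a lock that a freshly adopted thread is about to contend on. Sketched, with hypothetical interleaving:

    // Illustrative only, not part of the patch:
    //   if (jl_n_threads > 1)        // reads 1 ...
    //       JL_LOCK(&lock);          // ... so the lock is skipped
    //   /* a foreign thread calls jl_adopt_thread() here and takes the lock */
    //   ios_printf(s_precompile, ...);   // now races with the new thread
    // Taking the (uncontended, hence cheap) lock unconditionally avoids this.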
@@ -2242,7 +2245,8 @@ JL_DLLEXPORT jl_value_t *jl_normalize_to_compilable_sig(jl_methtable_t *mt, jl_t
     jl_tupletype_t *tt = NULL;
     jl_svec_t *newparams = NULL;
     JL_GC_PUSH2(&tt, &newparams);
-    intptr_t nspec = (mt == jl_type_type_mt || mt == jl_nonfunction_mt ? m->nargs + 1 : mt->max_args + 2);
+    jl_methtable_t *kwmt = mt == jl_kwcall_mt ? jl_kwmethod_table_for(m->sig) : mt;
+    intptr_t nspec = (kwmt == NULL || kwmt == jl_type_type_mt || kwmt == jl_nonfunction_mt || kwmt == jl_kwcall_mt ? m->nargs + 1 : kwmt->max_args + 2 + 2 * (mt == jl_kwcall_mt));
     jl_compilation_sig(ti, env, m, nspec, &newparams);
     tt = (newparams ? jl_apply_tuple_type(newparams) : ti);
     int is_compileable = ((jl_datatype_t*)ti)->isdispatchtuple ||
@@ -2278,7 +2282,7 @@ jl_method_instance_t *jl_get_specialization1(jl_tupletype_t *types JL_PROPAGATES
     jl_tupletype_t *ti = match->spec_types;
     jl_method_instance_t *nf = NULL;
     if (jl_is_datatype(ti)) {
-        jl_methtable_t *mt = jl_method_table_for((jl_value_t*)ti);
+        jl_methtable_t *mt = jl_method_get_table(m);
         if ((jl_value_t*)mt != jl_nothing) {
             // get the specialization without caching it
             if (mt_cache && ((jl_datatype_t*)ti)->isdispatchtuple) {
@@ -2758,39 +2762,6 @@ jl_function_t *jl_new_generic_function_with_supertype(jl_sym_t *name, jl_module_
     return (jl_function_t*)f;
 }
 
-JL_DLLEXPORT jl_function_t *jl_get_kwsorter(jl_value_t *ty)
-{
-    jl_methtable_t *mt = jl_argument_method_table(ty);
-    if ((jl_value_t*)mt == jl_nothing)
-        jl_error("cannot get keyword sorter for abstract type");
-    if (!mt->kwsorter) {
-        JL_LOCK(&mt->writelock);
-        if (!mt->kwsorter) {
-            char *name;
-            if (mt == jl_nonfunction_mt) {
-                name = jl_symbol_name(mt->name);
-            }
-            else {
-                jl_datatype_t *dt = (jl_datatype_t*)jl_argument_datatype(ty);
-                assert(jl_is_datatype(dt));
-                name = jl_symbol_name(dt->name->name);
-                if (name[0] == '#')
-                    name++;
-            }
-            size_t l = strlen(name);
-            char *suffixed = (char*)malloc_s(l+5);
-            strcpy(&suffixed[0], name);
-            strcpy(&suffixed[l], "##kw");
-            jl_sym_t *fname = jl_symbol(suffixed);
-            free(suffixed);
-            mt->kwsorter = jl_new_generic_function_with_supertype(fname, mt->module, jl_function_type);
-            jl_gc_wb(mt, mt->kwsorter);
-        }
-        JL_UNLOCK(&mt->writelock);
-    }
-    return mt->kwsorter;
-}
-
 jl_function_t *jl_new_generic_function(jl_sym_t *name, jl_module_t *module)
 {
     return jl_new_generic_function_with_supertype(name, module, jl_function_type);
diff --git a/src/init.c b/src/init.c
index b42aea8bd4883..54cce84c763af 100644
--- a/src/init.c
+++ b/src/init.c
@@ -201,14 +201,18 @@ void jl_task_frame_noreturn(jl_task_t *ct);
 
 JL_DLLEXPORT void jl_atexit_hook(int exitcode)
 {
-    if (jl_all_tls_states == NULL)
+    if (jl_atomic_load_relaxed(&jl_all_tls_states) == NULL)
         return;
 
-    jl_task_t *ct = jl_current_task;
+    jl_task_t *ct = jl_get_current_task();
 
     // we are about to start tearing everything down, so lets try not to get
     // upset by the local mess of things when we run the user's _atexit hooks
-    jl_task_frame_noreturn(ct);
+    if (ct)
+        jl_task_frame_noreturn(ct);
+
+    if (ct == NULL && jl_base_module)
+        ct = container_of(jl_adopt_thread(), jl_task_t, gcstack);
 
     if (exitcode == 0)
         jl_write_compiler_output();
@@ -217,10 +221,16 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode)
         jl_write_coverage_data(jl_options.output_code_coverage);
     if (jl_options.malloc_log)
         jl_write_malloc_log();
+
+    int8_t old_state;
+    if (ct)
+        old_state = jl_gc_unsafe_enter(ct->ptls);
+
     if (jl_base_module) {
         jl_value_t *f = jl_get_global(jl_base_module, jl_symbol("_atexit"));
         if (f != NULL) {
             JL_TRY {
+                assert(ct);
                 size_t last_age = ct->world_age;
                 ct->world_age = jl_get_world_counter();
                 jl_apply(&f, 1);
@@ -240,7 +250,8 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode)
     JL_STDOUT = (uv_stream_t*) STDOUT_FILENO;
     JL_STDERR = (uv_stream_t*) STDERR_FILENO;
 
-    jl_gc_run_all_finalizers(ct);
+    if (ct)
+        jl_gc_run_all_finalizers(ct);
     uv_loop_t *loop = jl_global_event_loop();
     if (loop != NULL) {
@@ -248,7 +259,7 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode)
         JL_UV_LOCK();
         uv_walk(loop, jl_uv_exitcleanup_walk, &queue);
         struct uv_shutdown_queue_item *item = queue.first;
-        if (ct != NULL) {
+        if (ct) {
             while (item) {
                 JL_TRY {
                     while (item) {
@@ -289,11 +300,13 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode)
 #endif
     jl_teardown_codegen();
+    if (ct)
+        jl_gc_unsafe_leave(ct->ptls, old_state);
 }
 
 JL_DLLEXPORT void jl_postoutput_hook(void)
 {
-    if (jl_all_tls_states == NULL)
+    if (jl_atomic_load_relaxed(&jl_all_tls_states) == NULL)
         return;
 
     if (jl_base_module) {
@@ -772,7 +785,7 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_
 
     if (jl_base_module == NULL) {
         // nthreads > 1 requires code in Base
-        jl_n_threads = 1;
+        jl_atomic_store_relaxed(&jl_n_threads, 1);
     }
     jl_start_threads();
 
@@ -841,6 +854,9 @@ static void post_boot_hooks(void)
     jl_loaderror_type = (jl_datatype_t*)core("LoadError");
     jl_initerror_type = (jl_datatype_t*)core("InitError");
     jl_pair_type = core("Pair");
+    jl_kwcall_func = core("kwcall");
+    jl_kwcall_mt = ((jl_datatype_t*)jl_typeof(jl_kwcall_func))->name->mt;
+    jl_kwcall_mt->max_args = 0;
 
     jl_weakref_type = (jl_datatype_t*)core("WeakRef");
     jl_vecelement_typename = ((jl_datatype_t*)jl_unwrap_unionall(core("VecElement")))->name;
diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp
index a0dd11e7f009e..e612c39ca97d2 100644
--- a/src/jitlayers.cpp
+++ b/src/jitlayers.cpp
@@ -14,6 +14,7 @@
 #if JL_LLVM_VERSION >= 130000
 #include
 #endif
+#include <llvm/IR/Verifier.h>
 #include
 #include
 #include
@@ -1106,7 +1107,9 @@ namespace {
             JL_TIMING(LLVM_OPT);
 
             //Run the optimization
+            assert(!verifyModule(M, &errs()));
             (***PMs).run(M);
+            assert(!verifyModule(M, &errs()));
 
             uint64_t end_time = 0;
             {
diff --git a/src/jl_exported_data.inc b/src/jl_exported_data.inc
index eae13a4ff285e..6f0671ef0d6f7 100644
--- a/src/jl_exported_data.inc
+++ b/src/jl_exported_data.inc
@@ -55,6 +55,7 @@
     XX(jl_interconditional_type) \
     XX(jl_interrupt_exception) \
     XX(jl_intrinsic_type) \
+    XX(jl_kwcall_func) \
     XX(jl_lineinfonode_type) \
     XX(jl_linenumbernode_type) \
     XX(jl_llvmpointer_type) \
@@ -130,7 +131,7 @@
 // Data symbols that are defined inside the public libjulia
 #define JL_EXPORTED_DATA_SYMBOLS(XX) \
     XX(jl_n_threadpools, int) \
-    XX(jl_n_threads, int) \
+    XX(jl_n_threads, _Atomic(int)) \
     XX(jl_options, jl_options_t) \
 
 // end of file
diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc
index b17251d4a5af3..80b56f735b68b 100644
--- a/src/jl_exported_funcs.inc
+++ b/src/jl_exported_funcs.inc
@@ -3,6 +3,7 @@
 #define JL_RUNTIME_EXPORTED_FUNCS(XX) \
     XX(jl_active_task_stack) \
     XX(jl_add_standard_imports) \
+    XX(jl_adopt_thread) \
     XX(jl_alignment) \
     XX(jl_alloc_array_1d) \
     XX(jl_alloc_array_2d) \
@@ -126,7 +127,6 @@
     XX(jl_environ) \
     XX(jl_eof_error) \
     XX(jl_eqtable_get) \
-    XX(jl_eqtable_nextind) \
     XX(jl_eqtable_pop) \
     XX(jl_eqtable_put) \
     XX(jl_errno) \
@@ -151,6 +151,7 @@
     XX(jl_gc_add_finalizer) \
     XX(jl_gc_add_finalizer_th) \
     XX(jl_gc_add_ptr_finalizer) \
+    XX(jl_gc_add_quiescent) \
     XX(jl_gc_allocobj) \
     XX(jl_gc_alloc_0w) \
     XX(jl_gc_alloc_1w) \
@@ -220,8 +221,6 @@
     XX(jl_get_JIT) \
     XX(jl_get_julia_bin) \
     XX(jl_get_julia_bindir) \
-    XX(jl_get_keyword_sorter) \
-    XX(jl_get_kwsorter) \
     XX(jl_get_method_inferred) \
     XX(jl_get_module_binding) \
     XX(jl_get_module_compile) \
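With jl_n_threads now _Atomic(int) (and jl_all_tls_states an atomically loaded pointer), consumers snapshot both once and must tolerate the count growing afterwards — threads are only ever added. A sketch of the reader pattern the GC code above adopts; treat it as a template, not an exported API:

    // Snapshot-then-iterate over thread states (sketch, mirroring gc.c above).
    int nthreads = jl_atomic_load_acquire(&jl_n_threads);
    jl_ptls_t *states = jl_atomic_load_relaxed(&jl_all_tls_states);
    for (int i = 0; i < nthreads; i++) {
        jl_ptls_t ptls2 = states[i];
        if (ptls2 == NULL)   // slot may still be mid-initialization
            continue;
        /* ... inspect ptls2 ... */
    }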
diff --git a/src/jlapi.c b/src/jlapi.c
index d1fb1e5aacf25..53a6c9b3c6859 100644
--- a/src/jlapi.c
+++ b/src/jlapi.c
@@ -96,9 +96,15 @@ JL_DLLEXPORT void jl_init_with_image__threading(const char *julia_bindir,
     jl_init_with_image(julia_bindir, image_relative_path);
 }
 
+static void _jl_exception_clear(jl_task_t *ct) JL_NOTSAFEPOINT
+{
+    ct->ptls->previous_exception = NULL;
+}
+
 JL_DLLEXPORT jl_value_t *jl_eval_string(const char *str)
 {
     jl_value_t *r;
+    jl_task_t *ct = jl_current_task;
     JL_TRY {
         const char filename[] = "none";
         jl_value_t *ast = jl_parse_all(str, strlen(str),
@@ -106,10 +112,10 @@ JL_DLLEXPORT jl_value_t *jl_eval_string(const char *str)
         JL_GC_PUSH1(&ast);
         r = jl_toplevel_eval_in(jl_main_module, ast);
         JL_GC_POP();
-        jl_exception_clear();
+        _jl_exception_clear(ct);
     }
     JL_CATCH {
-        jl_current_task->ptls->previous_exception = jl_current_exception();
+        ct->ptls->previous_exception = jl_current_exception();
         r = NULL;
     }
     return r;
@@ -128,7 +134,7 @@ JL_DLLEXPORT jl_value_t *jl_exception_occurred(void)
 
 JL_DLLEXPORT void jl_exception_clear(void)
 {
-    jl_current_task->ptls->previous_exception = NULL;
+    _jl_exception_clear(jl_current_task);
 }
 
 // get the name of a type as a string
@@ -181,7 +187,7 @@ JL_DLLEXPORT jl_value_t *jl_call(jl_function_t *f, jl_value_t **args, uint32_t n
         v = jl_apply(argv, nargs);
         ct->world_age = last_age;
         JL_GC_POP();
-        jl_exception_clear();
+        _jl_exception_clear(ct);
     }
     JL_CATCH {
         ct->ptls->previous_exception = jl_current_exception();
@@ -201,7 +207,7 @@ JL_DLLEXPORT jl_value_t *jl_call0(jl_function_t *f)
         v = jl_apply_generic(f, NULL, 0);
         ct->world_age = last_age;
         JL_GC_POP();
-        jl_exception_clear();
+        _jl_exception_clear(ct);
     }
     JL_CATCH {
         ct->ptls->previous_exception = jl_current_exception();
@@ -224,7 +230,7 @@ JL_DLLEXPORT jl_value_t *jl_call1(jl_function_t *f, jl_value_t *a)
         v = jl_apply(argv, 2);
         ct->world_age = last_age;
         JL_GC_POP();
-        jl_exception_clear();
+        _jl_exception_clear(ct);
     }
     JL_CATCH {
         ct->ptls->previous_exception = jl_current_exception();
@@ -248,7 +254,7 @@ JL_DLLEXPORT jl_value_t *jl_call2(jl_function_t *f, jl_value_t *a, jl_value_t *b
         v = jl_apply(argv, 3);
         ct->world_age = last_age;
         JL_GC_POP();
-        jl_exception_clear();
+        _jl_exception_clear(ct);
     }
     JL_CATCH {
         ct->ptls->previous_exception = jl_current_exception();
@@ -261,6 +267,7 @@ JL_DLLEXPORT jl_value_t *jl_call3(jl_function_t *f, jl_value_t *a,
                                   jl_value_t *b, jl_value_t *c)
 {
     jl_value_t *v;
+    jl_task_t *ct = jl_current_task;
     JL_TRY {
         jl_value_t **argv;
         JL_GC_PUSHARGS(argv, 4);
@@ -268,16 +275,15 @@ JL_DLLEXPORT jl_value_t *jl_call3(jl_function_t *f, jl_value_t *a,
         argv[1] = a;
         argv[2] = b;
         argv[3] = c;
-        jl_task_t *ct = jl_current_task;
         size_t last_age = ct->world_age;
         ct->world_age = jl_get_world_counter();
         v = jl_apply(argv, 4);
         ct->world_age = last_age;
         JL_GC_POP();
-        jl_exception_clear();
+        _jl_exception_clear(ct);
     }
     JL_CATCH {
-        jl_current_task->ptls->previous_exception = jl_current_exception();
+        ct->ptls->previous_exception = jl_current_exception();
         v = NULL;
     }
     return v;
@@ -560,8 +566,8 @@ static NOINLINE int true_main(int argc, char *argv[])
         (jl_function_t*)jl_get_global(jl_base_module, jl_symbol("_start")) : NULL;
 
     if (start_client) {
+        jl_task_t *ct = jl_current_task;
         JL_TRY {
-            jl_task_t *ct = jl_current_task;
             size_t last_age = ct->world_age;
             ct->world_age = jl_get_world_counter();
             jl_apply(&start_client, 1);
diff --git a/src/jltypes.c b/src/jltypes.c
index 7c527450b4cd3..76e13ef485902 100644
--- a/src/jltypes.c
+++ b/src/jltypes.c
@@ -2084,17 +2084,17 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_methtable_type->name->mt = jl_nonfunction_mt;
     jl_methtable_type->super = jl_any_type;
     jl_methtable_type->parameters = jl_emptysvec;
-    jl_methtable_type->name->n_uninitialized = 12 - 5;
-    jl_methtable_type->name->names = jl_perm_symsvec(12, "name", "defs",
+    jl_methtable_type->name->n_uninitialized = 11 - 6;
+    jl_methtable_type->name->names = jl_perm_symsvec(11, "name", "defs",
                                                      "leafcache", "cache", "max_args",
-                                                     "kwsorter", "module",
-                                                     "backedges", "", "", "offs", "");
-    jl_methtable_type->types = jl_svec(12, jl_symbol_type, jl_any_type, jl_any_type,
+                                                     "module", "backedges",
+                                                     "", "", "offs", "");
+    jl_methtable_type->types = jl_svec(11, jl_symbol_type, jl_any_type, jl_any_type,
                                        jl_any_type, jl_any_type/*jl_long*/,
-                                       jl_any_type, jl_any_type/*module*/,
-                                       jl_any_type/*any vector*/, jl_any_type/*voidpointer*/, jl_any_type/*int32*/,
+                                       jl_any_type/*module*/, jl_any_type/*any vector*/,
+                                       jl_any_type/*voidpointer*/, jl_any_type/*int32*/,
                                        jl_any_type/*uint8*/, jl_any_type/*uint8*/);
-    const static uint32_t methtable_constfields[1] = { 0x00000040 }; // (1<<6);
+    const static uint32_t methtable_constfields[1] = { 0x00000020 }; // (1<<5);
     jl_methtable_type->name->constfields = methtable_constfields;
     jl_precompute_memoized_dt(jl_methtable_type, 1);
 
@@ -2551,8 +2551,8 @@ void jl_init_types(void) JL_GC_DISABLED
         //"absolute_max",
         "ipo_purity_bits", "purity_bits",
         "argescapes",
-        "isspecsig", "precompile", "invoke", "specptr", // function object decls
-        "relocatability"),
+        "isspecsig", "precompile", "relocatability",
+        "invoke", "specptr"), // function object decls
         jl_svec(15,
             jl_method_instance_type,
             jl_any_type,
@@ -2567,13 +2567,13 @@ void jl_init_types(void) JL_GC_DISABLED
             jl_any_type,
             jl_bool_type,
             jl_bool_type,
-            jl_any_type, jl_any_type, // fptrs
-            jl_uint8_type),
+            jl_uint8_type,
+            jl_any_type, jl_any_type), // fptrs
         jl_emptysvec,
         0, 1, 1);
     jl_svecset(jl_code_instance_type->types, 1, jl_code_instance_type);
-    const static uint32_t code_instance_constfields[1] = { 0b000001010111101 }; // Set fields 1, 3-6, 8, 10 as const
-    const static uint32_t code_instance_atomicfields[1] = { 0b011100101000010 }; // Set fields 2, 7, 9, 12-14 as atomic
+    const static uint32_t code_instance_constfields[1] = { 0b000001010110001 }; // Set fields 1, 5-6, 8, 10 as const
+    const static uint32_t code_instance_atomicfields[1] = { 0b110100101000010 }; // Set fields 2, 7, 9, 12, 14-15 as atomic
     //Fields 11 and 15 must be protected by locks, and thus all operations on jl_code_instance_t are threadsafe
     jl_code_instance_type->name->constfields = code_instance_constfields;
     jl_code_instance_type->name->atomicfields = code_instance_atomicfields;
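Decoding the two masks (bit 1 = field 1, reading from the least significant bit) confirms the comments: 0b000001010110001 has bits 1, 5, 6, 8 and 10 set, and 0b110100101000010 has bits 2, 7, 9, 12, 14 and 15 set. Moving relocatability in front of the two fptr fields is what pushed invoke/specptr to positions 14-15, so both masks had to change together with the svec order. A one-line checker for working through such masks:

    // Sketch: 1-based field test against the masks above (not part of the patch).
    static int field_in_mask(uint32_t mask, int field)
    {
        return (mask >> (field - 1)) & 1;   // e.g. field_in_mask(0b110100101000010, 14) == 1
    }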
@@ -2722,16 +2722,16 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_svecset(jl_typename_type->types, 13, jl_uint8_type);
     jl_svecset(jl_typename_type->types, 14, jl_uint8_type);
     jl_svecset(jl_methtable_type->types, 4, jl_long_type);
-    jl_svecset(jl_methtable_type->types, 6, jl_module_type);
-    jl_svecset(jl_methtable_type->types, 7, jl_array_any_type);
-    jl_svecset(jl_methtable_type->types, 8, jl_long_type); // voidpointer
-    jl_svecset(jl_methtable_type->types, 9, jl_long_type); // uint32_t plus alignment
+    jl_svecset(jl_methtable_type->types, 5, jl_module_type);
+    jl_svecset(jl_methtable_type->types, 6, jl_array_any_type);
+    jl_svecset(jl_methtable_type->types, 7, jl_long_type); // voidpointer
+    jl_svecset(jl_methtable_type->types, 8, jl_long_type); // uint32_t plus alignment
+    jl_svecset(jl_methtable_type->types, 9, jl_uint8_type);
     jl_svecset(jl_methtable_type->types, 10, jl_uint8_type);
-    jl_svecset(jl_methtable_type->types, 11, jl_uint8_type);
     jl_svecset(jl_method_type->types, 12, jl_method_instance_type);
     jl_svecset(jl_method_instance_type->types, 6, jl_code_instance_type);
-    jl_svecset(jl_code_instance_type->types, 12, jl_voidpointer_type);
     jl_svecset(jl_code_instance_type->types, 13, jl_voidpointer_type);
+    jl_svecset(jl_code_instance_type->types, 14, jl_voidpointer_type);
 
     jl_compute_field_offsets(jl_datatype_type);
     jl_compute_field_offsets(jl_typename_type);
diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm
index 8af1bc8b80d23..d774269a92033 100644
--- a/src/julia-syntax.scm
+++ b/src/julia-syntax.scm
@@ -1662,7 +1662,7 @@
   (define (kwcall-unless-empty f pa kw-container-test kw-container)
     `(if (call (top isempty) ,kw-container-test)
          (call ,f ,@pa)
-         (call (call (core kwfunc) ,f) ,kw-container ,f ,@pa)))
+         (call (core kwcall) ,kw-container ,f ,@pa)))
 
   (let ((f            (if (sym-ref? fexpr) fexpr (make-ssavalue)))
         (kw-container (make-ssavalue)))
@@ -1676,7 +1676,7 @@
                  #t))
          ,(if (every vararg? kw)
               (kwcall-unless-empty f pa kw-container kw-container)
-              `(call (call (core kwfunc) ,f) ,kw-container ,f ,@pa)))))
+              `(call (core kwcall) ,kw-container ,f ,@pa)))))
 
 ;; convert `a+=b` to `a=a+b`
 (define (expand-update-operator- op op= lhs rhs declT)
@@ -2910,18 +2910,17 @@
            ,(construct-loops (reverse itrs) (reverse iv))
            ,result)))))
 
-(define (lhs-vars e)
-  (cond ((symdecl? e)   (list (decl-var e)))
-        ((and (pair? e) (eq? (car e) 'tuple))
-         (apply append (map lhs-vars (cdr e))))
-        (else '())))
-
 (define (lhs-decls e)
   (cond ((symdecl? e)   (list e))
-        ((and (pair? e) (eq? (car e) 'tuple))
+        ((and (pair? e)
+              (or (eq? (car e) 'tuple)
+                  (eq? (car e) 'parameters)))
         (apply append (map lhs-decls (cdr e))))
        (else '())))
 
+(define (lhs-vars e)
+  (map decl-var (lhs-decls e)))
+
 (define (all-decl-vars e)  ;; map decl-var over every level of an assignment LHS
   (cond ((eventually-call? e) e)
         ((decl? e)            (decl-var e))
@@ -3348,9 +3347,9 @@
            (let ((vi (get tab (cadr e) #f)))
              (if vi
                  (vinfo:set-called! vi #t))
-            ;; calls to functions with keyword args go through `kwfunc` first
-            (if (and (length= e 3) (equal? (cadr e) '(core kwfunc)))
-                (let ((vi2 (get tab (caddr e) #f)))
+            ;; calls to functions with keyword args have head of `kwcall` first
+            (if (and (length> e 3) (equal? (cadr e) '(core kwcall)))
+                (let ((vi2 (get tab (cadddr e) #f)))
                  (if vi2
                      (vinfo:set-called! vi2 #t))))
            (for-each (lambda (x) (analyze-vars x env captvars sp tab))
diff --git a/src/julia.h b/src/julia.h
index 644ce0dbd78ae..211a8a1ab726c 100644
--- a/src/julia.h
+++ b/src/julia.h
@@ -433,6 +433,7 @@ typedef struct _jl_code_instance_t {
     // compilation state cache
     uint8_t isspecsig; // if specptr is a specialized function signature for specTypes->rettype
     _Atomic(uint8_t) precompile;  // if set, this will be added to the output system image
+    uint8_t relocatability;  // nonzero if all roots are built into sysimg or tagged by module key
     _Atomic(jl_callptr_t) invoke; // jlcall entry point
     union _jl_generic_specptr_t {
         _Atomic(void*) fptr;
@@ -441,7 +442,6 @@ typedef struct _jl_code_instance_t {
         _Atomic(jl_fptr_sparam_t) fptr3; // 4 interpreter
     } specptr; // private data for `jlcall entry point
-    uint8_t relocatability; // nonzero if all roots are built into sysimg or tagged by module key
 } jl_code_instance_t;
 
 // all values are callable as Functions
@@ -656,7 +656,6 @@ typedef struct _jl_methtable_t {
     _Atomic(jl_array_t*) leafcache;
     _Atomic(jl_typemap_t*) cache;
     intptr_t max_args;  // max # of non-vararg arguments in a signature
-    jl_value_t *kwsorter;  // keyword argument sorter function
     jl_module_t *module; // used for incremental serialization to locate original binding
     jl_array_t *backedges; // (sig, caller::MethodInstance) pairs
     jl_mutex_t writelock;
@@ -806,6 +805,7 @@ extern JL_DLLIMPORT jl_value_t *jl_emptytuple JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_value_t *jl_true  JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_value_t *jl_false JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_value_t *jl_nothing JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_value_t *jl_kwcall_func JL_GLOBALLY_ROOTED;
 
 // gc -------------------------------------------------------------------------
@@ -904,6 +904,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t);
 JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_gc_add_quiescent(jl_ptls_t ptls, void **v, void *f) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_finalize(jl_value_t *o);
 JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value);
 JL_DLLEXPORT jl_value_t *jl_gc_alloc_0w(void);
@@ -1495,7 +1496,6 @@ JL_DLLEXPORT jl_method_t *jl_method_def(jl_svec_t *argdata, jl_methtable_t *mt,
 JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo);
 JL_DLLEXPORT jl_code_info_t *jl_copy_code_info(jl_code_info_t *src);
 JL_DLLEXPORT size_t jl_get_world_counter(void) JL_NOTSAFEPOINT;
-JL_DLLEXPORT jl_function_t *jl_get_kwsorter(jl_value_t *ty);
 JL_DLLEXPORT jl_value_t *jl_box_bool(int8_t x) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_box_int8(int8_t x) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_box_uint8(uint8_t x) JL_NOTSAFEPOINT;
@@ -1656,9 +1656,10 @@ STATIC_INLINE jl_function_t *jl_get_function(jl_module_t *m, const char *name)
 }
 
 // eq hash tables
-JL_DLLEXPORT jl_array_t *jl_eqtable_put(jl_array_t *h, jl_value_t *key, jl_value_t *val, int *inserted);
-JL_DLLEXPORT jl_value_t *jl_eqtable_get(jl_array_t *h, jl_value_t *key, jl_value_t *deflt) JL_NOTSAFEPOINT;
-jl_value_t *jl_eqtable_getkey(jl_array_t *h, jl_value_t *key, jl_value_t *deflt) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_array_t *jl_eqtable_put(jl_array_t *h JL_ROOTING_ARGUMENT, jl_value_t *key, jl_value_t *val JL_ROOTED_ARGUMENT, int *inserted);
+JL_DLLEXPORT jl_value_t *jl_eqtable_get(jl_array_t *h JL_PROPAGATES_ROOT, jl_value_t *key, jl_value_t *deflt) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_value_t *jl_eqtable_pop(jl_array_t *h, jl_value_t *key, jl_value_t *deflt, int *found);
+jl_value_t *jl_eqtable_getkey(jl_array_t *h JL_PROPAGATES_ROOT, jl_value_t *key, jl_value_t *deflt) JL_NOTSAFEPOINT;
 
 // system information
 JL_DLLEXPORT int jl_errno(void) JL_NOTSAFEPOINT;
@@ -1673,7 +1674,7 @@ JL_DLLEXPORT jl_sym_t *jl_get_UNAME(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_sym_t *jl_get_ARCH(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_get_libllvm(void) JL_NOTSAFEPOINT;
 extern JL_DLLIMPORT int jl_n_threadpools;
-extern JL_DLLIMPORT int jl_n_threads;
+extern JL_DLLIMPORT _Atomic(int) jl_n_threads;
 extern JL_DLLIMPORT int *jl_n_threads_per_pool;
 
 // environment entries
@@ -1755,6 +1756,7 @@ JL_DLLEXPORT void jl_atexit_hook(int status);
 JL_DLLEXPORT void jl_postoutput_hook(void);
 JL_DLLEXPORT void JL_NORETURN jl_exit(int status);
 JL_DLLEXPORT const char *jl_pathname_for_handle(void *handle);
+JL_DLLEXPORT jl_gcframe_t **jl_adopt_thread(void);
 
 JL_DLLEXPORT int jl_deserialize_verify_header(ios_t *s);
 JL_DLLEXPORT void jl_preload_sysimg_so(const char *fname);
@@ -1839,6 +1841,7 @@ JL_DLLEXPORT jl_value_t *jl_compress_argnames(jl_array_t *syms);
 JL_DLLEXPORT jl_array_t *jl_uncompress_argnames(jl_value_t *syms);
 JL_DLLEXPORT jl_value_t *jl_uncompress_argname_n(jl_value_t *syms, size_t i);
 
+
 JL_DLLEXPORT int jl_is_operator(char *sym);
 JL_DLLEXPORT int jl_is_unary_operator(char *sym);
 JL_DLLEXPORT int jl_is_unary_and_binary_operator(char *sym);
@@ -2218,9 +2221,11 @@ typedef struct {
 
     // controls the emission of debug-info. mirrors the clang options
     int gnu_pubnames; // can we emit the gnu pubnames debuginfo
-    int debug_info_kind; // Enum for line-table-only, line-directives-only,
+    int debug_info_kind; // Enum for line-table-only, line-directives-only,
                          // limited, standalone
 
+    int safepoint_on_entry; // Emit a safepoint on entry to each function
+
     // Cache access. Default: jl_rettype_inferred.
     jl_codeinstance_lookup_t lookup;
diff --git a/src/julia_gcext.h b/src/julia_gcext.h
index 631d9c2910330..6523198474771 100644
--- a/src/julia_gcext.h
+++ b/src/julia_gcext.h
@@ -79,7 +79,7 @@ JL_DLLEXPORT void jl_gc_mark_queue_objarray(jl_ptls_t ptls, jl_value_t *parent,
 // will result in the custom sweep function actually being called.
 // This must be done at most once per object and should usually be
 // done right after allocating the object.
-JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t * bj);
+JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *bj);
 
 // The following functions enable support for conservative marking. This
This // functionality allows the user to determine if a machine word can be diff --git a/src/julia_internal.h b/src/julia_internal.h index b5fbf9416fcf0..7eb34239e783b 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -282,6 +282,7 @@ static inline void memmove_refs(void **dstp, void *const *srcp, size_t n) JL_NOT // useful constants extern jl_methtable_t *jl_type_type_mt JL_GLOBALLY_ROOTED; extern jl_methtable_t *jl_nonfunction_mt JL_GLOBALLY_ROOTED; +extern jl_methtable_t *jl_kwcall_mt JL_GLOBALLY_ROOTED; extern JL_DLLEXPORT _Atomic(size_t) jl_world_counter; typedef void (*tracer_cb)(jl_value_t *tracee); @@ -289,7 +290,6 @@ extern tracer_cb jl_newmeth_tracer; void jl_call_tracer(tracer_cb callback, jl_value_t *tracee); void print_func_loc(JL_STREAM *s, jl_method_t *m); extern jl_array_t *_jl_debug_method_invalidation JL_GLOBALLY_ROOTED; -void invalidate_backedges(void (*f)(jl_code_instance_t*), jl_method_instance_t *replaced_mi, size_t max_world, const char *why); extern JL_DLLEXPORT size_t jl_page_size; extern jl_function_t *jl_typeinf_func; @@ -622,7 +622,6 @@ JL_DLLEXPORT jl_value_t *jl_apply_2va(jl_value_t *f, jl_value_t **args, uint32_t void JL_NORETURN jl_method_error(jl_function_t *f, jl_value_t **args, size_t na, size_t world); JL_DLLEXPORT jl_value_t *jl_get_exceptionf(jl_datatype_t *exception_type, const char *fmt, ...); -JL_DLLEXPORT jl_value_t *jl_get_keyword_sorter(jl_value_t *f); JL_DLLEXPORT void jl_typeassert(jl_value_t *x, jl_value_t *t); #define JL_CALLABLE(name) \ @@ -715,14 +714,18 @@ jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t nargs, size_t w jl_value_t *jl_gf_invoke_by_method(jl_method_t *method, jl_value_t *gf, jl_value_t **args, size_t nargs); jl_value_t *jl_gf_invoke(jl_value_t *types, jl_value_t *f, jl_value_t **args, size_t nargs); +JL_DLLEXPORT jl_value_t *jl_gf_invoke_lookup_worlds(jl_value_t *types, jl_value_t *mt, size_t world, size_t *min_world, size_t *max_world); JL_DLLEXPORT jl_value_t *jl_matching_methods(jl_tupletype_t *types, jl_value_t *mt, int lim, int include_ambiguous, size_t world, size_t *min_valid, size_t *max_valid, int *ambig); JL_DLLEXPORT jl_value_t *jl_gf_invoke_lookup_worlds(jl_value_t *types, jl_value_t *mt, size_t world, size_t *min_world, size_t *max_world); -JL_DLLEXPORT jl_datatype_t *jl_first_argument_datatype(jl_value_t *argtypes JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; + +jl_datatype_t *jl_nth_argument_datatype(jl_value_t *argtypes JL_PROPAGATES_ROOT, int n) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_value_t *jl_argument_datatype(jl_value_t *argt JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_methtable_t *jl_method_table_for( jl_value_t *argtypes JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; +jl_methtable_t *jl_kwmethod_table_for( + jl_value_t *argtypes JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_methtable_t *jl_method_get_table( jl_method_t *method JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; jl_methtable_t *jl_argument_method_table(jl_value_t *argt JL_PROPAGATES_ROOT); @@ -958,6 +961,8 @@ JL_DLLEXPORT jl_method_instance_t *jl_specializations_get_linfo( jl_method_instance_t *jl_specializations_get_or_insert(jl_method_instance_t *mi_ins); JL_DLLEXPORT void jl_method_instance_add_backedge(jl_method_instance_t *callee, jl_value_t *invokesig, jl_method_instance_t *caller); JL_DLLEXPORT void jl_method_table_add_backedge(jl_methtable_t *mt, jl_value_t *typ, jl_value_t *caller); +JL_DLLEXPORT void jl_mi_cache_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMENT, + jl_code_instance_t *ci JL_ROOTED_ARGUMENT 
+                                     jl_code_instance_t *ci JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED);
 uint32_t jl_module_next_counter(jl_module_t *m) JL_NOTSAFEPOINT;
 jl_tupletype_t *arg_type_tuple(jl_value_t *arg1, jl_value_t **args, size_t nargs);
diff --git a/src/llvm-alloc-helpers.cpp b/src/llvm-alloc-helpers.cpp
index b2aded025c0d1..7a80985cf0219 100644
--- a/src/llvm-alloc-helpers.cpp
+++ b/src/llvm-alloc-helpers.cpp
@@ -163,7 +163,12 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg
     auto check_inst = [&] (Instruction *inst, Use *use) {
         if (isa<LoadInst>(inst)) {
             required.use_info.hasload = true;
-            if (cur.offset == UINT32_MAX || !required.use_info.addMemOp(inst, 0, cur.offset,
+            if (cur.offset == UINT32_MAX) {
+                auto elty = inst->getType();
+                required.use_info.has_unknown_objref |= hasObjref(elty);
+                required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa<PointerType>(elty);
+                required.use_info.hasunknownmem = true;
+            } else if (!required.use_info.addMemOp(inst, 0, cur.offset,
                                                    inst->getType(),
                                                    false, required.DL))
                 required.use_info.hasunknownmem = true;
@@ -232,7 +237,12 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg
                 return false;
             }
             auto storev = store->getValueOperand();
-            if (cur.offset == UINT32_MAX || !required.use_info.addMemOp(inst, use->getOperandNo(),
+            if (cur.offset == UINT32_MAX) {
+                auto elty = storev->getType();
+                required.use_info.has_unknown_objref |= hasObjref(elty);
+                required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa<PointerType>(elty);
+                required.use_info.hasunknownmem = true;
+            } else if (!required.use_info.addMemOp(inst, use->getOperandNo(),
                                                    cur.offset, storev->getType(),
                                                    true, required.DL))
                 required.use_info.hasunknownmem = true;
diff --git a/src/llvm-alloc-helpers.h b/src/llvm-alloc-helpers.h
index 7238d71de973f..38a0b2ba181ce 100644
--- a/src/llvm-alloc-helpers.h
+++ b/src/llvm-alloc-helpers.h
@@ -87,6 +87,11 @@ namespace jl_alloc {
         // The object is used in an error function
         bool haserror:1;
 
+        // The alloc has a Julia object reference not in an explicit field.
+        bool has_unknown_objref:1;
+        // The alloc has an aggregate Julia object reference not in an explicit field.
+        bool has_unknown_objrefaggr:1;
+
         void reset()
         {
             escaped = false;
@@ -99,6 +104,8 @@ namespace jl_alloc {
             hasunknownmem = false;
             returned = false;
             haserror = false;
+            has_unknown_objref = false;
+            has_unknown_objrefaggr = false;
             uses.clear();
             preserves.clear();
             memops.clear();
diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp
index 9cc125820d2f3..c2ebdcf662466 100644
--- a/src/llvm-alloc-opt.cpp
+++ b/src/llvm-alloc-opt.cpp
@@ -231,8 +231,8 @@ void Optimizer::optimizeAll()
             removeAlloc(orig);
             continue;
         }
-        bool has_ref = false;
-        bool has_refaggr = false;
+        bool has_ref = use_info.has_unknown_objref;
+        bool has_refaggr = use_info.has_unknown_objrefaggr;
         for (auto memop: use_info.memops) {
             auto &field = memop.second;
             if (field.hasobjref) {
@@ -576,7 +576,9 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref)
         // treat this as a non-mem2reg'd alloca
         // The ccall root and GC preserve handling below makes sure that
         // the alloca isn't optimized out.
-        buff = prolog_builder.CreateAlloca(pass.T_prjlvalue);
+        const DataLayout &DL = F.getParent()->getDataLayout();
+        auto asize = ConstantInt::get(Type::getInt64Ty(prolog_builder.getContext()), sz / DL.getTypeAllocSize(pass.T_prjlvalue));
+        buff = prolog_builder.CreateAlloca(pass.T_prjlvalue, asize);
         buff->setAlignment(Align(align));
         ptr = cast<Instruction>(prolog_builder.CreateBitCast(buff, Type::getInt8PtrTy(prolog_builder.getContext())));
     }
@@ -1182,7 +1184,9 @@ bool AllocOpt::runOnFunction(Function &F, function_ref GetDT)
     optimizer.initialize();
     optimizer.optimizeAll();
     bool modified = optimizer.finalize();
+#ifdef JL_VERIFY_PASSES
     assert(!verifyFunction(F, &errs()));
+#endif
     return modified;
 }
diff --git a/src/llvm-cpufeatures.cpp b/src/llvm-cpufeatures.cpp
index 6211d284bdd24..45b393151581c 100644
--- a/src/llvm-cpufeatures.cpp
+++ b/src/llvm-cpufeatures.cpp
@@ -110,7 +110,9 @@ bool lowerCPUFeatures(Module &M)
     for (auto I: Materialized) {
         I->eraseFromParent();
     }
+#ifdef JL_VERIFY_PASSES
     assert(!verifyModule(M, &errs()));
+#endif
     return true;
   } else {
     return false;
diff --git a/src/llvm-demote-float16.cpp b/src/llvm-demote-float16.cpp
index dc0179df6d42a..51535e7cb1f9f 100644
--- a/src/llvm-demote-float16.cpp
+++ b/src/llvm-demote-float16.cpp
@@ -153,7 +153,9 @@ static bool demoteFloat16(Function &F)
     if (erase.size() > 0) {
         for (auto V : erase)
             V->eraseFromParent();
+#ifdef JL_VERIFY_PASSES
         assert(!verifyFunction(F, &errs()));
+#endif
         return true;
     }
     else
diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp
index e7e9fe2f4f26a..2eb89a15692d9 100644
--- a/src/llvm-final-gc-lowering.cpp
+++ b/src/llvm-final-gc-lowering.cpp
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include <llvm/IR/Verifier.h>
 #include
 #include
 #include
@@ -304,7 +305,7 @@ bool FinalLowerGC::runOnFunction(Function &F)
     LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Processing function " << F.getName() << "\n");
     // Check availability of functions again since they might have been deleted.
     initFunctions(*F.getParent());
-    if (!pgcstack_getter)
+    if (!pgcstack_getter && !adoptthread_func)
         return false;
 
     // Look for a call to 'julia.get_pgcstack'.
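The #ifdef JL_VERIFY_PASSES / verifyFunction / verifyModule pairing recurs in every pass below; the macro is compiled in only when the build defines JL_VERIFY_PASSES. If one were adding a new pass, the same guard could be factored out — a sketch only, not a helper this patch introduces:

    // Hypothetical helper matching the guard pattern used in these passes.
    #ifdef JL_VERIFY_PASSES
    #define JL_VERIFY_FN(F) assert(!verifyFunction((F), &errs()))
    #else
    #define JL_VERIFY_FN(F) ((void)0)
    #endif
    // usage:
    //   bool modified = runMyLowering(F);   // hypothetical pass body
    //   JL_VERIFY_FN(F);
    //   return modified;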
@@ -390,7 +391,11 @@ bool FinalLowerGCLegacy::doInitialization(Module &M) {
 }
 
 bool FinalLowerGCLegacy::doFinalization(Module &M) {
-    return finalLowerGC.doFinalization(M);
+    auto ret = finalLowerGC.doFinalization(M);
+#ifdef JL_VERIFY_PASSES
+    assert(!verifyModule(M, &errs()));
+#endif
+    return ret;
 }
 
 
@@ -405,6 +410,9 @@ PreservedAnalyses FinalLowerGCPass::run(Module &M, ModuleAnalysisManager &AM)
         modified |= finalLowerGC.runOnFunction(F);
     }
     modified |= finalLowerGC.doFinalization(M);
+#ifdef JL_VERIFY_PASSES
+    assert(!verifyModule(M, &errs()));
+#endif
     if (modified) {
         return PreservedAnalyses::allInSet<CFGAnalyses>();
     }
diff --git a/src/llvm-julia-licm.cpp b/src/llvm-julia-licm.cpp
index ad941adf2155d..d641d61ca126b 100644
--- a/src/llvm-julia-licm.cpp
+++ b/src/llvm-julia-licm.cpp
@@ -284,7 +284,9 @@ struct JuliaLICM : public JuliaPassContext {
         if (changed && SE) {
             SE->forgetLoopDispositions(L);
         }
+#ifdef JL_VERIFY_PASSES
         assert(!verifyFunction(*L->getHeader()->getParent(), &errs()));
+#endif
         return changed;
     }
 };
diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp
index 8847a3e34be51..eaba9c7b10d98 100644
--- a/src/llvm-late-gc-lowering.cpp
+++ b/src/llvm-late-gc-lowering.cpp
@@ -426,7 +426,7 @@ unsigned getCompositeNumElements(Type *T) {
 // Walk through a Type, and record the element path to every tracked value inside
 void TrackCompositeType(Type *T, std::vector<unsigned> &Idxs, std::vector<std::vector<unsigned>> &Numberings) {
     if (isa<PointerType>(T)) {
-        if (T->getPointerAddressSpace() == AddressSpace::Tracked)
+        if (isSpecialPtr(T))
             Numberings.push_back(Idxs);
     }
     else if (isa<StructType>(T) || isa<ArrayType>(T) || isa<VectorType>(T)) {
@@ -2706,12 +2706,12 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector<int> &Colors, State
     // Insert GC frame stores
     PlaceGCFrameStores(S, AllocaSlot - 2, Colors, gcframe);
     // Insert GCFrame pops
-    for(Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
-        if (isa<ReturnInst>(I->getTerminator())) {
+    for (auto &BB : *F) {
+        if (isa<ReturnInst>(BB.getTerminator())) {
             auto popGcframe = CallInst::Create(
                 getOrDeclare(jl_intrinsics::popGCFrame),
                 {gcframe});
-            popGcframe->insertBefore(I->getTerminator());
+            popGcframe->insertBefore(BB.getTerminator());
         }
     }
 }
@@ -2720,7 +2720,7 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector<int> &Colors, State
 bool LateLowerGCFrame::runOnFunction(Function &F, bool *CFGModified) {
     initAll(*F.getParent());
     LLVM_DEBUG(dbgs() << "GC ROOT PLACEMENT: Processing function " << F.getName() << "\n");
-    if (!pgcstack_getter)
+    if (!pgcstack_getter && !adoptthread_func)
         return CleanupIR(F, nullptr, CFGModified);
 
     pgcstack = getPGCstack(F);
@@ -2741,7 +2741,11 @@ bool LateLowerGCFrameLegacy::runOnFunction(Function &F) {
         return getAnalysis<DominatorTreeWrapperPass>().getDomTree();
     };
     auto lateLowerGCFrame = LateLowerGCFrame(GetDT);
-    return lateLowerGCFrame.runOnFunction(F);
+    bool modified = lateLowerGCFrame.runOnFunction(F);
+#ifdef JL_VERIFY_PASSES
+    assert(!verifyFunction(F, &errs()));
+#endif
+    return modified;
 }
 
 PreservedAnalyses LateLowerGC::run(Function &F, FunctionAnalysisManager &AM)
@@ -2751,7 +2755,11 @@ PreservedAnalyses LateLowerGC::run(Function &F, FunctionAnalysisManager &AM)
     };
     auto lateLowerGCFrame = LateLowerGCFrame(GetDT);
     bool CFGModified = false;
-    if (lateLowerGCFrame.runOnFunction(F, &CFGModified)) {
+    bool modified = lateLowerGCFrame.runOnFunction(F, &CFGModified);
+#ifdef JL_VERIFY_PASSES
+    assert(!verifyFunction(F, &errs()));
+#endif
+    if (modified) {
         if (CFGModified) {
             return PreservedAnalyses::none();
         } else {
diff --git a/src/llvm-lower-handlers.cpp b/src/llvm-lower-handlers.cpp
index 881d2252eacbf..c8a77e2edc22f 100644
--- a/src/llvm-lower-handlers.cpp
+++ b/src/llvm-lower-handlers.cpp
@@ -17,6 +17,7 @@
 #include
 #include
 #include
+#include <llvm/IR/Verifier.h>
 #include
 #include
 #include
@@ -234,7 +235,11 @@ static bool lowerExcHandlers(Function &F) {
 
 PreservedAnalyses LowerExcHandlers::run(Function &F, FunctionAnalysisManager &AM)
 {
-    if (lowerExcHandlers(F)) {
+    bool modified = lowerExcHandlers(F);
+#ifdef JL_VERIFY_PASSES
+    assert(!verifyFunction(F, &errs()));
+#endif
+    if (modified) {
         return PreservedAnalyses::allInSet<CFGAnalyses>();
     }
     return PreservedAnalyses::all();
@@ -246,7 +251,11 @@ struct LowerExcHandlersLegacy : public FunctionPass {
     LowerExcHandlersLegacy() : FunctionPass(ID) {}
 
     bool runOnFunction(Function &F) {
-        return lowerExcHandlers(F);
+        bool modified = lowerExcHandlers(F);
+#ifdef JL_VERIFY_PASSES
+        assert(!verifyFunction(F, &errs()));
+#endif
+        return modified;
     }
 };
diff --git a/src/llvm-muladd.cpp b/src/llvm-muladd.cpp
index 148d1ca158c61..a554c32b5e657 100644
--- a/src/llvm-muladd.cpp
+++ b/src/llvm-muladd.cpp
@@ -84,7 +84,9 @@ static bool combineMulAdd(Function &F)
             }
         }
     }
+#ifdef JL_VERIFY_PASSES
     assert(!verifyFunction(F, &errs()));
+#endif
     return modified;
 }
diff --git a/src/llvm-multiversioning.cpp b/src/llvm-multiversioning.cpp
index e1110c556369d..e4581cc713f25 100644
--- a/src/llvm-multiversioning.cpp
+++ b/src/llvm-multiversioning.cpp
@@ -1134,8 +1134,9 @@ static bool runMultiVersioning(Module &M, function_ref Get
     // At this point, we should have fixed up all the uses of the cloned functions
     // and collected all the shared/target-specific relocations.
     clone.emit_metadata();
-
+#ifdef JL_VERIFY_PASSES
     assert(!verifyModule(M, &errs()));
+#endif
     return true;
 }
diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp
index f0c0c6ee77b44..3b55339984516 100644
--- a/src/llvm-pass-helpers.cpp
+++ b/src/llvm-pass-helpers.cpp
@@ -23,7 +23,7 @@ JuliaPassContext::JuliaPassContext()
     tbaa_gcframe(nullptr), tbaa_tag(nullptr),
 
-    pgcstack_getter(nullptr), gc_flush_func(nullptr),
+    pgcstack_getter(nullptr), adoptthread_func(nullptr), gc_flush_func(nullptr),
     gc_preserve_begin_func(nullptr), gc_preserve_end_func(nullptr),
     pointer_from_objref_func(nullptr), alloc_obj_func(nullptr),
     typeof_func(nullptr), write_barrier_func(nullptr),
@@ -44,6 +44,7 @@ void JuliaPassContext::initFunctions(Module &M)
     tbaa_tag = tbaa_make_child_with_context(llvmctx, "jtbaa_tag", tbaa_data_scalar).first;
 
     pgcstack_getter = M.getFunction("julia.get_pgcstack");
+    adoptthread_func = M.getFunction("julia.get_pgcstack_or_new");
     gc_flush_func = M.getFunction("julia.gcroot_flush");
     gc_preserve_begin_func = M.getFunction("llvm.julia.gc_preserve_begin");
     gc_preserve_end_func = M.getFunction("llvm.julia.gc_preserve_end");
@@ -70,10 +71,13 @@ void JuliaPassContext::initAll(Module &M)
 
 llvm::CallInst *JuliaPassContext::getPGCstack(llvm::Function &F) const
 {
-    for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end();
-         pgcstack_getter && I != E; ++I) {
-        if (CallInst *callInst = dyn_cast<CallInst>(&*I)) {
-            if (callInst->getCalledOperand() == pgcstack_getter) {
+    if (!pgcstack_getter && !adoptthread_func)
+        return nullptr;
+    for (auto &I : F.getEntryBlock()) {
+        if (CallInst *callInst = dyn_cast<CallInst>(&I)) {
+            Value *callee = callInst->getCalledOperand();
+            if ((pgcstack_getter && callee == pgcstack_getter) ||
+                (adoptthread_func && callee == adoptthread_func)) {
                 return callInst;
             }
         }
     }
diff --git a/src/llvm-pass-helpers.h b/src/llvm-pass-helpers.h
index 64d5dc00e2c5b..68f6efe42be6d 100644
--- a/src/llvm-pass-helpers.h
b/src/llvm-pass-helpers.h @@ -50,6 +50,7 @@ struct JuliaPassContext { // Intrinsics. llvm::Function *pgcstack_getter; + llvm::Function *adoptthread_func; llvm::Function *gc_flush_func; llvm::Function *gc_preserve_begin_func; llvm::Function *gc_preserve_end_func; diff --git a/src/llvm-propagate-addrspaces.cpp b/src/llvm-propagate-addrspaces.cpp index e2d390a5e4395..53b3fce090c23 100644 --- a/src/llvm-propagate-addrspaces.cpp +++ b/src/llvm-propagate-addrspaces.cpp @@ -302,7 +302,11 @@ struct PropagateJuliaAddrspacesLegacy : FunctionPass { PropagateJuliaAddrspacesLegacy() : FunctionPass(ID) {} bool runOnFunction(Function &F) override { - return propagateJuliaAddrspaces(F); + bool modified = propagateJuliaAddrspaces(F); +#ifdef JL_VERIFY_PASSES + assert(!verifyFunction(F, &errs())); +#endif + return modified; } }; @@ -314,7 +318,12 @@ Pass *createPropagateJuliaAddrspaces() { } PreservedAnalyses PropagateJuliaAddrspacesPass::run(Function &F, FunctionAnalysisManager &AM) { - if (propagateJuliaAddrspaces(F)) { + bool modified = propagateJuliaAddrspaces(F); + +#ifdef JL_VERIFY_PASSES + assert(!verifyFunction(F, &errs())); +#endif + if (modified) { return PreservedAnalyses::allInSet(); } else { return PreservedAnalyses::all(); diff --git a/src/llvm-ptls.cpp b/src/llvm-ptls.cpp index e948e1c1a10bc..c8d7ffbf0240b 100644 --- a/src/llvm-ptls.cpp +++ b/src/llvm-ptls.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -35,19 +36,19 @@ typedef Instruction TerminatorInst; namespace { struct LowerPTLS { - LowerPTLS(bool imaging_mode=false) - : imaging_mode(imaging_mode) + LowerPTLS(Module &M, bool imaging_mode=false) + : imaging_mode(imaging_mode), M(&M) {} - bool runOnModule(Module &M, bool *CFGModified); + bool run(bool *CFGModified); private: const bool imaging_mode; Module *M; - Function *pgcstack_getter; - MDNode *tbaa_const; - FunctionType *FT_pgcstack_getter; - PointerType *T_pgcstack_getter; - PointerType *T_pppjlvalue; + MDNode *tbaa_const{nullptr}; + MDNode *tbaa_gcframe{nullptr}; + FunctionType *FT_pgcstack_getter{nullptr}; + PointerType *T_pgcstack_getter{nullptr}; + PointerType *T_pppjlvalue{nullptr}; GlobalVariable *pgcstack_func_slot{nullptr}; GlobalVariable *pgcstack_key_slot{nullptr}; GlobalVariable *pgcstack_offset{nullptr}; @@ -55,7 +56,7 @@ struct LowerPTLS { Instruction *emit_pgcstack_tp(Value *offset, Instruction *insertBefore) const; template T *add_comdat(T *G) const; GlobalVariable *create_aliased_global(Type *T, StringRef name) const; - void fix_pgcstack_use(CallInst *pgcstack, bool *CFGModified); + void fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter, bool or_new, bool *CFGModified); }; void LowerPTLS::set_pgcstack_attrs(CallInst *pgcstack) const @@ -159,19 +160,77 @@ inline T *LowerPTLS::add_comdat(T *G) const return G; } -void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, bool *CFGModified) +void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter, bool or_new, bool *CFGModified) { if (pgcstack->use_empty()) { pgcstack->eraseFromParent(); return; } + if (or_new) { + // pgcstack(); + // if (pgcstack != nullptr) + // last_gc_state = emit_gc_unsafe_enter(ctx); + // phi = pgcstack; // fast + // else + // last_gc_state = gc_safe; + // phi = adopt(); // slow + // use phi; + // if (!retboxed) + // foreach(retinst) + // emit_gc_unsafe_leave(ctx, last_gc_state); + auto phi = PHINode::Create(pgcstack->getType(), 2, ""); + phi->insertAfter(pgcstack); + pgcstack->replaceAllUsesWith(phi); + MDBuilder 
MDB(pgcstack->getContext()); + SmallVector<uint32_t, 2> Weights{9, 1}; + TerminatorInst *fastTerm; + TerminatorInst *slowTerm; + auto cmp = new ICmpInst(phi, CmpInst::ICMP_NE, pgcstack, Constant::getNullValue(pgcstack->getType())); + SplitBlockAndInsertIfThenElse(cmp, phi, &fastTerm, &slowTerm, MDB.createBranchWeights(Weights)); + if (CFGModified) + *CFGModified = true; + // emit slow branch code + CallInst *adopt = cast<CallInst>(pgcstack->clone()); + Function *adoptFunc = M->getFunction(XSTR(jl_adopt_thread)); + if (adoptFunc == NULL) { + adoptFunc = Function::Create(pgcstack_getter->getFunctionType(), + pgcstack_getter->getLinkage(), pgcstack_getter->getAddressSpace(), + XSTR(jl_adopt_thread), M); + adoptFunc->copyAttributesFrom(pgcstack_getter); + adoptFunc->copyMetadata(pgcstack_getter, 0); + } + adopt->setCalledFunction(adoptFunc); + adopt->insertBefore(slowTerm); + phi->addIncoming(adopt, slowTerm->getParent()); + // emit fast branch code + IRBuilder<> builder(fastTerm->getParent()); + fastTerm->removeFromParent(); + MDNode *tbaa = tbaa_gcframe; + Value *prior = emit_gc_unsafe_enter(builder, get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, pgcstack), tbaa)); + builder.Insert(fastTerm); + phi->addIncoming(pgcstack, fastTerm->getParent()); + // emit pre-return cleanup + if (CountTrackedPointers(pgcstack->getParent()->getParent()->getReturnType()).count == 0) { + auto last_gc_state = PHINode::Create(Type::getInt8Ty(pgcstack->getContext()), 2, "", phi); + // if we called jl_adopt_thread, we must end this cfunction back in the safe-state + last_gc_state->addIncoming(ConstantInt::get(Type::getInt8Ty(M->getContext()), JL_GC_STATE_SAFE), slowTerm->getParent()); + last_gc_state->addIncoming(prior, fastTerm->getParent()); + for (auto &BB : *pgcstack->getParent()->getParent()) { + if (isa<ReturnInst>(BB.getTerminator())) { + IRBuilder<> builder(BB.getTerminator()); + emit_gc_unsafe_leave(builder, get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, phi), tbaa), last_gc_state); + } + } + } + } if (imaging_mode) { if (jl_tls_elf_support) { // if (offset != 0) - // pgcstack = tp + offset; + // pgcstack = tp + offset; // fast // else - // pgcstack = getter(); + // pgcstack = getter(); // slow auto offset = new LoadInst(getSizeTy(pgcstack->getContext()), pgcstack_offset, "", false, pgcstack); offset->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const); offset->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(pgcstack->getContext(), None)); @@ -184,7 +243,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, bool *CFGModified) SplitBlockAndInsertIfThenElse(cmp, pgcstack, &fastTerm, &slowTerm, MDB.createBranchWeights(Weights)); if (CFGModified) - *CFGModified = true; + *CFGModified = true; auto fastTLS = emit_pgcstack_tp(offset, fastTerm); auto phi = PHINode::Create(T_pppjlvalue, 2, "", pgcstack); @@ -248,37 +307,44 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, bool *CFGModified) } } -bool LowerPTLS::runOnModule(Module &_M, bool *CFGModified) +bool LowerPTLS::run(bool *CFGModified) { - M = &_M; - pgcstack_getter = M->getFunction("julia.get_pgcstack"); - if (!pgcstack_getter) - return false; + bool need_init = true; + auto runOnGetter = [&](bool or_new) { + Function *pgcstack_getter = M->getFunction(or_new ?
"julia.get_pgcstack_or_new" : "julia.get_pgcstack"); + if (!pgcstack_getter) + return false; - tbaa_const = tbaa_make_child_with_context(_M.getContext(), "jtbaa_const", nullptr, true).first; + if (need_init) { + tbaa_const = tbaa_make_child_with_context(M->getContext(), "jtbaa_const", nullptr, true).first; + tbaa_gcframe = tbaa_make_child_with_context(M->getContext(), "jtbaa_gcframe").first; - FT_pgcstack_getter = pgcstack_getter->getFunctionType(); + FT_pgcstack_getter = pgcstack_getter->getFunctionType(); #if defined(_OS_DARWIN_) - assert(sizeof(jl_pgcstack_key_t) == sizeof(uintptr_t)); - FT_pgcstack_getter = FunctionType::get(FT_pgcstack_getter->getReturnType(), {getSizeTy(_M.getContext())}, false); + assert(sizeof(jl_pgcstack_key_t) == sizeof(uintptr_t)); + FT_pgcstack_getter = FunctionType::get(FT_pgcstack_getter->getReturnType(), {getSizeTy(M->getContext())}, false); #endif - T_pgcstack_getter = FT_pgcstack_getter->getPointerTo(); - T_pppjlvalue = cast<PointerType>(FT_pgcstack_getter->getReturnType()); - if (imaging_mode) { - pgcstack_func_slot = create_aliased_global(T_pgcstack_getter, "jl_pgcstack_func_slot"); - pgcstack_key_slot = create_aliased_global(getSizeTy(_M.getContext()), "jl_pgcstack_key_slot"); // >= sizeof(jl_pgcstack_key_t) - pgcstack_offset = create_aliased_global(getSizeTy(_M.getContext()), "jl_tls_offset"); - } + T_pgcstack_getter = FT_pgcstack_getter->getPointerTo(); + T_pppjlvalue = cast<PointerType>(FT_pgcstack_getter->getReturnType()); + if (imaging_mode) { + pgcstack_func_slot = create_aliased_global(T_pgcstack_getter, "jl_pgcstack_func_slot"); + pgcstack_key_slot = create_aliased_global(getSizeTy(M->getContext()), "jl_pgcstack_key_slot"); // >= sizeof(jl_pgcstack_key_t) + pgcstack_offset = create_aliased_global(getSizeTy(M->getContext()), "jl_tls_offset"); + } + need_init = false; + } - for (auto it = pgcstack_getter->user_begin(); it != pgcstack_getter->user_end();) { - auto call = cast<CallInst>(*it); - ++it; - assert(call->getCalledOperand() == pgcstack_getter); - fix_pgcstack_use(call, CFGModified); - } - assert(pgcstack_getter->use_empty()); - pgcstack_getter->eraseFromParent(); - return true; + for (auto it = pgcstack_getter->user_begin(); it != pgcstack_getter->user_end();) { + auto call = cast<CallInst>(*it); + ++it; + assert(call->getCalledOperand() == pgcstack_getter); + fix_pgcstack_use(call, pgcstack_getter, or_new, CFGModified); + } + assert(pgcstack_getter->use_empty()); + pgcstack_getter->eraseFromParent(); + return true; + }; + return runOnGetter(false) + runOnGetter(true); } struct LowerPTLSLegacy: public ModulePass { @@ -290,8 +356,12 @@ struct LowerPTLSLegacy: public ModulePass { bool imaging_mode; bool runOnModule(Module &M) override { - LowerPTLS lower(imaging_mode); - return lower.runOnModule(M, nullptr); + LowerPTLS lower(M, imaging_mode); + bool modified = lower.run(nullptr); +#ifdef JL_VERIFY_PASSES + assert(!verifyModule(M, &errs())); +#endif + return modified; } }; @@ -304,9 +374,13 @@ static RegisterPass<LowerPTLSLegacy> X("LowerPTLS", "LowerPTLS Pass", } // anonymous namespace PreservedAnalyses LowerPTLSPass::run(Module &M, ModuleAnalysisManager &AM) { - LowerPTLS lower(imaging_mode); + LowerPTLS lower(M, imaging_mode); bool CFGModified = false; - if (lower.runOnModule(M, &CFGModified)) { + bool modified = lower.run(&CFGModified); +#ifdef JL_VERIFY_PASSES + assert(!verifyModule(M, &errs())); +#endif + if (modified) { if (CFGModified) { return PreservedAnalyses::none(); } else { diff --git a/src/llvm-remove-addrspaces.cpp b/src/llvm-remove-addrspaces.cpp index 814e31ec2252f..1cc09018958af
100644 --- a/src/llvm-remove-addrspaces.cpp +++ b/src/llvm-remove-addrspaces.cpp @@ -8,6 +8,7 @@ #include #include #include +#include <llvm/IR/Verifier.h> #include #include #include @@ -464,7 +465,11 @@ struct RemoveAddrspacesPassLegacy : public ModulePass { public: bool runOnModule(Module &M) override { - return removeAddrspaces(M, ASRemapper); + bool modified = removeAddrspaces(M, ASRemapper); +#ifdef JL_VERIFY_PASSES + assert(!verifyModule(M, &errs())); +#endif + return modified; } }; @@ -484,7 +489,11 @@ Pass *createRemoveAddrspacesPass( RemoveAddrspacesPass::RemoveAddrspacesPass() : RemoveAddrspacesPass(removeAllAddrspaces) {} PreservedAnalyses RemoveAddrspacesPass::run(Module &M, ModuleAnalysisManager &AM) { - if (removeAddrspaces(M, ASRemapper)) { + bool modified = removeAddrspaces(M, ASRemapper); +#ifdef JL_VERIFY_PASSES + assert(!verifyModule(M, &errs())); +#endif + if (modified) { return PreservedAnalyses::allInSet<CFGAnalyses>(); } else { return PreservedAnalyses::all(); diff --git a/src/llvm-simdloop.cpp b/src/llvm-simdloop.cpp index 1848b429869dd..ae3065bc70b5f 100644 --- a/src/llvm-simdloop.cpp +++ b/src/llvm-simdloop.cpp @@ -232,8 +232,9 @@ static bool markLoopInfo(Module &M, Function *marker, function_ref<LoopInfo &(Function &)> GetLI) I->deleteValue(); marker->eraseFromParent(); - +#ifdef JL_VERIFY_PASSES assert(!verifyModule(M, &errs())); +#endif return Changed; } diff --git a/src/method.c b/src/method.c index f0e2598750801..ec49fdf32a193 100644 --- a/src/method.c +++ b/src/method.c @@ -17,6 +17,7 @@ extern "C" { extern jl_value_t *jl_builtin_getfield; extern jl_value_t *jl_builtin_tuple; +jl_methtable_t *jl_kwcall_mt; jl_method_t *jl_make_opaque_closure_method(jl_module_t *module, jl_value_t *name, int nargs, jl_value_t *functionloc, jl_code_info_t *ci, int isva); @@ -705,6 +706,24 @@ static void jl_method_set_source(jl_method_t *m, jl_code_info_t *src) else if (nargs >= 1 && jl_exprarg(st, 0) == (jl_value_t*)jl_specialize_sym) { if (nargs == 1) // bare `@specialize` is special: it causes specialization on all args m->nospecialize = 0; + for (j = 1; j < nargs; j++) { + jl_value_t *aj = jl_exprarg(st, j); + if (!jl_is_slot(aj) && !jl_is_argument(aj)) + continue; + int sn = (int)jl_slot_number(aj) - 2; + if (sn < 0) // @specialize on self is valid but currently ignored + continue; + if (sn > (m->nargs - 2)) { + jl_error("@specialize annotation applied to a non-argument"); + } + if (sn >= sizeof(m->nospecialize) * 8) { + jl_printf(JL_STDERR, + "WARNING: @specialize annotation only supported on the first %d arguments.\n", + (int)(sizeof(m->nospecialize) * 8)); + continue; + } + m->nospecialize &= ~(1 << sn); + } st = jl_nothing; } else if (nargs == 2 && jl_exprarg(st, 0) == (jl_value_t*)jl_generated_sym) { @@ -884,30 +903,30 @@ JL_DLLEXPORT jl_value_t *jl_generic_function_def(jl_sym_t *name, return gf; } -static jl_methtable_t *first_methtable(jl_value_t *a JL_PROPAGATES_ROOT, int got_tuple1) JL_NOTSAFEPOINT +static jl_methtable_t *nth_methtable(jl_value_t *a JL_PROPAGATES_ROOT, int n) JL_NOTSAFEPOINT { if (jl_is_datatype(a)) { - if (got_tuple1) { + if (n == 0) { jl_methtable_t *mt = ((jl_datatype_t*)a)->name->mt; if (mt != NULL) return mt; } if (jl_is_tuple_type(a)) { - if (jl_nparams(a) >= 1) - return first_methtable(jl_tparam0(a), 1); + if (jl_nparams(a) >= n) + return nth_methtable(jl_tparam(a, n - 1), 0); } } else if (jl_is_typevar(a)) { - return first_methtable(((jl_tvar_t*)a)->ub, got_tuple1); + return nth_methtable(((jl_tvar_t*)a)->ub, n); } else if (jl_is_unionall(a)) { - return first_methtable(((jl_unionall_t*)a)->body, got_tuple1);
+ return nth_methtable(((jl_unionall_t*)a)->body, n); } else if (jl_is_uniontype(a)) { jl_uniontype_t *u = (jl_uniontype_t*)a; - jl_methtable_t *m1 = first_methtable(u->a, got_tuple1); + jl_methtable_t *m1 = nth_methtable(u->a, n); if ((jl_value_t*)m1 != jl_nothing) { - jl_methtable_t *m2 = first_methtable(u->b, got_tuple1); + jl_methtable_t *m2 = nth_methtable(u->b, n); if (m1 == m2) return m1; } @@ -918,7 +937,15 @@ static jl_methtable_t *first_methtable(jl_value_t *a JL_PROPAGATES_ROOT, int got // get the MethodTable for dispatch, or `nothing` if cannot be determined JL_DLLEXPORT jl_methtable_t *jl_method_table_for(jl_value_t *argtypes JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT { - return first_methtable(argtypes, 0); + return nth_methtable(argtypes, 1); +} + +jl_methtable_t *jl_kwmethod_table_for(jl_value_t *argtypes JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT +{ + jl_methtable_t *kwmt = nth_methtable(argtypes, 3); + if ((jl_value_t*)kwmt == jl_nothing) + return NULL; + return kwmt; } JL_DLLEXPORT jl_methtable_t *jl_method_get_table(jl_method_t *method JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT @@ -929,7 +956,7 @@ JL_DLLEXPORT jl_methtable_t *jl_method_get_table(jl_method_t *method JL_PROPAGAT // get the MethodTable implied by a single given type, or `nothing` JL_DLLEXPORT jl_methtable_t *jl_argument_method_table(jl_value_t *argt JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT { - return first_methtable(argt, 1); + return nth_methtable(argt, 0); } jl_array_t *jl_all_methods JL_GLOBALLY_ROOTED; @@ -973,11 +1000,13 @@ JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata, int32_t line = jl_linenode_line(functionloc); // TODO: derive our debug name from the syntax instead of the type - name = mt->name; - if (mt == jl_type_type_mt || mt == jl_nonfunction_mt || external_mt) { + jl_methtable_t *kwmt = mt == jl_kwcall_mt ? jl_kwmethod_table_for(argtype) : mt; + // if we have a kwcall, try to derive the name from the callee argument method table + name = (kwmt ? kwmt : mt)->name; + if (kwmt == jl_type_type_mt || kwmt == jl_nonfunction_mt || external_mt) { // our value for `name` is bad, try to guess what the syntax might have had, // like `jl_static_show_func_sig` might have come up with - jl_datatype_t *dt = jl_first_argument_datatype(argtype); + jl_datatype_t *dt = jl_nth_argument_datatype(argtype, mt == jl_kwcall_mt ? 3 : 1); if (dt != NULL) { name = dt->name->name; if (jl_is_type_type((jl_value_t*)dt)) { diff --git a/src/module.c b/src/module.c index 805f4ca1affac..1e1bf4d52436e 100644 --- a/src/module.c +++ b/src/module.c @@ -394,12 +394,11 @@ JL_DLLEXPORT jl_value_t *jl_binding_owner(jl_module_t *m, jl_sym_t *var) JL_DLLEXPORT jl_value_t *jl_binding_type(jl_module_t *m, jl_sym_t *var) { JL_LOCK(&m->lock); - jl_binding_t *b = (jl_binding_t*)ptrhash_get(&m->bindings, var); - if (b == HT_NOTFOUND || b->owner == NULL) - b = using_resolve_binding(m, var, NULL, 0); + jl_binding_t *b = _jl_get_module_binding(m, var); JL_UNLOCK(&m->lock); - if (b == NULL) + if (b == HT_NOTFOUND || b->owner == NULL) return jl_nothing; + b = jl_get_binding(m, var); jl_value_t *ty = jl_atomic_load_relaxed(&b->ty); return ty ? 
ty : jl_nothing; } diff --git a/src/opaque_closure.c b/src/opaque_closure.c index d34989181b7ad..7a01d254ce71a 100644 --- a/src/opaque_closure.c +++ b/src/opaque_closure.c @@ -110,9 +110,6 @@ JL_DLLEXPORT jl_code_instance_t* jl_new_codeinst( uint32_t ipo_effects, uint32_t effects, jl_value_t *argescapes, uint8_t relocatability); -JL_DLLEXPORT void jl_mi_cache_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMENT, - jl_code_instance_t *ci JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED); - JL_DLLEXPORT jl_opaque_closure_t *jl_new_opaque_closure_from_code_info(jl_tupletype_t *argt, jl_value_t *rt_lb, jl_value_t *rt_ub, jl_module_t *mod, jl_code_info_t *ci, int lineno, jl_value_t *file, int nargs, int isva, jl_value_t *env) { diff --git a/src/partr.c b/src/partr.c index eeb0d0f456d97..ec6bbe3e5720a 100644 --- a/src/partr.c +++ b/src/partr.c @@ -26,6 +26,9 @@ static const int16_t not_sleeping = 0; // it is acceptable for the thread to be sleeping. static const int16_t sleeping = 1; +// this thread is dead. +static const int16_t sleeping_like_the_dead JL_UNUSED = 2; + // invariant: No thread is ever asleep unless sleep_check_state is sleeping (or we have a wakeup signal pending). // invariant: Any particular thread is not asleep unless that thread's sleep_check_state is sleeping. // invariant: The transition of a thread state to sleeping must be followed by a check that there wasn't work pending for it. @@ -182,7 +185,7 @@ static int sleep_check_after_threshold(uint64_t *start_cycles) static int wake_thread(int16_t tid) { - jl_ptls_t other = jl_all_tls_states[tid]; + jl_ptls_t other = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; int8_t state = sleeping; if (jl_atomic_load_relaxed(&other->sleep_check_state) == sleeping) { @@ -229,7 +232,7 @@ JL_DLLEXPORT void jl_wakeup_thread(int16_t tid) if (wake_thread(tid)) { // check if we need to notify uv_run too jl_fence(); - jl_ptls_t other = jl_all_tls_states[tid]; + jl_ptls_t other = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; jl_task_t *tid_task = jl_atomic_load_relaxed(&other->current_task); // now that we have changed the thread to not-sleeping, ensure that // either it has not yet acquired the libuv lock, or that it will @@ -244,7 +247,8 @@ JL_DLLEXPORT void jl_wakeup_thread(int16_t tid) // in the future, we might want to instead wake some fraction of threads, // and let each of those wake additional threads if they find work int anysleep = 0; - for (tid = 0; tid < jl_n_threads; tid++) { + int nthreads = jl_atomic_load_acquire(&jl_n_threads); + for (tid = 0; tid < nthreads; tid++) { if (tid != self) anysleep |= wake_thread(tid); } diff --git a/src/rtutils.c b/src/rtutils.c index f3a2e745ed651..497b348f871d5 100644 --- a/src/rtutils.c +++ b/src/rtutils.c @@ -202,12 +202,6 @@ JL_DLLEXPORT void JL_NORETURN jl_eof_error(void) jl_throw(jl_new_struct(eof_error)); } -// get kwsorter field, with appropriate error check and message -JL_DLLEXPORT jl_value_t *jl_get_keyword_sorter(jl_value_t *f) -{ - return jl_get_kwsorter(jl_typeof(f)); -} - JL_DLLEXPORT void jl_typeassert(jl_value_t *x, jl_value_t *t) { if (!jl_isa(x,t)) @@ -583,29 +577,29 @@ JL_DLLEXPORT int jl_is_identifier(char *str) JL_NOTSAFEPOINT return 1; } -static jl_datatype_t *first_arg_datatype(jl_value_t *a JL_PROPAGATES_ROOT, int got_tuple1) JL_NOTSAFEPOINT +static jl_datatype_t *nth_arg_datatype(jl_value_t *a JL_PROPAGATES_ROOT, int n) JL_NOTSAFEPOINT { if (jl_is_datatype(a)) { - if (got_tuple1) + if (n == 0) return (jl_datatype_t*)a; if (jl_is_tuple_type(a)) { - if (jl_nparams(a) < 1) + if 
(jl_nparams(a) < n) return NULL; - return first_arg_datatype(jl_tparam0(a), 1); + return nth_arg_datatype(jl_tparam(a, n - 1), 0); } return NULL; } else if (jl_is_typevar(a)) { - return first_arg_datatype(((jl_tvar_t*)a)->ub, got_tuple1); + return nth_arg_datatype(((jl_tvar_t*)a)->ub, n); } else if (jl_is_unionall(a)) { - return first_arg_datatype(((jl_unionall_t*)a)->body, got_tuple1); + return nth_arg_datatype(((jl_unionall_t*)a)->body, n); } else if (jl_is_uniontype(a)) { jl_uniontype_t *u = (jl_uniontype_t*)a; - jl_datatype_t *d1 = first_arg_datatype(u->a, got_tuple1); + jl_datatype_t *d1 = nth_arg_datatype(u->a, n); if (d1 == NULL) return NULL; - jl_datatype_t *d2 = first_arg_datatype(u->b, got_tuple1); + jl_datatype_t *d2 = nth_arg_datatype(u->b, n); if (d2 == NULL || d1->name != d2->name) return NULL; return d1; @@ -614,15 +608,15 @@ static jl_datatype_t *first_arg_datatype(jl_value_t *a JL_PROPAGATES_ROOT, int g } // get DataType of first tuple element (if present), or NULL if cannot be determined -JL_DLLEXPORT jl_datatype_t *jl_first_argument_datatype(jl_value_t *argtypes JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT +jl_datatype_t *jl_nth_argument_datatype(jl_value_t *argtypes JL_PROPAGATES_ROOT, int n) JL_NOTSAFEPOINT { - return first_arg_datatype(argtypes, 0); + return nth_arg_datatype(argtypes, n); } // get DataType implied by a single given type, or `nothing` JL_DLLEXPORT jl_value_t *jl_argument_datatype(jl_value_t *argt JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT { - jl_datatype_t *dt = first_arg_datatype(argt, 1); + jl_datatype_t *dt = nth_arg_datatype(argt, 0); if (dt == NULL) return jl_nothing; return (jl_value_t*)dt; @@ -1260,7 +1254,7 @@ JL_DLLEXPORT size_t jl_static_show_func_sig(JL_STREAM *s, jl_value_t *type) JL_N { size_t n = 0; size_t i; - jl_value_t *ftype = (jl_value_t*)jl_first_argument_datatype(type); + jl_value_t *ftype = (jl_value_t*)jl_nth_argument_datatype(type, 1); if (ftype == NULL) return jl_static_show(s, type); jl_unionall_t *tvars = (jl_unionall_t*)type; @@ -1279,7 +1273,9 @@ JL_DLLEXPORT size_t jl_static_show_func_sig(JL_STREAM *s, jl_value_t *type) JL_N n += jl_static_show(s, type); return n; } - if (jl_nparams(ftype) == 0 || ftype == ((jl_datatype_t*)ftype)->name->wrapper) { + if ((jl_nparams(ftype) == 0 || ftype == ((jl_datatype_t*)ftype)->name->wrapper) && + ((jl_datatype_t*)ftype)->name->mt != jl_type_type_mt && + ((jl_datatype_t*)ftype)->name->mt != jl_nonfunction_mt) { n += jl_printf(s, "%s", jl_symbol_name(((jl_datatype_t*)ftype)->name->mt->name)); } else { diff --git a/src/safepoint.c b/src/safepoint.c index b2feccf74e068..1ff26d616a5d8 100644 --- a/src/safepoint.c +++ b/src/safepoint.c @@ -111,10 +111,6 @@ void jl_safepoint_init(void) int jl_safepoint_start_gc(void) { - if (jl_n_threads == 1) { - jl_atomic_store_relaxed(&jl_gc_running, 1); - return 1; - } // The thread should have set this already assert(jl_atomic_load_relaxed(&jl_current_task->ptls->gc_state) == JL_GC_STATE_WAITING); uv_mutex_lock(&safepoint_lock); @@ -137,10 +133,6 @@ int jl_safepoint_start_gc(void) void jl_safepoint_end_gc(void) { assert(jl_atomic_load_relaxed(&jl_gc_running)); - if (jl_n_threads == 1) { - jl_atomic_store_relaxed(&jl_gc_running, 0); - return; - } uv_mutex_lock(&safepoint_lock); // Need to reset the page protection before resetting the flag since // the thread will trigger a segfault immediately after returning from diff --git a/src/signal-handling.c b/src/signal-handling.c index 3b1e4934e764b..5154a9f563f30 100644 --- a/src/signal-handling.c +++ b/src/signal-handling.c @@ 
-182,14 +182,10 @@ static int *profile_get_randperm(int size) JL_DLLEXPORT int jl_profile_is_buffer_full(void) { - // declare buffer full if there isn't enough room to take samples across all threads - #if defined(_OS_WINDOWS_) - uint64_t nthreads = 1; // windows only profiles the main thread - #else - uint64_t nthreads = jl_n_threads; - #endif - // the `+ 6` is for the two block terminators `0` plus 4 metadata entries - return bt_size_cur + (((JL_BT_MAX_ENTRY_SIZE + 1) + 6) * nthreads) > bt_size_max; + // Declare buffer full if there isn't enough room to sample even just the + // thread metadata and one max-sized frame. The `+ 6` is for the two block + // terminator `0`'s plus the 4 metadata entries. + return bt_size_cur + ((JL_BT_MAX_ENTRY_SIZE + 1) + 6) > bt_size_max; } static uint64_t jl_last_sigint_trigger = 0; diff --git a/src/signals-mach.c b/src/signals-mach.c index 5a1816a80f2b2..edc2b42215f67 100644 --- a/src/signals-mach.c +++ b/src/signals-mach.c @@ -50,7 +50,7 @@ void jl_mach_gc_end(void) uintptr_t item = (uintptr_t)suspended_threads.items[i]; int16_t tid = (int16_t)item; int8_t gc_state = (int8_t)(item >> 8); - jl_ptls_t ptls2 = jl_all_tls_states[tid]; + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; jl_atomic_store_release(&ptls2->gc_state, gc_state); thread_resume(pthread_mach_thread_np(ptls2->system_id)); } @@ -119,7 +119,8 @@ static void allocate_mach_handler() if (_keymgr_set_lockmode_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST, NM_ALLOW_RECURSION)) jl_error("_keymgr_set_lockmode_processwide_ptr failed"); - arraylist_new(&suspended_threads, jl_n_threads); + int16_t nthreads = jl_atomic_load_acquire(&jl_n_threads); + arraylist_new(&suspended_threads, nthreads); // we will resize later (inside safepoint_lock), if needed pthread_t thread; pthread_attr_t attr; kern_return_t ret; @@ -221,7 +222,7 @@ static void jl_throw_in_thread(int tid, mach_port_t thread, jl_value_t *exceptio host_thread_state_t state; kern_return_t ret = thread_get_state(thread, MACH_THREAD_STATE, (thread_state_t)&state, &count); HANDLE_MACH_ERROR("thread_get_state", ret); - jl_ptls_t ptls2 = jl_all_tls_states[tid]; + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; if (!jl_get_safe_restore()) { assert(exception); ptls2->bt_size = @@ -265,8 +266,9 @@ kern_return_t catch_mach_exception_raise( #endif int16_t tid; jl_ptls_t ptls2 = NULL; - for (tid = 0; tid < jl_n_threads; tid++) { - jl_ptls_t _ptls2 = jl_all_tls_states[tid]; + int nthreads = jl_atomic_load_acquire(&jl_n_threads); + for (tid = 0; tid < nthreads; tid++) { + jl_ptls_t _ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; if (pthread_mach_thread_np(_ptls2->system_id) == thread) { ptls2 = _ptls2; break; @@ -381,9 +383,15 @@ static void attach_exception_port(thread_port_t thread, int segv_only) HANDLE_MACH_ERROR("thread_set_exception_ports", ret); } -static void jl_thread_suspend_and_get_state2(int tid, host_thread_state_t *ctx) +static int jl_thread_suspend_and_get_state2(int tid, host_thread_state_t *ctx) { - jl_ptls_t ptls2 = jl_all_tls_states[tid]; + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; + if (ptls2 == NULL) // this thread is not alive + return 0; + jl_task_t *ct2 = ptls2 ? 
jl_atomic_load_relaxed(&ptls2->current_task) : NULL; + if (ct2 == NULL) // this thread is already dead + return 0; + mach_port_t thread = pthread_mach_thread_np(ptls2->system_id); kern_return_t ret = thread_suspend(thread); @@ -395,18 +403,22 @@ static void jl_thread_suspend_and_get_state2(int tid, host_thread_state_t *ctx) // Get the state of the suspended thread ret = thread_get_state(thread, MACH_THREAD_STATE, (thread_state_t)ctx, &count); + return 1; } static void jl_thread_suspend_and_get_state(int tid, unw_context_t **ctx) { static host_thread_state_t state; - jl_thread_suspend_and_get_state2(tid, &state); + if (!jl_thread_suspend_and_get_state2(tid, &state)) { + *ctx = NULL; + return; + } *ctx = (unw_context_t*)&state; } static void jl_thread_resume(int tid, int sig) { - jl_ptls_t ptls2 = jl_all_tls_states[tid]; + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; mach_port_t thread = pthread_mach_thread_np(ptls2->system_id); kern_return_t ret = thread_resume(thread); HANDLE_MACH_ERROR("thread_resume", ret); @@ -416,7 +428,7 @@ static void jl_thread_resume(int tid, int sig) // or if SIGINT happens too often. static void jl_try_deliver_sigint(void) { - jl_ptls_t ptls2 = jl_all_tls_states[0]; + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[0]; mach_port_t thread = pthread_mach_thread_np(ptls2->system_id); kern_return_t ret = thread_suspend(thread); @@ -452,11 +464,12 @@ CFI_NORETURN static void jl_exit_thread0(int exitstate, jl_bt_element_t *bt_data, size_t bt_size) { - jl_ptls_t ptls2 = jl_all_tls_states[0]; + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[0]; mach_port_t thread = pthread_mach_thread_np(ptls2->system_id); host_thread_state_t state; - jl_thread_suspend_and_get_state2(0, &state); + if (!jl_thread_suspend_and_get_state2(0, &state)) + return; unw_context_t *uc = (unw_context_t*)&state; // This aborts `sleep` and other syscalls. @@ -608,8 +621,9 @@ void *mach_profile_listener(void *arg) // (so that thread zero gets notified last) int keymgr_locked = jl_lock_profile_mach(0); - int *randperm = profile_get_randperm(jl_n_threads); - for (int idx = jl_n_threads; idx-- > 0; ) { + int nthreads = jl_atomic_load_acquire(&jl_n_threads); + int *randperm = profile_get_randperm(nthreads); + for (int idx = nthreads; idx-- > 0; ) { // Stop the threads in the random or reverse round-robin order. 
int i = randperm[idx]; // if there is no space left, break early @@ -621,7 +635,8 @@ void *mach_profile_listener(void *arg) if (_dyld_atfork_prepare != NULL && _dyld_atfork_parent != NULL) _dyld_atfork_prepare(); // briefly acquire the dlsym lock host_thread_state_t state; - jl_thread_suspend_and_get_state2(i, &state); + if (!jl_thread_suspend_and_get_state2(i, &state)) + continue; unw_context_t *uc = (unw_context_t*)&state; if (_dyld_atfork_prepare != NULL && _dyld_atfork_parent != NULL) _dyld_atfork_parent(); // quickly release the dlsym lock @@ -660,12 +675,12 @@ void *mach_profile_listener(void *arg) #else bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur, bt_size_max - bt_size_cur - 1, uc, NULL); #endif - jl_ptls_t ptls = jl_all_tls_states[i]; + jl_ptls_t ptls = jl_atomic_load_relaxed(&jl_all_tls_states)[i]; // store threadid but add 1 as 0 is preserved to indicate end of block bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1; - // store task id + // store task id (never null) bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls->current_task); // store cpu cycle clock diff --git a/src/signals-unix.c b/src/signals-unix.c index 8fad82e0e40dc..5fd9b3c44587e 100644 --- a/src/signals-unix.c +++ b/src/signals-unix.c @@ -372,7 +372,14 @@ static void jl_thread_suspend_and_get_state(int tid, unw_context_t **ctx) clock_gettime(CLOCK_REALTIME, &ts); ts.tv_sec += 1; pthread_mutex_lock(&in_signal_lock); - jl_ptls_t ptls2 = jl_all_tls_states[tid]; + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; + jl_task_t *ct2 = ptls2 ? jl_atomic_load_relaxed(&ptls2->current_task) : NULL; + if (ct2 == NULL) { + // this thread is not alive or already dead + *ctx = NULL; + pthread_mutex_unlock(&in_signal_lock); + return; + } jl_atomic_store_release(&ptls2->signal_request, 1); pthread_kill(ptls2->system_id, SIGUSR2); // wait for thread to acknowledge @@ -404,7 +411,7 @@ static void jl_thread_suspend_and_get_state(int tid, unw_context_t **ctx) static void jl_thread_resume(int tid, int sig) { - jl_ptls_t ptls2 = jl_all_tls_states[tid]; + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; jl_atomic_store_release(&ptls2->signal_request, sig == -1 ? 3 : 1); pthread_cond_broadcast(&exit_signal_cond); pthread_cond_wait(&signal_caught_cond, &in_signal_lock); // wait for thread to acknowledge @@ -420,7 +427,7 @@ static void jl_thread_resume(int tid, int sig) // or if SIGINT happens too often. 
static void jl_try_deliver_sigint(void) { - jl_ptls_t ptls2 = jl_all_tls_states[0]; + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[0]; jl_safepoint_enable_sigint(); jl_wake_libuv(); jl_atomic_store_release(&ptls2->signal_request, 2); @@ -451,7 +458,7 @@ CFI_NORETURN static void jl_exit_thread0(int state, jl_bt_element_t *bt_data, size_t bt_size) { - jl_ptls_t ptls2 = jl_all_tls_states[0]; + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[0]; if (thread0_exit_count <= 1) { unw_context_t *signal_context; jl_thread_suspend_and_get_state(0, &signal_context); @@ -701,7 +708,7 @@ void trigger_profile_peek(void) if (bt_size_max == 0){ // If the buffer hasn't been initialized, initialize with default size // Keep these values synchronized with Profile.default_init() - if (jl_profile_init(10000000 * jl_n_threads, 1000000) == -1){ + if (jl_profile_init(10000000, 1000000) == -1) { jl_safe_printf("ERROR: could not initialize the profile buffer"); return; } @@ -831,6 +838,7 @@ static void *signal_listener(void *arg) } #endif + int nthreads = jl_atomic_load_acquire(&jl_n_threads); bt_size = 0; #if !defined(JL_DISABLE_LIBUNWIND) unw_context_t *signal_context; @@ -840,8 +848,8 @@ static void *signal_listener(void *arg) jl_lock_profile(); int *randperm; if (profile) - randperm = profile_get_randperm(jl_n_threads); - for (int idx = jl_n_threads; idx-- > 0; ) { + randperm = profile_get_randperm(nthreads); + for (int idx = nthreads; idx-- > 0; ) { // Stop the threads in the random or reverse round-robin order. int i = profile ? randperm[idx] : idx; // notify thread to stop @@ -853,7 +861,7 @@ static void *signal_listener(void *arg) // this part must be signal-handler safe if (critical) { bt_size += rec_backtrace_ctx(bt_data + bt_size, - JL_MAX_BT_SIZE / jl_n_threads - 1, + JL_MAX_BT_SIZE / nthreads - 1, signal_context, NULL); bt_data[bt_size++].uintptr = 0; } @@ -880,12 +888,12 @@ static void *signal_listener(void *arg) } jl_set_safe_restore(old_buf); - jl_ptls_t ptls2 = jl_all_tls_states[i]; + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[i]; // store threadid but add 1 as 0 is preserved to indicate end of block bt_data_prof[bt_size_cur++].uintptr = ptls2->tid + 1; - // store task id + // store task id (never null) bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls2->current_task); // store cpu cycle clock @@ -927,11 +935,11 @@ static void *signal_listener(void *arg) else { #ifndef SIGINFO // SIGINFO already prints this automatically int nrunning = 0; - for (int idx = jl_n_threads; idx-- > 0; ) { - jl_ptls_t ptls2 = jl_all_tls_states[idx]; + for (int idx = nthreads; idx-- > 0; ) { + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[idx]; nrunning += !jl_atomic_load_relaxed(&ptls2->sleep_check_state); } - jl_safe_printf("\ncmd: %s %d running %d of %d\n", jl_options.julia_bin ? jl_options.julia_bin : "julia", uv_os_getpid(), nrunning, jl_n_threads); + jl_safe_printf("\ncmd: %s %d running %d of %d\n", jl_options.julia_bin ? jl_options.julia_bin : "julia", uv_os_getpid(), nrunning, nthreads); #endif jl_safe_printf("\nsignal (%d): %s\n", sig, strsignal(sig)); diff --git a/src/signals-win.c b/src/signals-win.c index 178a7463b8d50..83e92ff400e1d 100644 --- a/src/signals-win.c +++ b/src/signals-win.c @@ -165,7 +165,7 @@ HANDLE hMainThread = INVALID_HANDLE_VALUE; // Try to throw the exception in the master thread. 
static void jl_try_deliver_sigint(void) { - jl_ptls_t ptls2 = jl_all_tls_states[0]; + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[0]; jl_lock_profile(); jl_safepoint_enable_sigint(); jl_wake_libuv(); @@ -362,12 +362,12 @@ static DWORD WINAPI profile_bt( LPVOID lparam ) bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur, bt_size_max - bt_size_cur - 1, &ctxThread, NULL); - jl_ptls_t ptls = jl_all_tls_states[0]; // given only profiling hMainThread + jl_ptls_t ptls = jl_atomic_load_relaxed(&jl_all_tls_states)[0]; // given only profiling hMainThread // store threadid but add 1 as 0 is preserved to indicate end of block bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1; - // store task id + // store task id (never null) bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls->current_task); // store cpu cycle clock diff --git a/src/stackwalk.c b/src/stackwalk.c index a6ca5f3d73493..d64727dea8ba6 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -1051,6 +1051,19 @@ void jl_rec_backtrace(jl_task_t *t) (void)mctx; (void)c; #endif + #elif defined(_OS_FREEBSD_) && defined(_CPU_X86_64_) + sigjmp_buf *mctx = &t->ctx.ctx.uc_mcontext; + mcontext_t *mc = &c.uc_mcontext; + // https://github.com/freebsd/freebsd-src/blob/releng/13.1/lib/libc/amd64/gen/_setjmp.S + mc->mc_rip = ((long*)mctx)[0]; + mc->mc_rbx = ((long*)mctx)[1]; + mc->mc_rsp = ((long*)mctx)[2]; + mc->mc_rbp = ((long*)mctx)[3]; + mc->mc_r12 = ((long*)mctx)[4]; + mc->mc_r13 = ((long*)mctx)[5]; + mc->mc_r14 = ((long*)mctx)[6]; + mc->mc_r15 = ((long*)mctx)[7]; + context = &c; #else #pragma message("jl_rec_backtrace not defined for ASM/SETJMP on unknown system") (void)c; diff --git a/src/staticdata.c b/src/staticdata.c index 10c1c3cae9e3f..c252a8885131e 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -81,7 +81,7 @@ extern "C" { // TODO: put WeakRefs on the weak_refs list during deserialization // TODO: handle finalizers -#define NUM_TAGS 156 +#define NUM_TAGS 157 // An array of references that need to be restored from the sysimg // This is a manually constructed dual of the gvars array, which would be produced by codegen for Julia code, for C. @@ -223,6 +223,7 @@ jl_value_t **const*const get_tags(void) { INSERT_TAG(jl_typeinf_func); INSERT_TAG(jl_type_type_mt); INSERT_TAG(jl_nonfunction_mt); + INSERT_TAG(jl_kwcall_func); // some Core.Builtin Functions that we want to be able to reference: INSERT_TAG(jl_builtin_throw); @@ -301,6 +302,8 @@ void *native_functions; // opaque jl_native_code_desc_t blob used for fetching // table of struct field addresses to rewrite during saving static htable_t field_replace; +static htable_t layout_cache; + // array of definitions for the predefined function pointers // (reverse of fptr_to_id) // This is a manually constructed dual of the fvars array, which would be produced by codegen for Julia code, for C. 
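For orientation before the next staticdata.c hunks: one of them switches RELOC_TAG_OFFSET to 61 on 64-bit (_P64) builds, and that constant controls how a serialized relocation word packs a RefTag in its top bits and an offset in its low bits (e.g. the expression ((uintptr_t)ConstDataRef << RELOC_TAG_OFFSET) + layout used for the layout cache below). A minimal C sketch of that packing follows; the helper names are invented here for illustration and are not part of the source:

    #include <assert.h>
    #include <stdint.h>

    #define RELOC_TAG_OFFSET 61 /* 64-bit value; 32-bit builds keep 29 */

    /* pack one of up to 8 RefTags together with its offset into one word */
    static uintptr_t make_reloc_word(uintptr_t tag, uintptr_t offset)
    {
        assert(tag < 8);
        assert(offset < ((uintptr_t)1 << RELOC_TAG_OFFSET));
        return (tag << RELOC_TAG_OFFSET) + offset;
    }

    static uintptr_t reloc_tag(uintptr_t word)
    {
        return word >> RELOC_TAG_OFFSET;
    }

    static uintptr_t reloc_offset(uintptr_t word)
    {
        return word & (((uintptr_t)1 << RELOC_TAG_OFFSET) - 1);
    }

Raising the tag boundary to bit 61 is what allows far more than 512MB of pointer data on 64-bit builds while keeping the same 8-tag budget.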
@@ -308,7 +311,7 @@ static const jl_fptr_args_t id_to_fptrs[] = { &jl_f_throw, &jl_f_is, &jl_f_typeof, &jl_f_issubtype, &jl_f_isa, &jl_f_typeassert, &jl_f__apply_iterate, &jl_f__apply_pure, &jl_f__call_latest, &jl_f__call_in_world, &jl_f__call_in_world_total, &jl_f_isdefined, - &jl_f_tuple, &jl_f_svec, &jl_f_intrinsic_call, &jl_f_invoke_kwsorter, + &jl_f_tuple, &jl_f_svec, &jl_f_intrinsic_call, &jl_f_getfield, &jl_f_setfield, &jl_f_swapfield, &jl_f_modifyfield, &jl_f_replacefield, &jl_f_fieldtype, &jl_f_nfields, &jl_f_arrayref, &jl_f_const_arrayref, &jl_f_arrayset, &jl_f_arraysize, &jl_f_apply_type, @@ -363,12 +366,12 @@ typedef enum { } jl_callingconv_t; -//#ifdef _P64 -//#define RELOC_TAG_OFFSET 61 -//#else +#ifdef _P64 +#define RELOC_TAG_OFFSET 61 +#else // this supports up to 8 RefTags, 512MB of pointer data, and 4/2 (64/32-bit) GB of constant data. #define RELOC_TAG_OFFSET 29 -//#endif +#endif #if RELOC_TAG_OFFSET <= 32 typedef uint32_t reloc_t; @@ -1079,8 +1082,8 @@ static void jl_write_values(jl_serializer_state *s) if (fld != NULL) { arraylist_push(&s->relocs_list, (void*)(uintptr_t)(offset + reloc_offset)); // relocation location arraylist_push(&s->relocs_list, (void*)backref_id(s, fld)); // relocation target - memset(&s->s->buf[offset + reloc_offset], 0, sizeof(fld)); // relocation offset (none) } + memset(&s->s->buf[offset + reloc_offset], 0, sizeof(fld)); // relocation offset (none) } // A few objects need additional handling beyond the generic serialization above @@ -1130,7 +1133,7 @@ static void jl_write_values(jl_serializer_state *s) assert(invokeptr_id > 0); ios_ensureroom(s->fptr_record, invokeptr_id * sizeof(void*)); ios_seek(s->fptr_record, (invokeptr_id - 1) * sizeof(void*)); - write_reloc_t(s->fptr_record, (uint32_t)~reloc_offset); + write_reloc_t(s->fptr_record, (reloc_t)~reloc_offset); #ifdef _P64 if (sizeof(reloc_t) < 8) write_padding(s->fptr_record, 8 - sizeof(reloc_t)); @@ -1164,20 +1167,32 @@ static void jl_write_values(jl_serializer_state *s) jl_datatype_t *dt = (jl_datatype_t*)v; jl_datatype_t *newdt = (jl_datatype_t*)&s->s->buf[reloc_offset]; if (dt->layout != NULL) { - size_t nf = dt->layout->nfields; - size_t np = dt->layout->npointers; - size_t fieldsize = jl_fielddesc_size(dt->layout->fielddesc_type); + newdt->layout = NULL; + char *flddesc = (char*)dt->layout; - size_t fldsize = sizeof(jl_datatype_layout_t) + nf * fieldsize; - if (dt->layout->first_ptr != -1) - fldsize += np << dt->layout->fielddesc_type; - uintptr_t layout = LLT_ALIGN(ios_pos(s->const_data), sizeof(void*)); - write_padding(s->const_data, layout - ios_pos(s->const_data)); // realign stream - newdt->layout = NULL; // relocation offset - layout /= sizeof(void*); - arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_datatype_t, layout))); // relocation location - arraylist_push(&s->relocs_list, (void*)(((uintptr_t)ConstDataRef << RELOC_TAG_OFFSET) + layout)); // relocation target - ios_write(s->const_data, flddesc, fldsize); + void* reloc_from = (void*)(reloc_offset + offsetof(jl_datatype_t, layout)); + void* reloc_to; + + void** bp = ptrhash_bp(&layout_cache, flddesc); + if (*bp == HT_NOTFOUND) { + int64_t streampos = ios_pos(s->const_data); + uintptr_t align = LLT_ALIGN(streampos, sizeof(void*)); + uintptr_t layout = align / sizeof(void*); + *bp = reloc_to = (void*)(((uintptr_t)ConstDataRef << RELOC_TAG_OFFSET) + layout); + + size_t fieldsize = jl_fielddesc_size(dt->layout->fielddesc_type); + size_t layoutsize = sizeof(jl_datatype_layout_t) + dt->layout->nfields * fieldsize; 
+ if (dt->layout->first_ptr != -1) + layoutsize += dt->layout->npointers << dt->layout->fielddesc_type; + write_padding(s->const_data, align - streampos); + ios_write(s->const_data, flddesc, layoutsize); + } + else { + reloc_to = *bp; + } + + arraylist_push(&s->relocs_list, reloc_from); + arraylist_push(&s->relocs_list, reloc_to); } } else if (jl_is_typename(v)) { @@ -1347,7 +1362,7 @@ static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t bas assert(offset < deser_sym.len && deser_sym.items[offset] && "corrupt relocation item id"); return (uintptr_t)deser_sym.items[offset]; case BindingRef: - return jl_buff_tag | GC_OLD_MARKED; + return jl_buff_tag | GC_OLD; case TagRef: if (offset == 0) return (uintptr_t)s->ptls->root_task; @@ -1394,41 +1409,71 @@ static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t bas } -static void jl_write_skiplist(ios_t *s, char *base, size_t size, arraylist_t *list) +static void jl_write_reloclist(ios_t *s, char *base, size_t size, arraylist_t *list) { - size_t i; - for (i = 0; i < list->len; i += 2) { + for (size_t i = 0; i < list->len; i += 2) { + size_t last_pos = i ? (size_t)list->items[i - 2] : 0; size_t pos = (size_t)list->items[i]; size_t item = (size_t)list->items[i + 1]; uintptr_t *pv = (uintptr_t*)(base + pos); assert(pos < size && pos != 0); *pv = get_reloc_for_item(item, *pv); - // record pos in relocations list - // TODO: save space by using delta-compression - write_reloc_t(s, pos); + + // write pos as compressed difference. + size_t pos_diff = pos - last_pos; + while (pos_diff) { + assert(pos_diff >= 0); + if (pos_diff <= 127) { + write_int8(s, pos_diff); + break; + } + else { + // Extract the next 7 bits + int8_t ns = pos_diff & (int8_t)0x7F; + pos_diff >>= 7; + // Set the high bit if there's still more + ns |= (!!pos_diff) << 7; + write_int8(s, ns); + } + } } - write_reloc_t(s, 0); + write_int8(s, 0); } static void jl_write_relocations(jl_serializer_state *s) { char *base = &s->s->buf[0]; - jl_write_skiplist(s->relocs, base, s->s->size, &s->gctags_list); - jl_write_skiplist(s->relocs, base, s->s->size, &s->relocs_list); + jl_write_reloclist(s->relocs, base, s->s->size, &s->gctags_list); + jl_write_reloclist(s->relocs, base, s->s->size, &s->relocs_list); } - -static void jl_read_relocations(jl_serializer_state *s, uint8_t bits) +static void jl_read_reloclist(jl_serializer_state *s, uint8_t bits) { - uintptr_t base = (uintptr_t)&s->s->buf[0]; + uintptr_t base = (uintptr_t)s->s->buf; size_t size = s->s->size; + uintptr_t last_pos = 0; + uint8_t *current = (uint8_t *)(s->relocs->buf + s->relocs->bpos); while (1) { - uintptr_t offset = *(reloc_t*)&s->relocs->buf[(uintptr_t)s->relocs->bpos]; - s->relocs->bpos += sizeof(reloc_t); - if (offset == 0) + // Read the offset of the next object + size_t pos_diff = 0; + size_t cnt = 0; + while (1) { + assert(s->relocs->bpos <= s->relocs->size); + assert((char *)current <= (char *)(s->relocs->buf + s->relocs->size)); + int8_t c = *current++; + s->relocs->bpos += 1; + + pos_diff |= ((size_t)c & 0x7F) << (7 * cnt++); + if ((c >> 7) == 0) + break; + } + if (pos_diff == 0) break; - uintptr_t *pv = (uintptr_t*)(base + offset); + + uintptr_t pos = last_pos + pos_diff; + last_pos = pos; + uintptr_t *pv = (uintptr_t *)(base + pos); uintptr_t v = *pv; v = get_item_for_reloc(s, base, size, v); *pv = v | bits; @@ -1439,16 +1484,27 @@ static char *sysimg_base; static char *sysimg_relocs; void gc_sweep_sysimg(void) { - char *base = sysimg_base; - reloc_t *relocs = 
(reloc_t*)sysimg_relocs; - if (relocs == NULL) + if (!sysimg_relocs) return; + uintptr_t base = (uintptr_t)sysimg_base; + uintptr_t last_pos = 0; + uint8_t *current = (uint8_t *)sysimg_relocs; while (1) { - uintptr_t offset = *relocs; - relocs++; - if (offset == 0) + // Read the offset of the next object + size_t pos_diff = 0; + size_t cnt = 0; + while (1) { + int8_t c = *current++; + pos_diff |= ((size_t)c & 0x7F) << (7 * cnt++); + if ((c >> 7) == 0) + break; + } + if (pos_diff == 0) break; - jl_taggedvalue_t *o = (jl_taggedvalue_t*)(base + offset); + + uintptr_t pos = last_pos + pos_diff; + last_pos = pos; + jl_taggedvalue_t *o = (jl_taggedvalue_t *)(base + pos); o->bits.gc = GC_OLD; } } @@ -1740,7 +1796,7 @@ static void strip_specializations_(jl_method_instance_t *mi) jl_value_t *inferred = jl_atomic_load_relaxed(&codeinst->inferred); if (inferred && inferred != jl_nothing) { if (jl_options.strip_ir) { - record_field_change(&inferred, jl_nothing); + record_field_change((jl_value_t**)&codeinst->inferred, jl_nothing); } else if (jl_options.strip_metadata) { jl_value_t *stripped = strip_codeinfo_meta(mi->def.method, inferred, 0); @@ -1753,6 +1809,8 @@ static void strip_specializations_(jl_method_instance_t *mi) } if (jl_options.strip_ir) { record_field_change(&mi->uninferred, NULL); + record_field_change((jl_value_t**)&mi->backedges, NULL); + record_field_change((jl_value_t**)&mi->callbacks, NULL); } } @@ -1793,11 +1851,15 @@ static int strip_all_codeinfos__(jl_typemap_entry_t *def, void *_env) } if (m->unspecialized) strip_specializations_(m->unspecialized); + if (jl_options.strip_ir && m->root_blocks) + record_field_change((jl_value_t**)&m->root_blocks, NULL); return 1; } static int strip_all_codeinfos_(jl_methtable_t *mt, void *_env) { + if (jl_options.strip_ir && mt->backedges) + record_field_change((jl_value_t**)&mt->backedges, NULL); return jl_typemap_visitor(mt->defs, strip_all_codeinfos__, NULL); } @@ -2199,16 +2261,18 @@ static void jl_restore_system_image_from_stream(ios_t *f) JL_GC_DISABLED jl_gc_set_permalloc_region((void*)sysimg_base, (void*)(sysimg_base + sysimg.size)); s.s = &sysimg; - jl_read_relocations(&s, GC_OLD_MARKED); // gctags + jl_read_reloclist(&s, GC_OLD); // gctags size_t sizeof_tags = ios_pos(&relocs); (void)sizeof_tags; - jl_read_relocations(&s, 0); // general relocs + jl_read_reloclist(&s, 0); // general relocs ios_close(&relocs); ios_close(&const_data); jl_update_all_gvars(&s); // gvars relocs ios_close(&gvar_record); s.s = NULL; + jl_kwcall_mt = ((jl_datatype_t*)jl_typeof(jl_kwcall_func))->name->mt; + s.s = f; // reinit items except ccallables jl_finalize_deserializer(&s); @@ -2299,6 +2363,7 @@ static void jl_init_serializer2(int for_serialize) htable_new(&symbol_table, 0); htable_new(&fptr_to_id, sizeof(id_to_fptrs) / sizeof(*id_to_fptrs)); htable_new(&backref_table, 0); + htable_new(&layout_cache, 0); uintptr_t i; for (i = 0; id_to_fptrs[i] != NULL; i++) { ptrhash_put(&fptr_to_id, (void*)(uintptr_t)id_to_fptrs[i], (void*)(i + 2)); @@ -2315,6 +2380,7 @@ static void jl_cleanup_serializer2(void) htable_reset(&symbol_table, 0); htable_reset(&fptr_to_id, 0); htable_reset(&backref_table, 0); + htable_reset(&layout_cache, 0); arraylist_free(&deser_sym); } diff --git a/src/subtype.c b/src/subtype.c index 55579f2b47305..9a5a9fdbbbfd4 100644 --- a/src/subtype.c +++ b/src/subtype.c @@ -1003,7 +1003,7 @@ static int subtype_tuple_tail(jl_datatype_t *xd, jl_datatype_t *yd, int8_t R, jl { size_t lx = jl_nparams(xd); size_t ly = jl_nparams(yd); - size_t i = 0, j = 0, vx = 
0, vy = 0, x_reps = 1; + size_t i = 0, j = 0, vx = 0, vy = 0, x_reps = 0; jl_value_t *lastx = NULL, *lasty = NULL; jl_value_t *xi = NULL, *yi = NULL; diff --git a/src/support/htable.h b/src/support/htable.h index 0b5196374e2b6..4f821493beee8 100644 --- a/src/support/htable.h +++ b/src/support/htable.h @@ -47,13 +47,13 @@ int HTNAME##_has(htable_t *h, void *key) JL_NOTSAFEPOINT; \ int HTNAME##_remove(htable_t *h, void *key) JL_NOTSAFEPOINT; \ void **HTNAME##_bp(htable_t *h, void *key) JL_NOTSAFEPOINT; -#define HTPROT_R(HTNAME) \ -void *HTNAME##_get_r(htable_t *h, void *key, void *ctx); \ -void HTNAME##_put_r(htable_t *h, void *key, void *val, void *ctx); \ -void HTNAME##_adjoin_r(htable_t *h, void *key, void *val, void *ctx); \ -int HTNAME##_has_r(htable_t *h, void *key, void *ctx); \ -int HTNAME##_remove_r(htable_t *h, void *key, void *ctx); \ -void **HTNAME##_bp_r(htable_t *h, void *key, void *ctx); +#define HTPROT_R(HTNAME) \ +void *HTNAME##_get_r(htable_t *h, void *key, void *ctx) JL_NOTSAFEPOINT; \ +void HTNAME##_put_r(htable_t *h, void *key, void *val, void *ctx) JL_NOTSAFEPOINT; \ +void HTNAME##_adjoin_r(htable_t *h, void *key, void *val, void *ctx) JL_NOTSAFEPOINT; \ +int HTNAME##_has_r(htable_t *h, void *key, void *ctx) JL_NOTSAFEPOINT; \ +int HTNAME##_remove_r(htable_t *h, void *key, void *ctx) JL_NOTSAFEPOINT; \ +void **HTNAME##_bp_r(htable_t *h, void *key, void *ctx) JL_NOTSAFEPOINT; #ifdef __cplusplus } diff --git a/src/task.c b/src/task.c index a1adb704695a7..5c7c521f89b09 100644 --- a/src/task.c +++ b/src/task.c @@ -331,7 +331,8 @@ JL_DLLEXPORT void *jl_task_stack_buffer(jl_task_t *task, size_t *size, int *ptid { size_t off = 0; #ifndef _OS_WINDOWS_ - if (jl_all_tls_states[0]->root_task == task) { + jl_ptls_t ptls0 = jl_atomic_load_relaxed(&jl_all_tls_states)[0]; + if (ptls0->root_task == task) { // See jl_init_root_task(). The root task of the main thread // has its buffer enlarged by an artificial 3000000 bytes, but // that means that the start of the buffer usually points to @@ -372,7 +373,8 @@ JL_DLLEXPORT void jl_active_task_stack(jl_task_t *task, else if (task->stkbuf) { *total_start = *active_start = (char*)task->stkbuf; #ifndef _OS_WINDOWS_ - if (jl_all_tls_states[0]->root_task == task) { + jl_ptls_t ptls0 = jl_atomic_load_relaxed(&jl_all_tls_states)[0]; + if (ptls0->root_task == task) { // See jl_init_root_task(). 
The root task of the main thread // has its buffer enlarged by an artificial 3000000 bytes, but // that means that the start of the buffer usually points to diff --git a/src/threading.c b/src/threading.c index 2cebdb22fc0aa..581032092168c 100644 --- a/src/threading.c +++ b/src/threading.c @@ -46,12 +46,16 @@ JL_DLLEXPORT void *jl_get_ptls_states(void) return jl_current_task->ptls; } +static void jl_delete_thread(void*); + #if !defined(_OS_WINDOWS_) +static pthread_key_t jl_task_exit_key; static pthread_key_t jl_safe_restore_key; __attribute__((constructor)) void _jl_init_safe_restore(void) { pthread_key_create(&jl_safe_restore_key, NULL); + pthread_key_create(&jl_task_exit_key, jl_delete_thread); } JL_DLLEXPORT jl_jmp_buf *jl_get_safe_restore(void) @@ -124,21 +128,26 @@ static DWORD jl_safe_restore_key; BOOLEAN WINAPI DllMain(IN HINSTANCE hDllHandle, IN DWORD nReason, IN LPVOID Reserved) { + jl_task_t *ct; switch (nReason) { case DLL_PROCESS_ATTACH: jl_pgcstack_key = TlsAlloc(); assert(jl_pgcstack_key != TLS_OUT_OF_INDEXES); jl_safe_restore_key = TlsAlloc(); assert(jl_safe_restore_key != TLS_OUT_OF_INDEXES); - // Fall through - case DLL_THREAD_ATTACH: - break; - case DLL_THREAD_DETACH: break; case DLL_PROCESS_DETACH: TlsFree(jl_pgcstack_key); TlsFree(jl_safe_restore_key); break; + case DLL_THREAD_ATTACH: + // will call jl_adopt_thread lazily on-demand + break; + case DLL_THREAD_DETACH: + ct = jl_get_current_task(); + if (ct != NULL) + jl_delete_thread((void*)ct->ptls); + break; } return 1; // success } @@ -291,7 +300,8 @@ void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k) #endif static uv_mutex_t tls_lock; // controls write-access to these variables: -jl_ptls_t *jl_all_tls_states JL_GLOBALLY_ROOTED; +_Atomic(jl_ptls_t*) jl_all_tls_states JL_GLOBALLY_ROOTED; +int jl_all_tls_states_size; static uv_cond_t cond; // return calling thread's ID @@ -302,7 +312,8 @@ JL_DLLEXPORT int16_t jl_threadid(void) JL_DLLEXPORT int8_t jl_threadpoolid(int16_t tid) JL_NOTSAFEPOINT { - if (tid < 0 || tid >= jl_n_threads) + int nthreads = jl_atomic_load_acquire(&jl_n_threads); + if (tid < 0 || tid >= nthreads) jl_error("invalid tid"); int n = 0; for (int i = 0; i < jl_n_threadpools; i++) { @@ -310,14 +321,25 @@ JL_DLLEXPORT int8_t jl_threadpoolid(int16_t tid) JL_NOTSAFEPOINT if (tid < n) return (int8_t)i; } - jl_error("internal error: couldn't determine threadpool id"); + return 0; // everything else uses threadpool 0 (though does not become part of any threadpool) } jl_ptls_t jl_init_threadtls(int16_t tid) { +#ifndef _OS_WINDOWS_ + if (pthread_getspecific(jl_task_exit_key)) + abort(); +#endif + if (jl_get_pgcstack() != NULL) + abort(); jl_ptls_t ptls = (jl_ptls_t)calloc(1, sizeof(jl_tls_states_t)); +#ifndef _OS_WINDOWS_ + pthread_setspecific(jl_task_exit_key, (void*)ptls); +#endif ptls->system_id = (jl_thread_t)(uintptr_t)uv_thread_self(); ptls->rngseed = jl_rand(); + if (tid == 0) + ptls->disable_gc = 1; #ifdef _OS_WINDOWS_ if (tid == 0) { if (!DuplicateHandle(GetCurrentProcess(), GetCurrentThread(), @@ -328,7 +350,6 @@ jl_ptls_t jl_init_threadtls(int16_t tid) } } #endif - ptls->tid = tid; jl_atomic_store_relaxed(&ptls->gc_state, 0); // GC unsafe // Conditionally initialize the safepoint address. 
See comment in // `safepoint.c` @@ -349,11 +370,80 @@ jl_ptls_t jl_init_threadtls(int16_t tid) uv_mutex_init(&ptls->sleep_lock); uv_cond_init(&ptls->wake_signal); - jl_all_tls_states[tid] = ptls; + uv_mutex_lock(&tls_lock); + jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states); + if (tid == -1) + tid = jl_atomic_load_relaxed(&jl_n_threads); + ptls->tid = tid; + if (jl_all_tls_states_size <= tid) { + int i, newsize = jl_all_tls_states_size + tid + 2; + jl_ptls_t *newpptls = (jl_ptls_t*)calloc(newsize, sizeof(jl_ptls_t)); + for (i = 0; i < jl_all_tls_states_size; i++) { + newpptls[i] = allstates[i]; + } + jl_atomic_store_release(&jl_all_tls_states, newpptls); + jl_all_tls_states_size = newsize; + jl_gc_add_quiescent(ptls, (void**)allstates, free); + allstates = newpptls; + } + allstates[tid] = ptls; + if (jl_atomic_load_relaxed(&jl_n_threads) < tid + 1) + jl_atomic_store_release(&jl_n_threads, tid + 1); + jl_fence(); + uv_mutex_unlock(&tls_lock); return ptls; } +JL_DLLEXPORT jl_gcframe_t **jl_adopt_thread(void) +{ + // initialize this thread (assign tid, create heap, set up root task) + jl_ptls_t ptls = jl_init_threadtls(-1); + void *stack_lo, *stack_hi; + jl_init_stack_limits(0, &stack_lo, &stack_hi); + + (void)jl_gc_unsafe_enter(ptls); + // warning: this changes `jl_current_task`, so be careful not to call that from this function + jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi); + JL_GC_PROMISE_ROOTED(ct); + + return &ct->gcstack; +} + +static void jl_delete_thread(void *value) +{ + jl_ptls_t ptls = (jl_ptls_t)value; + // Acquire the profile write lock, to ensure we are not racing with the `kill` + // call in the profile code which will also try to look at these variables. + // We have no control over when the user calls pthread_join, so we must do + // this here by blocking. This also synchronizes our read of `current_task` + // (which is the flag we currently use to check the liveness state of a thread). +#ifdef _OS_WINDOWS_ + jl_lock_profile_wr(); +#elif defined(JL_DISABLE_LIBUNWIND) + // nothing +#elif defined(__APPLE__) + jl_lock_profile_wr(); +#else + pthread_mutex_lock(&in_signal_lock); +#endif +#ifndef _OS_WINDOWS_ + pthread_setspecific(jl_task_exit_key, NULL); +#endif + jl_atomic_store_relaxed(&ptls->current_task, NULL); // dead + jl_atomic_store_relaxed(&ptls->sleep_check_state, 2); // dead, interpreted as sleeping and unwakeable +#ifdef _OS_WINDOWS_ + jl_unlock_profile_wr(); +#elif defined(JL_DISABLE_LIBUNWIND) + // nothing +#elif defined(__APPLE__) + jl_unlock_profile_wr(); +#else + pthread_mutex_unlock(&in_signal_lock); +#endif + (void)jl_gc_safe_enter(ptls); +} + JL_DLLEXPORT jl_mutex_t jl_codegen_lock; jl_mutex_t typecache_lock; @@ -467,7 +557,6 @@ void jl_init_threading(void) uv_mutex_init(&tls_lock); uv_cond_init(&cond); - #ifdef JL_ELF_TLS_VARIANT jl_check_tls(); #endif @@ -477,8 +566,8 @@ void jl_init_threading(void) // environment variable. Set the globals `jl_n_threadpools`, `jl_n_threads` // and `jl_n_threads_per_pool`. 
jl_n_threadpools = 1; - jl_n_threads = JULIA_NUM_THREADS; - int16_t nthreads = jl_n_threads, nthreadsi = 0; + int16_t nthreads = JULIA_NUM_THREADS; + int16_t nthreadsi = 0; char *endptr, *endptri; if (jl_options.nthreads != 0) { // --threads specified @@ -516,26 +605,26 @@ void jl_init_threading(void) } } - jl_n_threads = nthreads + nthreadsi; - jl_n_threads_per_pool = (int *)malloc(2 * sizeof(int)); + jl_all_tls_states_size = nthreads + nthreadsi; + jl_n_threads_per_pool = (int*)malloc_s(2 * sizeof(int)); jl_n_threads_per_pool[0] = nthreads; jl_n_threads_per_pool[1] = nthreadsi; -#ifndef __clang_gcanalyzer__ - jl_all_tls_states = (jl_ptls_t*)calloc(jl_n_threads, sizeof(void*)); -#endif + jl_atomic_store_release(&jl_all_tls_states, (jl_ptls_t*)calloc(jl_all_tls_states_size, sizeof(jl_ptls_t))); + jl_atomic_store_release(&jl_n_threads, jl_all_tls_states_size); } static uv_barrier_t thread_init_done; void jl_start_threads(void) { + int nthreads = jl_atomic_load_relaxed(&jl_n_threads); int cpumasksize = uv_cpumask_size(); char *cp; int i, exclusive; uv_thread_t uvtid; - if (cpumasksize < jl_n_threads) // also handles error case - cpumasksize = jl_n_threads; + if (cpumasksize < nthreads) // also handles error case + cpumasksize = nthreads; char *mask = (char*)alloca(cpumasksize); // do we have exclusive use of the machine? default is no @@ -548,7 +637,7 @@ void jl_start_threads(void) // according to a 'compact' policy // non-exclusive: no affinity settings; let the kernel move threads about if (exclusive) { - if (jl_n_threads > jl_cpu_threads()) { + if (nthreads > jl_cpu_threads()) { jl_printf(JL_STDERR, "ERROR: Too many threads requested for %s option.\n", MACHINE_EXCLUSIVE_NAME); exit(1); } @@ -559,9 +648,6 @@ void jl_start_threads(void) mask[0] = 0; } - // The analyzer doesn't know jl_n_threads doesn't change, help it - size_t nthreads = jl_n_threads; - // create threads uv_barrier_init(&thread_init_done, nthreads); diff --git a/src/threading.h b/src/threading.h index 4c6f1e19881f5..9fd63f0fd188d 100644 --- a/src/threading.h +++ b/src/threading.h @@ -12,7 +12,7 @@ extern "C" { #define PROFILE_JL_THREADING 0 -extern jl_ptls_t *jl_all_tls_states JL_GLOBALLY_ROOTED; /* thread local storage */ +extern _Atomic(jl_ptls_t*) jl_all_tls_states JL_GLOBALLY_ROOTED; /* thread local storage */ typedef struct _jl_threadarg_t { int16_t tid; diff --git a/src/typemap.c b/src/typemap.c index cbabbe361daa5..7374c9d7c3cc5 100644 --- a/src/typemap.c +++ b/src/typemap.c @@ -290,7 +290,6 @@ static jl_typemap_t *mtcache_hash_lookup(jl_array_t *cache JL_PROPAGATES_ROOT, j if (cache == (jl_array_t*)jl_an_empty_vec_any) return (jl_typemap_t*)jl_nothing; jl_typemap_t *ml = (jl_typemap_t*)jl_eqtable_get(cache, ty, jl_nothing); - JL_GC_PROMISE_ROOTED(ml); // clang-sa doesn't trust our JL_PROPAGATES_ROOT claim return ml; } diff --git a/stdlib/Artifacts/src/Artifacts.jl b/stdlib/Artifacts/src/Artifacts.jl index 3f1574db4c4a6..4bcf98df2a1d9 100644 --- a/stdlib/Artifacts/src/Artifacts.jl +++ b/stdlib/Artifacts/src/Artifacts.jl @@ -242,7 +242,7 @@ end """ artifact_exists(hash::SHA1; honor_overrides::Bool=true) -Returns whether or not the given artifact (identified by its sha1 git tree hash) exists +Return whether or not the given artifact (identified by its sha1 git tree hash) exists on-disk. Note that it is possible that the given artifact exists in multiple locations (e.g. within multiple depots). 
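With thread adoption in play, `jl_n_threads` is now an atomic counter that only grows, which is why `jl_start_threads` above takes a single relaxed load up front instead of re-reading a global. User code that sizes per-thread scratch storage has the same obligation. A hedged Julia-side sketch of the pattern (not code from this patch): snapshot the id bound once via `Threads.maxthreadid()`, the accessor this diff switches callers to, and pin tasks so each slot has a single writer.

```julia
function sum_per_thread(f, n)
    nbuckets = Threads.maxthreadid()     # one snapshot; later adoptions may raise it, but ids in hand will not exceed it
    acc = zeros(nbuckets)
    Threads.@threads :static for i in 1:n
        acc[Threads.threadid()] += f(i)  # :static scheduling keeps each task on one thread, so slots never race
    end
    return sum(acc)
end

sum_per_thread(sqrt, 10_000)
```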
@@ -455,7 +455,7 @@ end include_lazy = false, pkg_uuid = nothing) -Returns a dictionary where every entry is an artifact from the given `Artifacts.toml` +Return a dictionary where every entry is an artifact from the given `Artifacts.toml` that should be downloaded for the requested platform. Lazy artifacts are included if `include_lazy` is set. """ @@ -611,7 +611,7 @@ end artifact_slash_lookup(name::String, atifact_dict::Dict, artifacts_toml::String, platform::Platform) -Returns `artifact_name`, `artifact_path_tail`, and `hash` by looking the results up in +Return `artifact_name`, `artifact_path_tail`, and `hash` by looking the results up in the given `artifacts_toml`, first extracting the name and path tail from the given `name` to support slash-indexing within the given artifact. """ diff --git a/stdlib/Base64/src/Base64.jl b/stdlib/Base64/src/Base64.jl index 108faa18f5b85..f1fef096888ed 100644 --- a/stdlib/Base64/src/Base64.jl +++ b/stdlib/Base64/src/Base64.jl @@ -33,7 +33,7 @@ include("decode.jl") """ stringmime(mime, x; context=nothing) -Returns an `AbstractString` containing the representation of `x` in the +Return an `AbstractString` containing the representation of `x` in the requested `mime` type. This is similar to [`repr(mime, x)`](@ref) except that binary data is base64-encoded as an ASCII string. diff --git a/stdlib/CompilerSupportLibraries_jll/Project.toml b/stdlib/CompilerSupportLibraries_jll/Project.toml index 877a1ab5b005c..3f134b053268e 100644 --- a/stdlib/CompilerSupportLibraries_jll/Project.toml +++ b/stdlib/CompilerSupportLibraries_jll/Project.toml @@ -4,7 +4,7 @@ uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" # NOTE: When updating this, also make sure to update the value # `CSL_NEXT_GLIBCXX_VERSION` in `deps/csl.mk`, to properly disable # automatic usage of BB-built CSLs on extremely up-to-date systems! -version = "0.5.2+0" +version = "0.5.3+0" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/stdlib/Dates/src/periods.jl b/stdlib/Dates/src/periods.jl index 7eb71ff2905cf..9b7e29496e642 100644 --- a/stdlib/Dates/src/periods.jl +++ b/stdlib/Dates/src/periods.jl @@ -58,7 +58,7 @@ Base.isfinite(::Union{Type{P}, P}) where {P<:Period} = true """ default(p::Period) -> Period -Returns a sensible "default" value for the input Period by returning `T(1)` for Year, +Return a sensible "default" value for the input Period by returning `T(1)` for Year, Month, and Day, and `T(0)` for Hour, Minute, Second, and Millisecond. 
""" function default end diff --git a/stdlib/Distributed/test/distributed_exec.jl b/stdlib/Distributed/test/distributed_exec.jl index 9dffbe0e41994..c2c6efaa6f7e1 100644 --- a/stdlib/Distributed/test/distributed_exec.jl +++ b/stdlib/Distributed/test/distributed_exec.jl @@ -153,12 +153,12 @@ function _getenv_include_thread_unsafe() end const _env_include_thread_unsafe = _getenv_include_thread_unsafe() function include_thread_unsafe_tests() - if Threads.nthreads() > 1 + if Threads.maxthreadid() > 1 if _env_include_thread_unsafe return true end - msg = "Skipping a thread-unsafe test because `Threads.nthreads() > 1`" - @warn msg Threads.nthreads() + msg = "Skipping a thread-unsafe test because `Threads.maxthreadid() > 1`" + @warn msg Threads.maxthreadid() Test.@test_broken false return false end diff --git a/stdlib/Downloads.version b/stdlib/Downloads.version index eaeef9bdc1192..8ec2124c9e06d 100644 --- a/stdlib/Downloads.version +++ b/stdlib/Downloads.version @@ -1,4 +1,4 @@ DOWNLOADS_BRANCH = master -DOWNLOADS_SHA1 = 0733701b0e21df6ae61a6b2fc8cec60ff1fd28dc +DOWNLOADS_SHA1 = 11b6bb73bff32cec1b1e3bf064420cad1335400b DOWNLOADS_GIT_URL := https://github.com/JuliaLang/Downloads.jl.git DOWNLOADS_TAR_URL = https://api.github.com/repos/JuliaLang/Downloads.jl/tarball/$1 diff --git a/stdlib/InteractiveUtils/src/InteractiveUtils.jl b/stdlib/InteractiveUtils/src/InteractiveUtils.jl index 4621ed07ed124..4d43ca113b0e1 100644 --- a/stdlib/InteractiveUtils/src/InteractiveUtils.jl +++ b/stdlib/InteractiveUtils/src/InteractiveUtils.jl @@ -141,7 +141,7 @@ function versioninfo(io::IO=stdout; verbose::Bool=false) println(io, " WORD_SIZE: ", Sys.WORD_SIZE) println(io, " LIBM: ",Base.libm_name) println(io, " LLVM: libLLVM-",Base.libllvm_version," (", Sys.JIT, ", ", Sys.CPU_NAME, ")") - println(io, " Threads: ", Threads.nthreads(), " on ", Sys.CPU_THREADS, " virtual cores") + println(io, " Threads: ", Threads.maxthreadid(), " on ", Sys.CPU_THREADS, " virtual cores") function is_nonverbose_env(k::String) return occursin(r"^JULIA_|^DYLD_|^LD_", k) @@ -183,7 +183,7 @@ The optional second argument restricts the search to a particular module or func If keyword `supertypes` is `true`, also return arguments with a parent type of `typ`, excluding type `Any`. 
""" -function methodswith(t::Type, f::Base.Callable, meths = Method[]; supertypes::Bool=false) +function methodswith(@nospecialize(t::Type), @nospecialize(f::Base.Callable), meths = Method[]; supertypes::Bool=false) for d in methods(f) if any(function (x) let x = rewrap_unionall(x, d.sig) @@ -200,7 +200,7 @@ function methodswith(t::Type, f::Base.Callable, meths = Method[]; supertypes::Bo return meths end -function _methodswith(t::Type, m::Module, supertypes::Bool) +function _methodswith(@nospecialize(t::Type), m::Module, supertypes::Bool) meths = Method[] for nm in names(m) if isdefined(m, nm) @@ -213,9 +213,9 @@ function _methodswith(t::Type, m::Module, supertypes::Bool) return unique(meths) end -methodswith(t::Type, m::Module; supertypes::Bool=false) = _methodswith(t, m, supertypes) +methodswith(@nospecialize(t::Type), m::Module; supertypes::Bool=false) = _methodswith(t, m, supertypes) -function methodswith(t::Type; supertypes::Bool=false) +function methodswith(@nospecialize(t::Type); supertypes::Bool=false) meths = Method[] for mod in Base.loaded_modules_array() append!(meths, _methodswith(t, mod, supertypes)) diff --git a/stdlib/InteractiveUtils/src/clipboard.jl b/stdlib/InteractiveUtils/src/clipboard.jl index ee4548315c6ce..adf676cb8c55a 100644 --- a/stdlib/InteractiveUtils/src/clipboard.jl +++ b/stdlib/InteractiveUtils/src/clipboard.jl @@ -103,7 +103,7 @@ elseif Sys.iswindows() ccall(:memcpy, Ptr{UInt16}, (Ptr{UInt16}, Ptr{UInt16}, Csize_t), plock, x_u16, sizeof(x_u16)) unlock = ccall((:GlobalUnlock, "kernel32"), stdcall, Cint, (Ptr{UInt16},), pdata) (unlock == 0 && Libc.GetLastError() == 0) || return cleanup(:GlobalUnlock) # this should never fail - pset = ccall((:SetClipboardData, "user32"), stdcall, Ptr{UInt16}, (Cuint, Ptr{UInt16}), 13, pdata) + pset = ccall((:SetClipboardData, "user32"), stdcall, Ptr{UInt16}, (Cuint, Ptr{UInt16}), 13, pdata) # CF_UNICODETEXT pdata != pset && return cleanup(:SetClipboardData) cleanup(:success) end @@ -114,14 +114,14 @@ elseif Sys.iswindows() if cause !== :OpenClipboard ccall((:CloseClipboard, "user32"), stdcall, Cint, ()) == 0 && Base.windowserror(:CloseClipboard) # this should never fail end - if cause !== :success && (cause !== :GetClipboardData || errno != 0) + if cause !== :success && !(cause === :GetClipboardData && (errno == 0x8004006A || errno == 0x800401D3)) # ignore DV_E_CLIPFORMAT and CLIPBRD_E_BAD_DATA from GetClipboardData Base.windowserror(cause, errno) end "" end ccall((:OpenClipboard, "user32"), stdcall, Cint, (Ptr{Cvoid},), C_NULL) == 0 && return Base.windowserror(:OpenClipboard) ccall(:SetLastError, stdcall, Cvoid, (UInt32,), 0) # allow distinguishing if the clipboard simply didn't have text - pdata = ccall((:GetClipboardData, "user32"), stdcall, Ptr{UInt16}, (Cuint,), 13) + pdata = ccall((:GetClipboardData, "user32"), stdcall, Ptr{UInt16}, (Cuint,), 13) # CF_UNICODETEXT pdata == C_NULL && return cleanup(:GetClipboardData) plock = ccall((:GlobalLock, "kernel32"), stdcall, Ptr{UInt16}, (Ptr{UInt16},), pdata) plock == C_NULL && return cleanup(:GlobalLock) diff --git a/stdlib/InteractiveUtils/src/editless.jl b/stdlib/InteractiveUtils/src/editless.jl index 6fcc9e9423822..539e9b12f4071 100644 --- a/stdlib/InteractiveUtils/src/editless.jl +++ b/stdlib/InteractiveUtils/src/editless.jl @@ -65,6 +65,7 @@ already work: - nano - micro - kak +- helix - textmate - mate - kate @@ -123,8 +124,10 @@ function define_default_editors() `$cmd $path` end # vim family - for (editors, wait) in [[Any["vim", "vi", "nvim", "mvim"], true], - 
[Any["\bgvim"], false]] + for (editors, wait) in [ + [["vim", "vi", "nvim", "mvim"], true], + [[r"\bgvim"], false], + ] define_editor(editors; wait) do cmd, path, line, column cmd = line == 0 ? `$cmd $path` : column == 0 ? `$cmd +$line $path` : @@ -134,24 +137,31 @@ function define_default_editors() define_editor("nano"; wait=true) do cmd, path, line, column cmd = `$cmd +$line,$column $path` end - # emacs (must check that emacs not running in -t/-nw before regex match for general emacs) - for (editors, wait) in [[Any[r"\bemacs"], false], - [Any[r"\bemacs\b.*\s(-nw|--no-window-system)\b", r"\bemacsclient\b.\s*-(-?nw|t|-?tty)\b"], true]] + # emacs (must check that emacs not running in -t/-nw + # before regex match for general emacs) + for (editors, wait) in [ + [[r"\bemacs"], false], + [[r"\bemacs\b.*\s(-nw|--no-window-system)\b", + r"\bemacsclient\b.\s*-(-?nw|t|-?tty)\b"], true], + ] define_editor(editors; wait) do cmd, path, line, column `$cmd +$line:$column $path` end end - # Other editors + # other editors define_editor("gedit") do cmd, path, line, column `$cmd +$line:$column $path` end - define_editor(Any["micro", "kak"]; wait=true) do cmd, path, line, column + define_editor(["micro", "kak"]; wait=true) do cmd, path, line, column `$cmd +$line $path` end + define_editor(["hx", "helix"]; wait=true) do cmd, path, line, column + `$cmd $path:$line:$column` + end define_editor(["textmate", "mate", "kate"]) do cmd, path, line, column `$cmd $path -l $line` end - define_editor(Any[r"\bsubl", r"\batom", "pycharm", "bbedit"]) do cmd, path, line, column + define_editor([r"\bsubl", r"\batom", "pycharm", "bbedit"]) do cmd, path, line, column `$cmd $path:$line` end define_editor(["code", "code-insiders"]) do cmd, path, line, column diff --git a/stdlib/InteractiveUtils/src/macros.jl b/stdlib/InteractiveUtils/src/macros.jl index 98189f62edf6f..a9c0283a6fba3 100644 --- a/stdlib/InteractiveUtils/src/macros.jl +++ b/stdlib/InteractiveUtils/src/macros.jl @@ -24,7 +24,7 @@ function recursive_dotcalls!(ex, args, i=1) end end (start, branches) = ex.head === :. ? (1, ex.args[2].args) : (2, ex.args) - length_branches = length(branches)::Integer + length_branches = length(branches)::Int for j in start:length_branches branch, i = recursive_dotcalls!(branches[j], args, i) branches[j] = branch @@ -43,7 +43,7 @@ function gen_call_with_extracted_types(__module__, fcn, ex0, kws=Expr[]) end i = findlast(a->(Meta.isexpr(a, :kw) || Meta.isexpr(a, :parameters)), ex0.args[1].args) args = copy(ex0.args[1].args) - insert!(args, (isnothing(i) ? 2 : i+1), ex0.args[2]) + insert!(args, (isnothing(i) ? 2 : 1+i::Int), ex0.args[2]) ex0 = Expr(:call, args...) end if ex0.head === :. || (ex0.head === :call && ex0.args[1] !== :.. 
&& string(ex0.args[1])[1] == '.') @@ -97,7 +97,7 @@ function gen_call_with_extracted_types(__module__, fcn, ex0, kws=Expr[]) return quote local arg1 = $(esc(ex0.args[1])) local args, kwargs = $separate_kwargs($(map(esc, ex0.args[2:end])...)) - $(fcn)(Core.kwfunc(arg1), + $(fcn)(Core.kwcall, Tuple{typeof(kwargs), Core.Typeof(arg1), map(Core.Typeof, args)...}; $(kws...)) end diff --git a/stdlib/LibUnwind_jll/Project.toml b/stdlib/LibUnwind_jll/Project.toml index df4cc9df68b28..1f5f695a26ba4 100644 --- a/stdlib/LibUnwind_jll/Project.toml +++ b/stdlib/LibUnwind_jll/Project.toml @@ -1,6 +1,6 @@ name = "LibUnwind_jll" uuid = "745a5e78-f969-53e9-954f-d19f2f74f4e3" -version = "1.5.0+2" +version = "1.5.0+4" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/stdlib/LinearAlgebra/docs/src/index.md b/stdlib/LinearAlgebra/docs/src/index.md index 3c0004757788b..a95b622480191 100644 --- a/stdlib/LinearAlgebra/docs/src/index.md +++ b/stdlib/LinearAlgebra/docs/src/index.md @@ -556,8 +556,8 @@ LinearAlgebra.BLAS.rot! LinearAlgebra.BLAS.scal! LinearAlgebra.BLAS.scal LinearAlgebra.BLAS.blascopy! -LinearAlgebra.BLAS.axpy! -LinearAlgebra.BLAS.axpby! +# xAXPY! +# xAXPBY! LinearAlgebra.BLAS.dot LinearAlgebra.BLAS.dotu LinearAlgebra.BLAS.dotc diff --git a/stdlib/LinearAlgebra/src/LinearAlgebra.jl b/stdlib/LinearAlgebra/src/LinearAlgebra.jl index d9376cbda80cc..0a0162da0b1b8 100644 --- a/stdlib/LinearAlgebra/src/LinearAlgebra.jl +++ b/stdlib/LinearAlgebra/src/LinearAlgebra.jl @@ -570,7 +570,8 @@ function versioninfo(io::IO=stdout) println(io, indent, "--> ", lib.libname, " (", interface, ")") end println(io, "Threading:") - println(io, indent, "Threads.nthreads() = ", Base.Threads.nthreads()) + println(io, indent, "Threads.threadpoolsize() = ", Threads.threadpoolsize()) + println(io, indent, "Threads.maxthreadid() = ", Base.Threads.maxthreadid()) println(io, indent, "LinearAlgebra.BLAS.get_num_threads() = ", BLAS.get_num_threads()) println(io, "Relevant environment variables:") env_var_names = [ diff --git a/stdlib/LinearAlgebra/src/bidiag.jl b/stdlib/LinearAlgebra/src/bidiag.jl index 958466f25e1b5..9eaa1517da1e3 100644 --- a/stdlib/LinearAlgebra/src/bidiag.jl +++ b/stdlib/LinearAlgebra/src/bidiag.jl @@ -22,6 +22,9 @@ function Bidiagonal{T}(dv::AbstractVector, ev::AbstractVector, uplo::Union{Symbo convert(AbstractVector{T}, ev)::AbstractVector{T}, uplo) end +function Bidiagonal{T,V}(A::Bidiagonal) where {T,V<:AbstractVector{T}} + Bidiagonal{T,V}(A.dv, A.ev, A.uplo) +end """ Bidiagonal(dv::V, ev::V, uplo::Symbol) where V <: AbstractVector diff --git a/stdlib/LinearAlgebra/src/blas.jl b/stdlib/LinearAlgebra/src/blas.jl index 7547a60f390d4..6048fee0a9a69 100644 --- a/stdlib/LinearAlgebra/src/blas.jl +++ b/stdlib/LinearAlgebra/src/blas.jl @@ -20,8 +20,8 @@ export scal!, scal, blascopy!, - axpy!, - axpby!, + # xAXPY!, + # xAXPBY!, # xDOT dotc, dotu, @@ -1737,14 +1737,14 @@ hemm! Rank-k update of the symmetric matrix `C` as `alpha*A*transpose(A) + beta*C` or `alpha*transpose(A)*A + beta*C` according to [`trans`](@ref stdlib-blas-trans). -Only the [`uplo`](@ref stdlib-blas-uplo) triangle of `C` is used. Returns `C`. +Only the [`uplo`](@ref stdlib-blas-uplo) triangle of `C` is used. Return `C`. """ function syrk! 
end """ syrk(uplo, trans, alpha, A) -Returns either the upper triangle or the lower triangle of `A`, +Return either the upper triangle or the lower triangle of `A`, according to [`uplo`](@ref stdlib-blas-uplo), of `alpha*A*transpose(A)` or `alpha*transpose(A)*A`, according to [`trans`](@ref stdlib-blas-trans). @@ -1916,7 +1916,7 @@ end """ syr2k(uplo, trans, A, B) -Returns the [`uplo`](@ref stdlib-blas-uplo) triangle of `A*transpose(B) + B*transpose(A)` +Return the [`uplo`](@ref stdlib-blas-uplo) triangle of `A*transpose(B) + B*transpose(A)` or `transpose(A)*B + transpose(B)*A`, according to [`trans`](@ref stdlib-blas-trans). """ syr2k(uplo::AbstractChar, trans::AbstractChar, A::AbstractVecOrMat, B::AbstractVecOrMat) = syr2k(uplo, trans, one(eltype(A)), A, B) @@ -1969,14 +1969,14 @@ end Rank-2k update of the Hermitian matrix `C` as `alpha*A*B' + alpha*B*A' + beta*C` or `alpha*A'*B + alpha*B'*A + beta*C` according to [`trans`](@ref stdlib-blas-trans). The scalar `beta` has to be real. -Only the [`uplo`](@ref stdlib-blas-uplo) triangle of `C` is used. Returns `C`. +Only the [`uplo`](@ref stdlib-blas-uplo) triangle of `C` is used. Return `C`. """ function her2k! end """ her2k(uplo, trans, alpha, A, B) -Returns the [`uplo`](@ref stdlib-blas-uplo) triangle of `alpha*A*B' + alpha*B*A'` +Return the [`uplo`](@ref stdlib-blas-uplo) triangle of `alpha*A*B' + alpha*B*A'` or `alpha*A'*B + alpha*B'*A`, according to [`trans`](@ref stdlib-blas-trans). """ her2k(uplo, trans, alpha, A, B) @@ -1984,7 +1984,7 @@ her2k(uplo, trans, alpha, A, B) """ her2k(uplo, trans, A, B) -Returns the [`uplo`](@ref stdlib-blas-uplo) triangle of `A*B' + B*A'` +Return the [`uplo`](@ref stdlib-blas-uplo) triangle of `A*B' + B*A'` or `A'*B + B'*A`, according to [`trans`](@ref stdlib-blas-trans). """ her2k(uplo, trans, A, B) @@ -1999,14 +1999,14 @@ Update `B` as `alpha*A*B` or one of the other three variants determined by Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. [`dA`](@ref stdlib-blas-diag) determines if the diagonal values are read or are assumed to be all ones. -Returns the updated `B`. +Return the updated `B`. """ function trmm! end """ trmm(side, ul, tA, dA, alpha, A, B) -Returns `alpha*A*B` or one of the other three variants determined by +Return `alpha*A*B` or one of the other three variants determined by [`side`](@ref stdlib-blas-side) and [`tA`](@ref stdlib-blas-trans). Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. [`dA`](@ref stdlib-blas-diag) determines if the diagonal values are read or diff --git a/stdlib/LinearAlgebra/src/cholesky.jl b/stdlib/LinearAlgebra/src/cholesky.jl index 917c32625adb5..8e5c85ac88948 100644 --- a/stdlib/LinearAlgebra/src/cholesky.jl +++ b/stdlib/LinearAlgebra/src/cholesky.jl @@ -178,10 +178,8 @@ Base.iterate(C::CholeskyPivoted, ::Val{:done}) = nothing # make a copy that allow inplace Cholesky factorization -@inline choltype(A) = promote_type(typeof(sqrt(oneunit(eltype(A)))), Float32) -@inline cholcopy(A::StridedMatrix) = copymutable_oftype(A, choltype(A)) -@inline cholcopy(A::RealHermSymComplexHerm) = copymutable_oftype(A, choltype(A)) -@inline cholcopy(A::AbstractMatrix) = copy_similar(A, choltype(A)) +choltype(A) = promote_type(typeof(sqrt(oneunit(eltype(A)))), Float32) +cholcopy(A::AbstractMatrix) = eigencopy_oftype(A, choltype(A)) # _chol!. 
Internal methods for calling unpivoted Cholesky ## BLAS/LAPACK element types diff --git a/stdlib/LinearAlgebra/src/dense.jl b/stdlib/LinearAlgebra/src/dense.jl index bcf9443f7632c..0689eee635330 100644 --- a/stdlib/LinearAlgebra/src/dense.jl +++ b/stdlib/LinearAlgebra/src/dense.jl @@ -1543,7 +1543,7 @@ function nullspace(A::AbstractVecOrMat; atol::Real = 0.0, rtol::Real = (min(size SVD = svd(A; full=true) tol = max(atol, SVD.S[1]*rtol) indstart = sum(s -> s .> tol, SVD.S) + 1 - return copy(SVD.Vt[indstart:end,:]') + return copy((@view SVD.Vt[indstart:end,:])') end """ diff --git a/stdlib/LinearAlgebra/src/diagonal.jl b/stdlib/LinearAlgebra/src/diagonal.jl index 32687404752ff..3de6cd9b213fb 100644 --- a/stdlib/LinearAlgebra/src/diagonal.jl +++ b/stdlib/LinearAlgebra/src/diagonal.jl @@ -252,38 +252,19 @@ end rmul!(A::AbstractMatrix, D::Diagonal) = @inline mul!(A, A, D) lmul!(D::Diagonal, B::AbstractVecOrMat) = @inline mul!(B, D, B) -#TODO: It seems better to call (D' * adjA')' directly? -function *(adjA::Adjoint{<:Any,<:AbstractMatrix}, D::Diagonal) - A = adjA.parent - Ac = similar(A, promote_op(*, eltype(A), eltype(D.diag)), (size(A, 2), size(A, 1))) - adjoint!(Ac, A) +function *(A::AdjOrTransAbsMat, D::Diagonal) + Ac = copy_similar(A, promote_op(*, eltype(A), eltype(D.diag))) rmul!(Ac, D) end -function *(transA::Transpose{<:Any,<:AbstractMatrix}, D::Diagonal) - A = transA.parent - At = similar(A, promote_op(*, eltype(A), eltype(D.diag)), (size(A, 2), size(A, 1))) - transpose!(At, A) - rmul!(At, D) -end - *(D::Diagonal, adjQ::Adjoint{<:Any,<:Union{QRCompactWYQ,QRPackedQ}}) = rmul!(Array{promote_type(eltype(D), eltype(adjQ))}(D), adjQ) -function *(D::Diagonal, adjA::Adjoint{<:Any,<:AbstractMatrix}) - A = adjA.parent - Ac = similar(A, promote_op(*, eltype(A), eltype(D.diag)), (size(A, 2), size(A, 1))) - adjoint!(Ac, A) +function *(D::Diagonal, A::AdjOrTransAbsMat) + Ac = copy_similar(A, promote_op(*, eltype(A), eltype(D.diag))) lmul!(D, Ac) end -function *(D::Diagonal, transA::Transpose{<:Any,<:AbstractMatrix}) - A = transA.parent - At = similar(A, promote_op(*, eltype(A), eltype(D.diag)), (size(A, 2), size(A, 1))) - transpose!(At, A) - lmul!(D, At) -end - @inline function __muldiag!(out, D::Diagonal, B, alpha, beta) require_one_based_indexing(B) require_one_based_indexing(out) @@ -827,6 +808,9 @@ end dot(A::AbstractMatrix, B::Diagonal) = conj(dot(B, A)) function _mapreduce_prod(f, x, D::Diagonal, y) + if !(length(x) == length(D.diag) == length(y)) + throw(DimensionMismatch("x has length $(length(x)), D has size $(size(D)), and y has $(length(y))")) + end if isempty(x) && isempty(D) && isempty(y) return zero(promote_op(f, eltype(x), eltype(D), eltype(y))) else @@ -853,8 +837,8 @@ end inv(C::Cholesky{<:Any,<:Diagonal}) = Diagonal(map(inv∘abs2, C.factors.diag)) -@inline cholcopy(A::Diagonal) = copymutable_oftype(A, choltype(A)) -@inline cholcopy(A::RealHermSymComplexHerm{<:Real,<:Diagonal}) = copymutable_oftype(A, choltype(A)) +cholcopy(A::Diagonal) = copymutable_oftype(A, choltype(A)) +cholcopy(A::RealHermSymComplexHerm{<:Any,<:Diagonal}) = Diagonal(copy_similar(diag(A), choltype(A))) function getproperty(C::Cholesky{<:Any,<:Diagonal}, d::Symbol) Cfactors = getfield(C, :factors) diff --git a/stdlib/LinearAlgebra/src/generic.jl b/stdlib/LinearAlgebra/src/generic.jl index d4d33f1320ea6..7d472856b3ac8 100644 --- a/stdlib/LinearAlgebra/src/generic.jl +++ b/stdlib/LinearAlgebra/src/generic.jl @@ -367,7 +367,7 @@ tril(M::AbstractMatrix) = tril!(copy(M)) """ triu(M, k::Integer) -Returns the upper 
triangle of `M` starting from the `k`th superdiagonal. +Return the upper triangle of `M` starting from the `k`th superdiagonal. # Examples ```jldoctest @@ -398,7 +398,7 @@ triu(M::AbstractMatrix,k::Integer) = triu!(copy(M),k) """ tril(M, k::Integer) -Returns the lower triangle of `M` starting from the `k`th superdiagonal. +Return the lower triangle of `M` starting from the `k`th superdiagonal. # Examples ```jldoctest @@ -1419,9 +1419,7 @@ isdiag(x::Number) = true axpy!(α, x::AbstractArray, y::AbstractArray) Overwrite `y` with `x * α + y` and return `y`. -If `x` and `y` have the same axes, it's equivalent with `y .+= x .* a` - -See also [`BLAS.axpy!`](@ref) +If `x` and `y` have the same axes, it's equivalent with `y .+= x .* a`. # Examples ```jldoctest @@ -1465,9 +1463,7 @@ end axpby!(α, x::AbstractArray, β, y::AbstractArray) Overwrite `y` with `x * α + y * β` and return `y`. -If `x` and `y` have the same axes, it's equivalent with `y .= x .* a .+ y .* β` - -See also [`BLAS.axpby!`](@ref) +If `x` and `y` have the same axes, it's equivalent with `y .= x .* a .+ y .* β`. # Examples ```jldoctest diff --git a/stdlib/LinearAlgebra/src/givens.jl b/stdlib/LinearAlgebra/src/givens.jl index 6074bb1ed3b94..c37df41f9567c 100644 --- a/stdlib/LinearAlgebra/src/givens.jl +++ b/stdlib/LinearAlgebra/src/givens.jl @@ -9,21 +9,16 @@ end transpose(R::AbstractRotation) = error("transpose not implemented for $(typeof(R)). Consider using adjoint instead of transpose.") -function (*)(R::AbstractRotation{T}, A::AbstractVecOrMat{S}) where {T,S} +(*)(R::AbstractRotation, A::AbstractVector) = _rot_mul_vecormat(R, A) +(*)(R::AbstractRotation, A::AbstractMatrix) = _rot_mul_vecormat(R, A) +function _rot_mul_vecormat(R::AbstractRotation{T}, A::AbstractVecOrMat{S}) where {T,S} TS = typeof(zero(T)*zero(S) + zero(T)*zero(S)) lmul!(convert(AbstractRotation{TS}, R), copy_similar(A, TS)) end -function (*)(adjR::AdjointRotation{T}, A::AbstractVecOrMat{S}) where {T,S} - TS = typeof(zero(T)*zero(S) + zero(T)*zero(S)) - lmul!(convert(AbstractRotation{TS}, adjR.R)', copy_similar(A, TS)) -end -(*)(A::AbstractVector, adjR::AdjointRotation) = _absvecormat_mul_adjrot(A, adjR) -(*)(A::AbstractMatrix, adjR::AdjointRotation) = _absvecormat_mul_adjrot(A, adjR) -function _absvecormat_mul_adjrot(A::AbstractVecOrMat{T}, adjR::AdjointRotation{S}) where {T,S} - TS = typeof(zero(T)*zero(S) + zero(T)*zero(S)) - rmul!(copy_similar(A, TS), convert(AbstractRotation{TS}, adjR.R)') -end -function(*)(A::AbstractMatrix{T}, R::AbstractRotation{S}) where {T,S} + +(*)(A::AbstractVector, R::AbstractRotation) = _vecormat_mul_rot(A, R) +(*)(A::AbstractMatrix, R::AbstractRotation) = _vecormat_mul_rot(A, R) +function _vecormat_mul_rot(A::AbstractVecOrMat{T}, R::AbstractRotation{S}) where {T,S} TS = typeof(zero(T)*zero(S) + zero(T)*zero(S)) rmul!(copy_similar(A, TS), convert(AbstractRotation{TS}, R)) end @@ -51,6 +46,8 @@ end convert(::Type{T}, r::T) where {T<:AbstractRotation} = r convert(::Type{T}, r::AbstractRotation) where {T<:AbstractRotation} = T(r)::T +convert(::Type{AbstractRotation{T}}, r::AdjointRotation) where {T} = convert(AbstractRotation{T}, r.R)' +convert(::Type{AbstractRotation{T}}, r::AdjointRotation{T}) where {T} = r Givens(i1, i2, c, s) = Givens(i1, i2, promote(c, s)...) Givens{T}(G::Givens{T}) where {T} = G @@ -316,7 +313,7 @@ B[i2,j] = 0 See also [`LinearAlgebra.Givens`](@ref). 
""" givens(A::AbstractMatrix, i1::Integer, i2::Integer, j::Integer) = - givens(A[i1,j], A[i2,j],i1,i2) + givens(A[i1,j], A[i2,j], i1, i2) """ @@ -383,30 +380,49 @@ function lmul!(G::Givens, R::Rotation) push!(R.rotations, G) return R end -function lmul!(R::Rotation, A::AbstractMatrix) - @inbounds for i = 1:length(R.rotations) +function rmul!(R::Rotation, G::Givens) + pushfirst!(R.rotations, G) + return R +end + +function lmul!(R::Rotation, A::AbstractVecOrMat) + @inbounds for i in eachindex(R.rotations) lmul!(R.rotations[i], A) end return A end function rmul!(A::AbstractMatrix, R::Rotation) - @inbounds for i = 1:length(R.rotations) + @inbounds for i in eachindex(R.rotations) rmul!(A, R.rotations[i]) end return A end -function lmul!(adjR::AdjointRotation{<:Any,<:Rotation}, A::AbstractMatrix) + +function lmul!(adjR::AdjointRotation{<:Any,<:Rotation}, A::AbstractVecOrMat) R = adjR.R - @inbounds for i = 1:length(R.rotations) + @inbounds for i in eachindex(R.rotations) lmul!(adjoint(R.rotations[i]), A) end return A end function rmul!(A::AbstractMatrix, adjR::AdjointRotation{<:Any,<:Rotation}) R = adjR.R - @inbounds for i = 1:length(R.rotations) + @inbounds for i in eachindex(R.rotations) rmul!(A, adjoint(R.rotations[i])) end return A end -*(G1::Givens{T}, G2::Givens{T}) where {T} = Rotation([G2, G1]) + +function *(G1::Givens{S}, G2::Givens{T}) where {S,T} + TS = promote_type(T, S) + Rotation{TS}([convert(AbstractRotation{TS}, G2), convert(AbstractRotation{TS}, G1)]) +end +*(G::Givens{T}...) where {T} = Rotation([reverse(G)...]) +function *(G::Givens{S}, R::Rotation{T}) where {S,T} + TS = promote_type(T, S) + Rotation(vcat(convert(AbstractRotation{TS}, R).rotations, convert(AbstractRotation{TS}, G))) +end +function *(R::Rotation{S}, G::Givens{T}) where {S,T} + TS = promote_type(T, S) + Rotation(vcat(convert(AbstractRotation{TS}, G), convert(AbstractRotation{TS}, R).rotations)) +end diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl index a95a73dfc8819..d0013aa553929 100644 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ b/stdlib/LinearAlgebra/src/hessenberg.jl @@ -502,7 +502,7 @@ true ``` """ hessenberg(A::AbstractMatrix{T}) where T = - hessenberg!(copymutable_oftype(A, eigtype(T))) + hessenberg!(eigencopy_oftype(A, eigtype(T))) function show(io::IO, mime::MIME"text/plain", F::Hessenberg) summary(io, F) diff --git a/stdlib/LinearAlgebra/src/lapack.jl b/stdlib/LinearAlgebra/src/lapack.jl index 4ff2035c85f55..3b912fa6adedb 100644 --- a/stdlib/LinearAlgebra/src/lapack.jl +++ b/stdlib/LinearAlgebra/src/lapack.jl @@ -550,6 +550,7 @@ for (gebrd, gelqf, geqlf, geqrf, geqp3, geqrt, geqrt3, gerqf, getrf, elty, relty # DOUBLE PRECISION A( LDA, * ) function getrf!(A::AbstractMatrix{$elty}) require_one_based_indexing(A) + chkfinite(A) chkstride1(A) m, n = size(A) lda = max(1,stride(A, 2)) @@ -5180,6 +5181,7 @@ for (syev, syevr, sygvd, elty, relty) in # COMPLEX*16 A( LDA, * ), WORK( * ) function syev!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty}) chkstride1(A) + chkuplofinite(A, uplo) n = checksquare(A) W = similar(A, $relty, n) work = Vector{$elty}(undef, 1) @@ -5218,6 +5220,7 @@ for (syev, syevr, sygvd, elty, relty) in function syevr!(jobz::AbstractChar, range::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty}, vl::AbstractFloat, vu::AbstractFloat, il::Integer, iu::Integer, abstol::AbstractFloat) chkstride1(A) + chkuplofinite(A, uplo) n = checksquare(A) if range == 'I' && !(1 <= il <= iu <= n) throw(ArgumentError("illegal choice of 
eigenvalue indices (il = $il, iu=$iu), which must be between 1 and n = $n")) @@ -5286,6 +5289,8 @@ for (syev, syevr, sygvd, elty, relty) in # COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) function sygvd!(itype::Integer, jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) chkstride1(A, B) + chkuplofinite(A, uplo) + chkuplofinite(B, uplo) n, m = checksquare(A, B) if n != m throw(DimensionMismatch("dimensions of A, ($n,$n), and B, ($m,$m), must match")) diff --git a/stdlib/LinearAlgebra/src/lq.jl b/stdlib/LinearAlgebra/src/lq.jl index 52d4f944f682f..81c34447402d7 100644 --- a/stdlib/LinearAlgebra/src/lq.jl +++ b/stdlib/LinearAlgebra/src/lq.jl @@ -120,7 +120,7 @@ julia> l == S.L && q == S.Q true ``` """ -lq(A::AbstractMatrix{T}) where {T} = lq!(copymutable_oftype(A, lq_eltype(T))) +lq(A::AbstractMatrix{T}) where {T} = lq!(copy_similar(A, lq_eltype(T))) lq(x::Number) = lq!(fill(convert(lq_eltype(typeof(x)), x), 1, 1)) lq_eltype(::Type{T}) where {T} = typeof(zero(T) / sqrt(abs2(one(T)))) @@ -195,9 +195,9 @@ function lmul!(A::LQ, B::StridedVecOrMat) lmul!(LowerTriangular(A.L), view(lmul!(A.Q, B), 1:size(A,1), axes(B,2))) return B end -function *(A::LQ{TA}, B::StridedVecOrMat{TB}) where {TA,TB} +function *(A::LQ{TA}, B::AbstractVecOrMat{TB}) where {TA,TB} TAB = promote_type(TA, TB) - _cut_B(lmul!(convert(Factorization{TAB}, A), copymutable_oftype(B, TAB)), 1:size(A,1)) + _cut_B(lmul!(convert(Factorization{TAB}, A), copy_similar(B, TAB)), 1:size(A,1)) end ## Multiplication by Q diff --git a/stdlib/LinearAlgebra/src/lu.jl b/stdlib/LinearAlgebra/src/lu.jl index 47e3fbfcb0232..a6fb9f6542aad 100644 --- a/stdlib/LinearAlgebra/src/lu.jl +++ b/stdlib/LinearAlgebra/src/lu.jl @@ -136,6 +136,7 @@ lu!(A::StridedMatrix, pivot::Union{RowMaximum,NoPivot,RowNonZero} = lupivottype( generic_lufact!(A, pivot; check = check) function generic_lufact!(A::StridedMatrix{T}, pivot::Union{RowMaximum,NoPivot,RowNonZero} = lupivottype(T); check::Bool = true) where {T} + LAPACK.chkfinite(A) # Extract values m, n = size(A) minmn = min(m,n) @@ -461,18 +462,18 @@ end function (/)(A::AbstractMatrix, F::Adjoint{<:Any,<:LU}) T = promote_type(eltype(A), eltype(F)) - return adjoint(ldiv!(F.parent, copymutable_oftype(adjoint(A), T))) + return adjoint(ldiv!(F.parent, copy_similar(adjoint(A), T))) end # To avoid ambiguities with definitions in adjtrans.jl and factorizations.jl (/)(adjA::Adjoint{<:Any,<:AbstractVector}, F::Adjoint{<:Any,<:LU}) = adjoint(F.parent \ adjA.parent) (/)(adjA::Adjoint{<:Any,<:AbstractMatrix}, F::Adjoint{<:Any,<:LU}) = adjoint(F.parent \ adjA.parent) function (/)(trA::Transpose{<:Any,<:AbstractVector}, F::Adjoint{<:Any,<:LU}) T = promote_type(eltype(trA), eltype(F)) - return adjoint(ldiv!(F.parent, conj!(copymutable_oftype(trA.parent, T)))) + return adjoint(ldiv!(F.parent, conj!(copy_similar(trA.parent, T)))) end function (/)(trA::Transpose{<:Any,<:AbstractMatrix}, F::Adjoint{<:Any,<:LU}) T = promote_type(eltype(trA), eltype(F)) - return adjoint(ldiv!(F.parent, conj!(copymutable_oftype(trA.parent, T)))) + return adjoint(ldiv!(F.parent, conj!(copy_similar(trA.parent, T)))) end function det(F::LU{T}) where T diff --git a/stdlib/LinearAlgebra/src/qr.jl b/stdlib/LinearAlgebra/src/qr.jl index 741b53bcd56a9..023146040eb82 100644 --- a/stdlib/LinearAlgebra/src/qr.jl +++ b/stdlib/LinearAlgebra/src/qr.jl @@ -909,11 +909,12 @@ function ldiv!(A::QRPivoted{T}, B::StridedMatrix{T}, rcond::Real) where T<:BlasF end rnk += 1 end - C, τ = LAPACK.tzrzf!(A.factors[1:rnk,:]) - 
ldiv!(UpperTriangular(C[1:rnk,1:rnk]),view(lmul!(adjoint(A.Q), view(B, 1:mA, 1:nrhs)), 1:rnk, 1:nrhs)) + C, τ = LAPACK.tzrzf!(A.factors[1:rnk, :]) + lmul!(A.Q', view(B, 1:mA, :)) + ldiv!(UpperTriangular(view(C, :, 1:rnk)), view(B, 1:rnk, :)) B[rnk+1:end,:] .= zero(T) - LAPACK.ormrz!('L', eltype(B)<:Complex ? 'C' : 'T', C, τ, view(B,1:nA,1:nrhs)) - B[1:nA,:] = view(B, 1:nA, :)[invperm(A.p),:] + LAPACK.ormrz!('L', eltype(B)<:Complex ? 'C' : 'T', C, τ, view(B, 1:nA, :)) + B[A.p,:] = B[1:nA,:] return B, rnk end ldiv!(A::QRPivoted{T}, B::StridedVector{T}) where {T<:BlasFloat} = diff --git a/stdlib/LinearAlgebra/src/special.jl b/stdlib/LinearAlgebra/src/special.jl index 7fcace8e4ef71..8af8625a0e817 100644 --- a/stdlib/LinearAlgebra/src/special.jl +++ b/stdlib/LinearAlgebra/src/special.jl @@ -43,8 +43,8 @@ Bidiagonal(A::AbstractTriangular) = isbanded(A, -1, 0) ? Bidiagonal(diag(A, 0), diag(A, -1), :L) : # is lower bidiagonal throw(ArgumentError("matrix cannot be represented as Bidiagonal")) -_lucopy(A::Bidiagonal, T) = copymutable_oftype(Tridiagonal(A), T) -_lucopy(A::Diagonal, T) = copymutable_oftype(Tridiagonal(A), T) +_lucopy(A::Bidiagonal, T) = copymutable_oftype(Tridiagonal(A), T) +_lucopy(A::Diagonal, T) = copymutable_oftype(Tridiagonal(A), T) function _lucopy(A::SymTridiagonal, T) du = copy_similar(_evview(A), T) dl = copy.(transpose.(du)) diff --git a/stdlib/LinearAlgebra/src/svd.jl b/stdlib/LinearAlgebra/src/svd.jl index c58c83bcb5a98..a4d83edb50f13 100644 --- a/stdlib/LinearAlgebra/src/svd.jl +++ b/stdlib/LinearAlgebra/src/svd.jl @@ -175,11 +175,11 @@ julia> Uonly == U true ``` """ -function svd(A::StridedVecOrMat{T}; full::Bool = false, alg::Algorithm = default_svd_alg(A)) where {T} - svd!(copymutable_oftype(A, eigtype(T)), full = full, alg = alg) +function svd(A::AbstractVecOrMat{T}; full::Bool = false, alg::Algorithm = default_svd_alg(A)) where {T} + svd!(eigencopy_oftype(A, eigtype(T)), full = full, alg = alg) end -function svd(A::StridedVecOrMat{T}; full::Bool = false, alg::Algorithm = default_svd_alg(A)) where {T <: Union{Float16,Complex{Float16}}} - A = svd!(copymutable_oftype(A, eigtype(T)), full = full, alg = alg) +function svd(A::AbstractVecOrMat{T}; full::Bool = false, alg::Algorithm = default_svd_alg(A)) where {T <: Union{Float16,Complex{Float16}}} + A = svd!(eigencopy_oftype(A, eigtype(T)), full = full, alg = alg) return SVD{T}(A) end function svd(x::Number; full::Bool = false, alg::Algorithm = default_svd_alg(x)) @@ -240,10 +240,8 @@ julia> svdvals(A) 0.0 ``` """ -svdvals(A::AbstractMatrix{T}) where {T} = svdvals!(copymutable_oftype(A, eigtype(T))) +svdvals(A::AbstractMatrix{T}) where {T} = svdvals!(eigencopy_oftype(A, eigtype(T))) svdvals(A::AbstractVector{T}) where {T} = [convert(eigtype(T), norm(A))] -svdvals(A::AbstractMatrix{<:BlasFloat}) = svdvals!(copy(A)) -svdvals(A::AbstractVector{<:BlasFloat}) = [norm(A)] svdvals(x::Number) = abs(x) svdvals(S::SVD{<:Any,T}) where {T} = (S.S)::Vector{T} @@ -457,9 +455,9 @@ julia> U == Uonly true ``` """ -function svd(A::StridedMatrix{TA}, B::StridedMatrix{TB}) where {TA,TB} +function svd(A::AbstractMatrix{TA}, B::AbstractMatrix{TB}) where {TA,TB} S = promote_type(eigtype(TA),TB) - return svd!(copymutable_oftype(A, S), copymutable_oftype(B, S)) + return svd!(copy_similar(A, S), copy_similar(B, S)) end # This method can be heavily optimized but it is probably not critical # and might introduce bugs or inconsistencies relative to the 1x1 matrix @@ -541,7 +539,6 @@ function svdvals!(A::StridedMatrix{T}, B::StridedMatrix{T}) where 
T<:BlasFloat end a[1:k + l] ./ b[1:k + l] end -svdvals(A::StridedMatrix{T},B::StridedMatrix{T}) where {T<:BlasFloat} = svdvals!(copy(A),copy(B)) """ svdvals(A, B) @@ -567,9 +564,9 @@ julia> svdvals(A, B) 1.0 ``` """ -function svdvals(A::StridedMatrix{TA}, B::StridedMatrix{TB}) where {TA,TB} +function svdvals(A::AbstractMatrix{TA}, B::AbstractMatrix{TB}) where {TA,TB} S = promote_type(eigtype(TA), TB) - return svdvals!(copymutable_oftype(A, S), copymutable_oftype(B, S)) + return svdvals!(copy_similar(A, S), copy_similar(B, S)) end svdvals(x::Number, y::Number) = abs(x/y) diff --git a/stdlib/LinearAlgebra/src/transpose.jl b/stdlib/LinearAlgebra/src/transpose.jl index c7ca6339aac6a..f15f54669d124 100644 --- a/stdlib/LinearAlgebra/src/transpose.jl +++ b/stdlib/LinearAlgebra/src/transpose.jl @@ -201,3 +201,12 @@ function copy_transpose!(B::AbstractVecOrMat, ir_dest::AbstractRange{Int}, jr_de end return B end + +function copy_similar(A::AdjointAbsMat, ::Type{T}) where {T} + C = similar(A, T, size(A)) + adjoint!(C, parent(A)) +end +function copy_similar(A::TransposeAbsMat, ::Type{T}) where {T} + C = similar(A, T, size(A)) + transpose!(C, parent(A)) +end diff --git a/stdlib/LinearAlgebra/src/tridiag.jl b/stdlib/LinearAlgebra/src/tridiag.jl index 70556d1c92a0b..01ce355b2c34b 100644 --- a/stdlib/LinearAlgebra/src/tridiag.jl +++ b/stdlib/LinearAlgebra/src/tridiag.jl @@ -525,6 +525,9 @@ Tridiagonal(dl::V, d::V, du::V, du2::V) where {T,V<:AbstractVector{T}} = Tridiag function Tridiagonal{T}(dl::AbstractVector, d::AbstractVector, du::AbstractVector) where {T} Tridiagonal(map(x->convert(AbstractVector{T}, x), (dl, d, du))...) end +function Tridiagonal{T,V}(A::Tridiagonal) where {T,V<:AbstractVector{T}} + Tridiagonal{T,V}(A.dl, A.d, A.du) +end """ Tridiagonal(A) diff --git a/stdlib/LinearAlgebra/test/bidiag.jl b/stdlib/LinearAlgebra/test/bidiag.jl index c3242b705f110..22c070be13cb5 100644 --- a/stdlib/LinearAlgebra/test/bidiag.jl +++ b/stdlib/LinearAlgebra/test/bidiag.jl @@ -52,6 +52,9 @@ Random.seed!(1) # from matrix @test Bidiagonal(ubd, :U) == Bidiagonal(Matrix(ubd), :U) == ubd @test Bidiagonal(lbd, :L) == Bidiagonal(Matrix(lbd), :L) == lbd + # from its own type + @test typeof(ubd)(ubd) === ubd + @test typeof(lbd)(lbd) === lbd end @test eltype(Bidiagonal{elty}([1,2,3,4], [1.0f0,2.0f0,3.0f0], :U)) == elty @test eltype(Bidiagonal([1,2,3,4], [1.0f0,2.0f0,3.0f0], :U)) == Float32 # promotion test diff --git a/stdlib/LinearAlgebra/test/bunchkaufman.jl b/stdlib/LinearAlgebra/test/bunchkaufman.jl index d9efa48c8766c..613e4d09a3cc6 100644 --- a/stdlib/LinearAlgebra/test/bunchkaufman.jl +++ b/stdlib/LinearAlgebra/test/bunchkaufman.jl @@ -190,4 +190,10 @@ end @test_throws ArgumentError("adjoint not implemented for complex symmetric matrices") F' end +@testset "BunchKaufman for AbstractMatrix" begin + S = SymTridiagonal(fill(2.0, 4), ones(3)) + B = bunchkaufman(S) + @test B.U * B.D * B.U' ≈ S +end + end # module TestBunchKaufman diff --git a/stdlib/LinearAlgebra/test/cholesky.jl b/stdlib/LinearAlgebra/test/cholesky.jl index d1d00e2326dfb..a3008a236df7b 100644 --- a/stdlib/LinearAlgebra/test/cholesky.jl +++ b/stdlib/LinearAlgebra/test/cholesky.jl @@ -390,9 +390,9 @@ end # complex D = complex(D) - CD = cholesky(D) - CM = cholesky(Matrix(D)) - @test CD isa Cholesky{ComplexF64} + CD = cholesky(Hermitian(D)) + CM = cholesky(Matrix(Hermitian(D))) + @test CD isa Cholesky{ComplexF64,<:Diagonal} @test CD.U ≈ Diagonal(.√d) ≈ CM.U @test D ≈ CD.L * CD.U @test CD.info == 0 @@ -407,6 +407,12 @@ end @test_throws InexactError 
cholesky!(Diagonal([2, 1])) end +@testset "Cholesky for AbstractMatrix" begin + S = SymTridiagonal(fill(2.0, 4), ones(3)) + C = cholesky(S) + @test C.L * C.U ≈ S +end + @testset "constructor with non-BlasInt arguments" begin x = rand(5,5) diff --git a/stdlib/LinearAlgebra/test/dense.jl b/stdlib/LinearAlgebra/test/dense.jl index 01f573bf43674..1546f3247acf4 100644 --- a/stdlib/LinearAlgebra/test/dense.jl +++ b/stdlib/LinearAlgebra/test/dense.jl @@ -238,6 +238,15 @@ end @test pinv(M,rtol=0.5)== M end +@testset "Test inv of matrix of NaNs" begin + for eltya in (NaN16, NaN32, NaN32) + r = fill(eltya, 2, 2) + @test_throws ArgumentError inv(r) + c = fill(complex(eltya, eltya), 2, 2) + @test_throws ArgumentError inv(c) + end +end + @testset "test out of bounds triu/tril" begin local m, n = 5, 7 ainit = rand(m, n) diff --git a/stdlib/LinearAlgebra/test/diagonal.jl b/stdlib/LinearAlgebra/test/diagonal.jl index 3e6f456c3de1e..e78e7e311a29a 100644 --- a/stdlib/LinearAlgebra/test/diagonal.jl +++ b/stdlib/LinearAlgebra/test/diagonal.jl @@ -977,10 +977,14 @@ end @test s1 == prod(sign, d) end -@testset "Empty (#35424)" begin +@testset "Empty (#35424) & size checks (#47060)" begin @test zeros(0)'*Diagonal(zeros(0))*zeros(0) === 0.0 @test transpose(zeros(0))*Diagonal(zeros(Complex{Int}, 0))*zeros(0) === 0.0 + 0.0im @test dot(zeros(Int32, 0), Diagonal(zeros(Int, 0)), zeros(Int16, 0)) === 0 + @test_throws DimensionMismatch zeros(2)' * Diagonal(zeros(2)) * zeros(3) + @test_throws DimensionMismatch zeros(3)' * Diagonal(zeros(2)) * zeros(2) + @test_throws DimensionMismatch dot(zeros(2), Diagonal(zeros(2)), zeros(3)) + @test_throws DimensionMismatch dot(zeros(3), Diagonal(zeros(2)), zeros(2)) end @testset "Diagonal(undef)" begin diff --git a/stdlib/LinearAlgebra/test/eigen.jl b/stdlib/LinearAlgebra/test/eigen.jl index 746b724439217..413a8df0474fa 100644 --- a/stdlib/LinearAlgebra/test/eigen.jl +++ b/stdlib/LinearAlgebra/test/eigen.jl @@ -159,8 +159,17 @@ end test_matrix = rand(typeof(eltya),3,3) test_matrix[1,3] = eltya @test_throws(ArgumentError, eigen(test_matrix)) + @test_throws(ArgumentError, eigvals(test_matrix)) + @test_throws(ArgumentError, eigvecs(test_matrix)) @test_throws(ArgumentError, eigen(Symmetric(test_matrix))) + @test_throws(ArgumentError, eigvals(Symmetric(test_matrix))) + @test_throws(ArgumentError, eigvecs(Symmetric(test_matrix))) @test_throws(ArgumentError, eigen(Hermitian(test_matrix))) + @test_throws(ArgumentError, eigvals(Hermitian(test_matrix))) + @test_throws(ArgumentError, eigvecs(Hermitian(test_matrix))) + @test_throws(ArgumentError, eigen(Hermitian(complex.(test_matrix)))) + @test_throws(ArgumentError, eigvals(Hermitian(complex.(test_matrix)))) + @test_throws(ArgumentError, eigvecs(Hermitian(complex.(test_matrix)))) @test eigen(Symmetric(test_matrix, :L)) isa Eigen @test eigen(Hermitian(test_matrix, :L)) isa Eigen end diff --git a/stdlib/LinearAlgebra/test/generic.jl b/stdlib/LinearAlgebra/test/generic.jl index 25a89977bce54..0b57ecd2713b0 100644 --- a/stdlib/LinearAlgebra/test/generic.jl +++ b/stdlib/LinearAlgebra/test/generic.jl @@ -445,6 +445,7 @@ Base.:-(a::ModInt{n}) where {n} = ModInt{n}(-a.k) Base.inv(a::ModInt{n}) where {n} = ModInt{n}(invmod(a.k, n)) Base.:/(a::ModInt{n}, b::ModInt{n}) where {n} = a*inv(b) +Base.isfinite(a::ModInt{n}) where {n} = isfinite(a.k) Base.zero(::Type{ModInt{n}}) where {n} = ModInt{n}(0) Base.zero(::ModInt{n}) where {n} = ModInt{n}(0) Base.one(::Type{ModInt{n}}) where {n} = ModInt{n}(1) diff --git a/stdlib/LinearAlgebra/test/givens.jl 
b/stdlib/LinearAlgebra/test/givens.jl index 9f23fe4ffaa61..a2556b45d1280 100644 --- a/stdlib/LinearAlgebra/test/givens.jl +++ b/stdlib/LinearAlgebra/test/givens.jl @@ -6,7 +6,7 @@ using Test, LinearAlgebra, Random using LinearAlgebra: Givens, Rotation # Test givens rotations -@testset for elty in (Float32, Float64, ComplexF32, ComplexF64) +@testset "Test Givens for $elty" for elty in (Float32, Float64, ComplexF32, ComplexF64) if elty <: Real raw_A = convert(Matrix{elty}, randn(10,10)) else @@ -15,14 +15,16 @@ using LinearAlgebra: Givens, Rotation @testset for A in (raw_A, view(raw_A, 1:10, 1:10)) Ac = copy(A) R = Rotation(Givens{elty}[]) + T = Rotation(Givens{elty}[]) for j = 1:8 for i = j+2:10 G, _ = givens(A, j+1, i, j) lmul!(G, A) rmul!(A, adjoint(G)) lmul!(G, R) + rmul!(T, G) - @test lmul!(G,Matrix{elty}(I, 10, 10)) == [G[i,j] for i=1:10,j=1:10] + @test lmul!(G, Matrix{elty}(I, 10, 10)) == [G[i,j] for i=1:10,j=1:10] @testset "transposes" begin @test (@inferred G'*G)*Matrix(elty(1)I, 10, 10) ≈ Matrix(I, 10, 10) @@ -34,10 +36,13 @@ using LinearAlgebra: Givens, Rotation end end @test (R')' === R - @test R * A ≈ (A' * R')' ≈ lmul!(R, copy(A)) - @test A * R ≈ (R' * A')' ≈ rmul!(copy(A), R) - @test R' * A ≈ lmul!(R', copy(A)) - @test A * R' ≈ rmul!(copy(A), R') + # test products of Givens and Rotations + for r in (R, T, *(R.rotations...), *(R.rotations[1], *(R.rotations[2:end]...))) + @test r * A ≈ (A' * r')' ≈ lmul!(r, copy(A)) + @test A * r ≈ (r' * A')' ≈ rmul!(copy(A), r) + @test r' * A ≈ lmul!(r', copy(A)) + @test A * r' ≈ rmul!(copy(A), r') + end @test_throws ArgumentError givens(A, 3, 3, 2) @test_throws ArgumentError givens(one(elty),zero(elty),2,2) G, _ = givens(one(elty),zero(elty),11,12) @@ -51,27 +56,29 @@ using LinearAlgebra: Givens, Rotation @test (G*I10)' * (G*I10) ≈ I10 K, _ = givens(zero(elty),one(elty),9,10) @test (K*I10)' * (K*I10) ≈ I10 + end - @testset "Givens * vectors" begin - if isa(A, Array) - x = A[:, 1] - else - x = view(A, 1:10, 1) - end - G, r = givens(x[2], x[4], 2, 4) + @testset "Givens * vectors" begin + for x in (raw_A[:,1], view(raw_A, :, 1)) + G, r = @inferred givens(x[2], x[4], 2, 4) @test (G*x)[2] ≈ r @test abs((G*x)[4]) < eps(real(elty)) - @inferred givens(x[2], x[4], 2, 4) - G, r = givens(x, 2, 4) + G, r = @inferred givens(x, 2, 4) @test (G*x)[2] ≈ r @test abs((G*x)[4]) < eps(real(elty)) - @inferred givens(x, 2, 4) G, r = givens(x, 4, 2) @test (G*x)[4] ≈ r @test abs((G*x)[2]) < eps(real(elty)) end + d = rand(4) + l = d[1] + g2, l = givens(l, d[2], 1, 2) + g3, l = givens(l, d[3], 1, 3) + g4, l = givens(l, d[4], 1, 4) + @test g2*(g3*d) ≈ g2*g3*d ≈ (g2*g3)*d + @test g2*g3*g4 isa Rotation end end diff --git a/stdlib/LinearAlgebra/test/hessenberg.jl b/stdlib/LinearAlgebra/test/hessenberg.jl index b2b23caac6865..4b14179e644e5 100644 --- a/stdlib/LinearAlgebra/test/hessenberg.jl +++ b/stdlib/LinearAlgebra/test/hessenberg.jl @@ -191,6 +191,13 @@ let n = 10 end end +@testset "hessenberg(::AbstractMatrix)" begin + n = 10 + A = Tridiagonal(rand(n-1), rand(n), rand(n-1)) + H = hessenberg(A) + @test convert(Array, H) ≈ A +end + # check logdet on a matrix that has a positive determinant let A = [0.5 0.1 0.9 0.4; 0.9 0.7 0.5 0.4; 0.3 0.4 0.9 0.0; 0.4 0.0 0.0 0.5] @test logdet(hessenberg(A)) ≈ logdet(A) ≈ -3.5065578973199822 diff --git a/stdlib/LinearAlgebra/test/lq.jl b/stdlib/LinearAlgebra/test/lq.jl index 96f31ded78d6d..c340317a7cc23 100644 --- a/stdlib/LinearAlgebra/test/lq.jl +++ b/stdlib/LinearAlgebra/test/lq.jl @@ -37,10 +37,10 @@ 
rectangularQ(Q::LinearAlgebra.LQPackedQ) = convert(Array, Q) @testset for isview in (false,true) let a = isview ? view(a, 1:m - 1, 1:n - 1) : a, b = isview ? view(b, 1:m - 1) : b, m = m - isview, n = n - isview - lqa = lq(a) + lqa = lq(a) x = lqa\b - l,q = lqa.L, lqa.Q - qra = qr(a, ColumnNorm()) + l, q = lqa.L, lqa.Q + qra = qr(a, ColumnNorm()) @testset "Basic ops" begin @test size(lqa,1) == size(a,1) @test size(lqa,3) == 1 @@ -62,6 +62,10 @@ rectangularQ(Q::LinearAlgebra.LQPackedQ) = convert(Array, Q) @test Array{eltya}(q) ≈ Matrix(q) end @testset "Binary ops" begin + k = size(a, 2) + T = Tridiagonal(rand(eltya, k-1), rand(eltya, k), rand(eltya, k-1)) + @test lq(T) * T ≈ T * T rtol=3000ε + @test lqa * T ≈ a * T rtol=3000ε @test a*x ≈ b rtol=3000ε @test x ≈ qra \ b rtol=3000ε @test lqa*x ≈ a*x rtol=3000ε diff --git a/stdlib/LinearAlgebra/test/lu.jl b/stdlib/LinearAlgebra/test/lu.jl index b2477ce731739..7692bce71fd17 100644 --- a/stdlib/LinearAlgebra/test/lu.jl +++ b/stdlib/LinearAlgebra/test/lu.jl @@ -440,4 +440,13 @@ end @test length(b) == 4 end +@testset "NaN matrix should throw error" begin + for eltya in (NaN16, NaN32, NaN64, BigFloat(NaN)) + r = fill(eltya, 2, 3) + c = fill(complex(eltya, eltya), 2, 3) + @test_throws ArgumentError lu(r) + @test_throws ArgumentError lu(c) + end +end + end # module TestLU diff --git a/stdlib/LinearAlgebra/test/svd.jl b/stdlib/LinearAlgebra/test/svd.jl index 8bd3edadc911d..7f2aad904a88f 100644 --- a/stdlib/LinearAlgebra/test/svd.jl +++ b/stdlib/LinearAlgebra/test/svd.jl @@ -127,8 +127,20 @@ aimg = randn(n,n)/2 gsvd = svd(b,c) @test gsvd.U*gsvd.D1*gsvd.R*gsvd.Q' ≈ b @test gsvd.V*gsvd.D2*gsvd.R*gsvd.Q' ≈ c + # AbstractMatrix svd + T = Tridiagonal(a) + asvd = svd(T, a) + @test asvd.U*asvd.D1*asvd.R*asvd.Q' ≈ T + @test asvd.V*asvd.D2*asvd.R*asvd.Q' ≈ a + @test all(≈(1), svdvals(T, T)) end end + @testset "singular value decomposition of AbstractMatrix" begin + A = Tridiagonal(aa) + F = svd(A) + @test Matrix(F) ≈ A + @test svdvals(A) ≈ F.S + end @testset "singular value decomposition of Hermitian/real-Symmetric" begin for T in (eltya <: Real ? 
(Symmetric, Hermitian) : (Hermitian,)) usv = svd(T(asym)) diff --git a/stdlib/LinearAlgebra/test/symmetric.jl b/stdlib/LinearAlgebra/test/symmetric.jl index 96759643716da..1d58cfc180a23 100644 --- a/stdlib/LinearAlgebra/test/symmetric.jl +++ b/stdlib/LinearAlgebra/test/symmetric.jl @@ -252,6 +252,14 @@ end end end end + if eltya <: AbstractFloat + @testset "inv should error with NaNs/Infs" begin + h = Hermitian(fill(eltya(NaN), 2, 2)) + @test_throws ArgumentError inv(h) + s = Symmetric(fill(eltya(NaN), 2, 2)) + @test_throws ArgumentError inv(s) + end + end end # Revisit when implemented in julia diff --git a/stdlib/LinearAlgebra/test/trickyarithmetic.jl b/stdlib/LinearAlgebra/test/trickyarithmetic.jl index c5faf57acd857..ad04ac89c2761 100644 --- a/stdlib/LinearAlgebra/test/trickyarithmetic.jl +++ b/stdlib/LinearAlgebra/test/trickyarithmetic.jl @@ -8,12 +8,15 @@ module TrickyArithmetic Base.convert(::Type{A}, i::Int) = A(i) Base.zero(::Union{A, Type{A}}) = A(0) Base.one(::Union{A, Type{A}}) = A(1) + Base.isfinite(a::A) = isfinite(a.x) struct B x::Int end struct C x::Int end + Base.isfinite(b::B) = isfinite(b.x) + Base.isfinite(c::C) = isfinite(c.x) C(a::A) = C(a.x) Base.zero(::Union{C, Type{C}}) = C(0) Base.one(::Union{C, Type{C}}) = C(1) @@ -40,6 +43,7 @@ module TrickyArithmetic Base.:(*)(a::Union{A,B,C}, b::D) = b * a Base.inv(a::Union{A,B,C}) = A(1) / a Base.inv(a::D) = a.d / a.n + Base.isfinite(a::D) = isfinite(a.n) && isfinite(a.d) Base.:(/)(a::Union{A,B,C}, b::Union{A,B,C}) = D(a, b) Base.:(/)(a::D, b::Union{A,B,C}) = a.n / (a.d*b) Base.:(/)(a::Union{A,B,C,D}, b::D) = a * inv(b) diff --git a/stdlib/LinearAlgebra/test/tridiag.jl b/stdlib/LinearAlgebra/test/tridiag.jl index 0698a583c8d45..0fcd8744142be 100644 --- a/stdlib/LinearAlgebra/test/tridiag.jl +++ b/stdlib/LinearAlgebra/test/tridiag.jl @@ -71,11 +71,13 @@ end @test ST == Matrix(ST) @test ST.dv === x @test ST.ev === y + @test typeof(ST)(ST) === ST TT = (Tridiagonal(y, x, y))::Tridiagonal{elty, typeof(x)} @test TT == Matrix(TT) @test TT.dl === y @test TT.d === x @test TT.du === y + @test typeof(TT)(TT) === TT end ST = SymTridiagonal{elty}([1,2,3,4], [1,2,3]) @test eltype(ST) == elty diff --git a/stdlib/Markdown/src/parse/util.jl b/stdlib/Markdown/src/parse/util.jl index 7be845c96a9fc..aabfcbb3ddc62 100644 --- a/stdlib/Markdown/src/parse/util.jl +++ b/stdlib/Markdown/src/parse/util.jl @@ -36,7 +36,7 @@ function skipblank(io::IO) end """ -Returns true if the line contains only (and, unless allowempty, +Return true if the line contains only (and, unless allowempty, at least one of) the characters given. 
""" function linecontains(io::IO, chars; allow_whitespace = true, diff --git a/stdlib/MozillaCACerts_jll/Project.toml b/stdlib/MozillaCACerts_jll/Project.toml index 0db86a1dd5319..db6e85252e17f 100644 --- a/stdlib/MozillaCACerts_jll/Project.toml +++ b/stdlib/MozillaCACerts_jll/Project.toml @@ -1,6 +1,6 @@ name = "MozillaCACerts_jll" uuid = "14a3606d-f60d-562e-9121-12d972cd8159" -version = "2022.2.1" +version = "2022.10.11" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/Pkg.version b/stdlib/Pkg.version index cc29b6aa26d2f..f010384f587c0 100644 --- a/stdlib/Pkg.version +++ b/stdlib/Pkg.version @@ -1,4 +1,4 @@ PKG_BRANCH = master -PKG_SHA1 = 3cbbd860afd4c2a50a80a04fa229fe5cd5bddc76 +PKG_SHA1 = b11ca0acdda718a15068cd1815ec346a4facf412 PKG_GIT_URL := https://github.com/JuliaLang/Pkg.jl.git PKG_TAR_URL = https://api.github.com/repos/JuliaLang/Pkg.jl/tarball/$1 diff --git a/stdlib/Profile/docs/src/index.md b/stdlib/Profile/docs/src/index.md index 8701dded0d427..e67c1d3a6fdc3 100644 --- a/stdlib/Profile/docs/src/index.md +++ b/stdlib/Profile/docs/src/index.md @@ -107,3 +107,24 @@ Profile.Allocs.fetch Profile.Allocs.start Profile.Allocs.stop ``` + +## Heap Snapshots + +```@docs +Profile.take_heap_snapshot +``` + +The methods in `Profile` are not exported and need to be called e.g. as `Profile.take_heap_snapshot()`. + +```julia-repl +julia> using Profile + +julia> Profile.take_heap_snapshot("snapshot.heapsnapshot") +``` + +Traces and records julia objects on the heap. This only records objects known to the Julia +garbage collector. Memory allocated by external libraries not managed by the garbage +collector will not show up in the snapshot. + +The resulting heap snapshot file can be uploaded to chrome devtools to be viewed. +For more information, see the [chrome devtools docs](https://developer.chrome.com/docs/devtools/memory-problems/heap-snapshots/#view_snapshots). diff --git a/stdlib/Profile/src/Allocs.jl b/stdlib/Profile/src/Allocs.jl index 26dd90a821e01..2bf06550b72d6 100644 --- a/stdlib/Profile/src/Allocs.jl +++ b/stdlib/Profile/src/Allocs.jl @@ -47,6 +47,10 @@ julia> last(sort(results.allocs, by=x->x.size)) Profile.Allocs.Alloc(Vector{Any}, Base.StackTraces.StackFrame[_new_array_ at array.c:127, ...], 5576) ``` +The best way to visualize these is currently with the +[PProf.jl](https://github.com/JuliaPerf/PProf.jl) package, +by invoking `PProf.Allocs.pprof`. + !!! note The current implementation of the Allocations Profiler does not capture types for all allocations. Allocations for which the profiler @@ -54,7 +58,7 @@ Profile.Allocs.Alloc(Vector{Any}, Base.StackTraces.StackFrame[_new_array_ at arr `Profile.Allocs.UnknownType`. You can read more about the missing types and the plan to improve this, here: - https://github.com/JuliaLang/julia/issues/43688. + . !!! compat "Julia 1.8" The allocation profiler was added in Julia 1.8. diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl index 3621fe63bcaac..07e727e445239 100644 --- a/stdlib/Profile/src/Profile.jl +++ b/stdlib/Profile/src/Profile.jl @@ -88,10 +88,6 @@ stored per thread. Each instruction pointer corresponds to a single line of code list of instruction pointers. Note that 6 spaces for instruction pointers per backtrace are used to store metadata and two NULL end markers. Current settings can be obtained by calling this function with no arguments, and each can be set independently using keywords or in the order `(n, delay)`. - -!!! 
compat "Julia 1.8" - As of Julia 1.8, this function allocates space for `n` instruction pointers per thread being profiled. - Previously this was `n` total. """ function init(; n::Union{Nothing,Integer} = nothing, delay::Union{Nothing,Real} = nothing, limitwarn::Bool = true) n_cur = ccall(:jl_profile_maxlen_data, Csize_t, ()) @@ -102,8 +98,7 @@ function init(; n::Union{Nothing,Integer} = nothing, delay::Union{Nothing,Real} end delay_cur = ccall(:jl_profile_delay_nsec, UInt64, ())/10^9 if n === nothing && delay === nothing - nthreads = Sys.iswindows() ? 1 : Threads.nthreads() # windows only profiles the main thread - return round(Int, n_cur / nthreads), delay_cur + return n_cur, delay_cur end nnew = (n === nothing) ? n_cur : n delaynew = (delay === nothing) ? delay_cur : delay @@ -111,20 +106,17 @@ function init(; n::Union{Nothing,Integer} = nothing, delay::Union{Nothing,Real} end function init(n::Integer, delay::Real; limitwarn::Bool = true) - nthreads = Sys.iswindows() ? 1 : Threads.nthreads() # windows only profiles the main thread sample_size_bytes = sizeof(Ptr) # == Sys.WORD_SIZE / 8 - buffer_samples = n * nthreads + buffer_samples = n buffer_size_bytes = buffer_samples * sample_size_bytes if buffer_size_bytes > 2^29 && Sys.WORD_SIZE == 32 - buffer_size_bytes_per_thread = floor(Int, 2^29 / nthreads) - buffer_samples_per_thread = floor(Int, buffer_size_bytes_per_thread / sample_size_bytes) - buffer_samples = buffer_samples_per_thread * nthreads + buffer_samples = floor(Int, 2^29 / sample_size_bytes) buffer_size_bytes = buffer_samples * sample_size_bytes - limitwarn && @warn "Requested profile buffer limited to 512MB (n = $buffer_samples_per_thread per thread) given that this system is 32-bit" + limitwarn && @warn "Requested profile buffer limited to 512MB (n = $buffer_samples) given that this system is 32-bit" end - status = ccall(:jl_profile_init, Cint, (Csize_t, UInt64), buffer_samples, round(UInt64,10^9*delay)) + status = ccall(:jl_profile_init, Cint, (Csize_t, UInt64), buffer_samples, round(UInt64, 10^9*delay)) if status == -1 - error("could not allocate space for ", n, " instruction pointers per thread being profiled ($nthreads threads, $(Base.format_bytes(buffer_size_bytes)) total)") + error("could not allocate space for ", n, " instruction pointers ($(Base.format_bytes(buffer_size_bytes)))") end end @@ -427,7 +419,7 @@ function getdict!(dict::LineInfoDict, data::Vector{UInt}) n_unique_ips = length(unique_ips) n_unique_ips == 0 && return dict iplookups = similar(unique_ips, Vector{StackFrame}) - @sync for indexes_part in Iterators.partition(eachindex(unique_ips), div(n_unique_ips, Threads.nthreads(), RoundUp)) + @sync for indexes_part in Iterators.partition(eachindex(unique_ips), div(n_unique_ips, Threads.threadpoolsize(), RoundUp)) Threads.@spawn begin for i in indexes_part iplookups[i] = _lookup_corrected(unique_ips[i]) @@ -615,7 +607,7 @@ error_codes = Dict( """ fetch(;include_meta = true) -> data -Returns a copy of the buffer of profile backtraces. Note that the +Return a copy of the buffer of profile backtraces. Note that the values in `data` have meaning only on this machine in the current session, because it depends on the exact memory addresses used in JIT-compiling. This function is primarily for internal use; [`retrieve`](@ref) may be a better choice for most users. 
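For orientation, a rough sketch of where `fetch` sits in a typical profiling session (the raw buffer is machine- and session-specific, so `retrieve` or `Profile.print` is usually preferable for end users):

```julia
using Profile
@profile for _ in 1:100
    sum(rand(10_000))          # some work to sample
end
data = Profile.fetch()         # raw backtrace buffer (instruction pointers)
Profile.print()                # human-readable report built from the same buffer
```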
@@ -1239,6 +1231,34 @@ function warning_empty(;summary = false) end end + +""" + Profile.take_heap_snapshot(io::IOStream, all_one::Bool=false) + Profile.take_heap_snapshot(filepath::String, all_one::Bool=false) + Profile.take_heap_snapshot(all_one::Bool=false) + +Write a snapshot of the heap, in the JSON format expected by the Chrome +Devtools Heap Snapshot viewer (.heapsnapshot extension), to a file +(`\$pid_\$timestamp.heapsnapshot`) in the current directory, or the given +file path, or IO stream. If `all_one` is true, then report the size of +every object as one so they can be easily counted. Otherwise, report the +actual size. +""" +function take_heap_snapshot(io::IOStream, all_one::Bool=false) + @Base._lock_ios(io, ccall(:jl_gc_take_heap_snapshot, Cvoid, (Ptr{Cvoid}, Cchar), io.handle, Cchar(all_one))) +end +function take_heap_snapshot(filepath::String, all_one::Bool=false) + open(filepath, "w") do io + take_heap_snapshot(io, all_one) + end + return filepath +end +function take_heap_snapshot(all_one::Bool=false) + f = abspath("$(getpid())_$(time_ns()).heapsnapshot") + return take_heap_snapshot(f, all_one) +end + + include("Allocs.jl") end # module diff --git a/stdlib/Profile/test/allocs.jl b/stdlib/Profile/test/allocs.jl index b8d6222d07567..c2ec7d2f6cb54 100644 --- a/stdlib/Profile/test/allocs.jl +++ b/stdlib/Profile/test/allocs.jl @@ -64,7 +64,8 @@ end @testset "alloc profiler start stop fetch clear" begin function do_work() # Compiling allocates a lot - for f in (gensym() for _ in 1:10) + nsyms = @static Sys.WORD_SIZE == 32 ? 1 : 10 + for f in (gensym() for _ in 1:nsyms) @eval begin $f() = 10 $f() diff --git a/stdlib/Profile/test/runtests.jl b/stdlib/Profile/test/runtests.jl index 86c391d573e50..1246dcf25a82c 100644 --- a/stdlib/Profile/test/runtests.jl +++ b/stdlib/Profile/test/runtests.jl @@ -120,11 +120,10 @@ end @testset "setting sample count and delay in init" begin n_, delay_ = Profile.init() n_original = n_ - nthreads = Sys.iswindows() ? 1 : Threads.nthreads() sample_size_bytes = sizeof(Ptr) def_n = Sys.iswindows() && Sys.WORD_SIZE == 32 ? 1_000_000 : 10_000_000 - if Sys.WORD_SIZE == 32 && (def_n * nthreads * sample_size_bytes) > 2^29 - @test n_ * nthreads * sample_size_bytes <= 2^29 + if Sys.WORD_SIZE == 32 && (def_n * sample_size_bytes) > 2^29 + @test n_ * sample_size_bytes <= 2^29 else @test n_ == def_n end @@ -133,8 +132,8 @@ end @test delay_ == def_delay Profile.init(n=1_000_001, delay=0.0005) n_, delay_ = Profile.init() - if Sys.WORD_SIZE == 32 && (1_000_001 * nthreads * sample_size_bytes) > 2^29 - @test n_ * nthreads * sample_size_bytes <= 2^29 + if Sys.WORD_SIZE == 32 && (1_000_001 * sample_size_bytes) > 2^29 + @test n_ * sample_size_bytes <= 2^29 else @test n_ == 1_000_001 end @@ -272,4 +271,16 @@ end @test only(node.down).first == lidict[8] end +@testset "HeapSnapshot" begin + fname = read(`$(Base.julia_cmd()) --startup-file=no -e "using Profile; print(Profile.take_heap_snapshot())"`, String) + + @test isfile(fname) + + open(fname) do fs + @test readline(fs) != "" + end + + rm(fname) +end + include("allocs.jl") diff --git a/stdlib/REPL/src/REPL.jl b/stdlib/REPL/src/REPL.jl index c7bc30b8d4b10..708a4f895573a 100644 --- a/stdlib/REPL/src/REPL.jl +++ b/stdlib/REPL/src/REPL.jl @@ -1399,30 +1399,37 @@ using ..REPL __current_ast_transforms() = isdefined(Base, :active_repl_backend) ? 
Base.active_repl_backend.ast_transforms : REPL.repl_ast_transforms function repl_eval_counter(hp) - length(hp.history)-hp.start_idx + return length(hp.history) - hp.start_idx end -function out_transform(x, repl::LineEditREPL, n::Ref{Int}) +function out_transform(@nospecialize(x), n::Ref{Int}) return quote - julia_prompt = $repl.interface.modes[1] - mod = $REPL.active_module() - if !isdefined(mod, :Out) - setglobal!(mod, :Out, Dict{Int, Any}()) + let x = $x + $capture_result($n, x) + x end - local __temp_val = $x # workaround https://github.com/JuliaLang/julia/issues/46451 - if __temp_val !== getglobal(mod, :Out) && __temp_val !== nothing # remove this? - getglobal(mod, :Out)[$(n[])] = __temp_val - end - __temp_val end end +function capture_result(n::Ref{Int}, @nospecialize(x)) + n = n[] + mod = REPL.active_module() + if !isdefined(mod, :Out) + setglobal!(mod, :Out, Dict{Int, Any}()) + end + if x !== getglobal(mod, :Out) && x !== nothing # remove this? + getglobal(mod, :Out)[n] = x + end + nothing +end + function set_prompt(repl::LineEditREPL, n::Ref{Int}) julia_prompt = repl.interface.modes[1] julia_prompt.prompt = function() n[] = repl_eval_counter(julia_prompt.hist)+1 string("In [", n[], "]: ") end + nothing end function set_output_prefix(repl::LineEditREPL, n::Ref{Int}) @@ -1431,6 +1438,7 @@ function set_output_prefix(repl::LineEditREPL, n::Ref{Int}) julia_prompt.output_prefix_prefix = Base.text_colors[:red] end julia_prompt.output_prefix = () -> string("Out[", n[], "]: ") + nothing end function __current_ast_transforms(backend) @@ -1446,7 +1454,7 @@ function ipython_mode!(repl::LineEditREPL=Base.active_repl, backend=nothing) n = Ref{Int}(0) set_prompt(repl, n) set_output_prefix(repl, n) - push!(__current_ast_transforms(backend), ast -> out_transform(ast, repl, n)) + push!(__current_ast_transforms(backend), @nospecialize(ast) -> out_transform(ast, n)) return end end diff --git a/stdlib/REPL/src/REPLCompletions.jl b/stdlib/REPL/src/REPLCompletions.jl index 798ea1642639b..96f2ba2f5f9fa 100644 --- a/stdlib/REPL/src/REPLCompletions.jl +++ b/stdlib/REPL/src/REPLCompletions.jl @@ -320,7 +320,12 @@ function complete_path(path::AbstractString, pos::Int; use_envpath=false, shell_ end function complete_expanduser(path::AbstractString, r) - expanded = expanduser(path) + expanded = + try expanduser(path) + catch e + e isa ArgumentError || rethrow() + path + end return Completion[PathCompletion(expanded)], r, path != expanded end diff --git a/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl b/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl index 127d0cd88a2cf..2dc7161be99da 100644 --- a/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl +++ b/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl @@ -131,7 +131,7 @@ end """ header(m::AbstractMenu) -> String -Returns a header string to be printed above the menu. +Return a header string to be printed above the menu. Defaults to "". 
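A custom menu can override this to print context above its options; a sketch, where `MyMenu` is a hypothetical `AbstractMenu` subtype:

```julia
# hypothetical subtype providing its own one-line header
TerminalMenus.header(m::MyMenu) = "Select an option ('q' aborts):"
```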
""" header(m::AbstractMenu) = "" diff --git a/stdlib/REPL/test/lineedit.jl b/stdlib/REPL/test/lineedit.jl index 3d68ad1316e02..649e294f7c07d 100644 --- a/stdlib/REPL/test/lineedit.jl +++ b/stdlib/REPL/test/lineedit.jl @@ -474,7 +474,8 @@ end # julia> is 6 characters + 1 character for space, # so the rest of the terminal is 73 characters ######################################################################### -let buf = IOBuffer( +withenv("COLUMNS"=>"80") do + buf = IOBuffer( "begin\nprint(\"A very very very very very very very very very very very very ve\")\nend") seek(buf, 4) outbuf = IOBuffer() diff --git a/stdlib/REPL/test/repl.jl b/stdlib/REPL/test/repl.jl index 0312e59419b1b..ab25a56510262 100644 --- a/stdlib/REPL/test/repl.jl +++ b/stdlib/REPL/test/repl.jl @@ -35,6 +35,33 @@ function kill_timer(delay) return Timer(kill_test, delay) end +## Debugging toys. Usage: +## stdout_read = tee_repr_stdout(stdout_read) +## ccall(:jl_breakpoint, Cvoid, (Any,), stdout_read) +#function tee(f, in::IO) +# copy = Base.BufferStream() +# t = @async try +# while !eof(in) +# l = readavailable(in) +# f(l) +# write(copy, l) +# end +# catch ex +# if !(ex isa Base.IOError && ex.code == Base.UV_EIO) +# rethrow() # ignore EIO on `in` stream +# end +# finally +# # TODO: could we call closewrite to propagate an error, instead of always doing a clean close here? +# closewrite(copy) +# end +# Base.errormonitor(t) +# return copy +#end +#tee(out::IO, in::IO) = tee(l -> write(out, l), in) +#tee_repr_stdout(io) = tee(io) do x +# print(repr(String(copy(x))) * "\n") +#end + # REPL tests function fake_repl(@nospecialize(f); options::REPL.Options=REPL.Options(confirm_exit=false)) # Use pipes so we can easily do blocking reads @@ -99,8 +126,8 @@ fake_repl(options = REPL.Options(confirm_exit=false,hascolor=true)) do stdin_wri end global inc = false - global b = Condition() - global c = Condition() + global b = Base.Event(true) + global c = Base.Event(true) let cmd = "\"Hello REPL\"" write(stdin_write, "$(curmod_prefix)inc || wait($(curmod_prefix)b); r = $cmd; notify($(curmod_prefix)c); r\r") end @@ -143,44 +170,46 @@ fake_repl(options = REPL.Options(confirm_exit=false,hascolor=true)) do stdin_wri homedir_pwd = cd(pwd, homedir()) # Test `cd`'ing to an absolute path - write(stdin_write, ";") + t = @async write(stdin_write, ";") readuntil(stdout_read, "shell> ") - write(stdin_write, "cd $(escape_string(tmpdir))\n") + wait(t) + t = @async write(stdin_write, "cd $(escape_string(tmpdir))\n") readuntil(stdout_read, "cd $(escape_string(tmpdir))") - readuntil(stdout_read, tmpdir_pwd) - readuntil(stdout_read, "\n") - readuntil(stdout_read, "\n") + readuntil(stdout_read, tmpdir_pwd * "\n\n") + wait(t) @test samefile(".", tmpdir) write(stdin_write, "\b") # Test using `cd` to move to the home directory - write(stdin_write, ";") + t = @async write(stdin_write, ";") readuntil(stdout_read, "shell> ") - write(stdin_write, "cd\n") - readuntil(stdout_read, homedir_pwd) - readuntil(stdout_read, "\n") - readuntil(stdout_read, "\n") + wait(t) + t = @async write(stdin_write, "cd\n") + readuntil(stdout_read, homedir_pwd * "\n\n") + wait(t) @test samefile(".", homedir_pwd) - write(stdin_write, "\b") + t1 = @async write(stdin_write, "\b") # Test using `-` to jump backward to tmpdir - write(stdin_write, ";") + t = @async write(stdin_write, ";") readuntil(stdout_read, "shell> ") - write(stdin_write, "cd -\n") - readuntil(stdout_read, tmpdir_pwd) - readuntil(stdout_read, "\n") - readuntil(stdout_read, "\n") + wait(t1) + wait(t) + t = @async 
write(stdin_write, "cd -\n") + readuntil(stdout_read, tmpdir_pwd * "\n\n") + wait(t) @test samefile(".", tmpdir) - write(stdin_write, "\b") + t1 = @async write(stdin_write, "\b") # Test using `~` (Base.expanduser) in `cd` commands if !Sys.iswindows() - write(stdin_write, ";") + t = @async write(stdin_write, ";") readuntil(stdout_read, "shell> ") - write(stdin_write, "cd ~\n") - readuntil(stdout_read, homedir_pwd) - readuntil(stdout_read, "\n") - readuntil(stdout_read, "\n") + wait(t1) + wait(t) + t = @async write(stdin_write, "cd ~\n") + readuntil(stdout_read, homedir_pwd * "\n\n") + wait(t) @test samefile(".", homedir_pwd) write(stdin_write, "\b") end @@ -203,9 +232,10 @@ fake_repl(options = REPL.Options(confirm_exit=false,hascolor=true)) do stdin_wri # issue #20771 let s - write(stdin_write, ";") + t = @async write(stdin_write, ";") readuntil(stdout_read, "shell> ") - write(stdin_write, "'\n") # invalid input + wait(t) + t = @async write(stdin_write, "'\n") # invalid input s = readuntil(stdout_read, "\n") @test occursin("shell> ", s) # check for the echo of the prompt @test occursin("'", s) # check for the echo of the input @@ -213,26 +243,28 @@ fake_repl(options = REPL.Options(confirm_exit=false,hascolor=true)) do stdin_wri @test startswith(s, "\e[0mERROR: unterminated single quote\nStacktrace:\n [1] ") || startswith(s, "\e[0m\e[1m\e[91mERROR: \e[39m\e[22m\e[91munterminated single quote\e[39m\nStacktrace:\n [1] ") write(stdin_write, "\b") + wait(t) end # issue #27293 if Sys.isunix() let s, old_stdout = stdout - write(stdin_write, ";") + t = @async write(stdin_write, ";") readuntil(stdout_read, "shell> ") - write(stdin_write, "echo ~") - s = readuntil(stdout_read, "~") + wait(t) proc_stdout_read, proc_stdout = redirect_stdout() get_stdout = @async read(proc_stdout_read, String) try - write(stdin_write, "\n") + t = @async write(stdin_write, "echo ~\n") + readuntil(stdout_read, "~") readuntil(stdout_read, "\n") - s = readuntil(stdout_read, "\n") + s = readuntil(stdout_read, "\n") # the child has exited + wait(t) finally redirect_stdout(old_stdout) end - @test s == "\e[0m" # the child has exited + @test s == "\e[0m" close(proc_stdout) # check for the correct, expanded response @test occursin(expanduser("~"), fetch(get_stdout)) @@ -261,28 +293,33 @@ fake_repl(options = REPL.Options(confirm_exit=false,hascolor=true)) do stdin_wri # issue #10120 # ensure that command quoting works correctly let s, old_stdout = stdout - write(stdin_write, ";") + t = @async write(stdin_write, ";") readuntil(stdout_read, "shell> ") - Base.print_shell_escaped(stdin_write, Base.julia_cmd().exec..., special=Base.shell_special) - write(stdin_write, """ -e "println(\\"HI\\")\"""") + wait(t) + t = @async begin + Base.print_shell_escaped(stdin_write, Base.julia_cmd().exec..., special=Base.shell_special) + write(stdin_write, """ -e "println(\\"HI\\")\"""") + end readuntil(stdout_read, ")\"") + wait(t) proc_stdout_read, proc_stdout = redirect_stdout() get_stdout = @async read(proc_stdout_read, String) try - write(stdin_write, '\n') - s = readuntil(stdout_read, "\n", keep=true) - if s == "\n" + t = @async write(stdin_write, '\n') + s = readuntil(stdout_read, "\n") + if s == "" # if shell width is precisely the text width, # we may print some extra characters to fix the cursor state - s = readuntil(stdout_read, "\n", keep=true) + s = readuntil(stdout_read, "\n") @test occursin("shell> ", s) - s = readuntil(stdout_read, "\n", keep=true) - @test s == "\r\r\n" + s = readuntil(stdout_read, "\n") + @test s == "\r\r" else @test 
occursin("shell> ", s) end - s = readuntil(stdout_read, "\n", keep=true) - @test s == "\e[0m\n" # the child has exited + s = readuntil(stdout_read, "\n") + @test s == "\e[0m" # the child printed nothing + wait(t) finally redirect_stdout(old_stdout) end @@ -699,17 +736,20 @@ fake_repl() do stdin_write, stdout_read, repl REPL.run_repl(repl) end - global c = Condition() - sendrepl2(cmd) = write(stdin_write, "$cmd\n notify($(curmod_prefix)c)\n") + global c = Base.Event(true) + function sendrepl2(cmd) + t = @async readuntil(stdout_read, "\"done\"\n\n") + write(stdin_write, "$cmd\n notify($(curmod_prefix)c); \"done\"\n") + wait(c) + fetch(t) + end # Test removal of prefix in single statement paste sendrepl2("\e[200~julia> A = 2\e[201~\n") - wait(c) @test Main.A == 2 # Test removal of prefix in single statement paste sendrepl2("\e[200~In [12]: A = 2.2\e[201~\n") - wait(c) @test Main.A == 2.2 # Test removal of prefix in multiple statement paste @@ -722,7 +762,6 @@ fake_repl() do stdin_write, stdout_read, repl julia> A = 3\e[201~ """) - wait(c) @test Main.A == 3 @test Base.invokelatest(Main.foo, 4) @test Base.invokelatest(Main.T17599, 3).a == 3 @@ -735,26 +774,22 @@ fake_repl() do stdin_write, stdout_read, repl julia> A = 4 4\e[201~ """) - wait(c) @test Main.A == 4 @test Base.invokelatest(Main.goo, 4) == 5 # Test prefix removal only active in bracket paste mode sendrepl2("julia = 4\n julia> 3 && (A = 1)\n") - wait(c) @test Main.A == 1 # Test that indentation corresponding to the prompt is removed - sendrepl2("""\e[200~julia> begin\n α=1\n β=2\n end\n\e[201~""") - wait(c) - readuntil(stdout_read, "begin") - @test readuntil(stdout_read, "end", keep=true) == "\n\r\e[7C α=1\n\r\e[7C β=2\n\r\e[7Cend" + s = sendrepl2("""\e[200~julia> begin\n α=1\n β=2\n end\n\e[201~""") + s2 = split(rsplit(s, "begin", limit=2)[end], "end", limit=2)[1] + @test s2 == "\n\r\e[7C α=1\n\r\e[7C β=2\n\r\e[7C" + # for incomplete input (`end` below is added after the end of bracket paste) - sendrepl2("""\e[200~julia> begin\n α=1\n β=2\n\e[201~end""") - wait(c) - readuntil(stdout_read, "begin") - readuntil(stdout_read, "begin") - @test readuntil(stdout_read, "end", keep=true) == "\n\r\e[7C α=1\n\r\e[7C β=2\n\r\e[7Cend" + s = sendrepl2("""\e[200~julia> begin\n α=1\n β=2\n\e[201~end""") + s2 = split(rsplit(s, "begin", limit=2)[end], "end", limit=2)[1] + @test s2 == "\n\r\e[7C α=1\n\r\e[7C β=2\n\r\e[7C" # Test switching repl modes redirect_stdout(devnull) do # to suppress "foo" echoes @@ -779,7 +814,6 @@ fake_repl() do stdin_write, stdout_read, repl julia> B = 2 2\e[201~ """) - wait(c) @test Main.A == 1 @test Main.B == 2 end # redirect_stdout @@ -817,13 +851,13 @@ fake_repl() do stdin_write, stdout_read, repl repltask = @async REPL.run_interface(repl.t, LineEdit.ModalInterface(Any[panel, search_prompt])) - write(stdin_write,"a\n") + write(stdin_write, "a\n") @test wait(c) == "a" # Up arrow enter should recall history even at the start - write(stdin_write,"\e[A\n") + write(stdin_write, "\e[A\n") @test wait(c) == "a" # And again - write(stdin_write,"\e[A\n") + write(stdin_write, "\e[A\n") @test wait(c) == "a" # Close REPL ^D write(stdin_write, '\x04') @@ -849,7 +883,7 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` output = readuntil(ptm, ' ', keep=true) if Sys.iswindows() - # Our fake pty is actually a pipe, and thus lacks the input echo feature of posix + # Our fake pty is actually a pipe, and thus lacks the input echo feature of posix @test output == "1\n\njulia> " else @test output == 
"1\r\nexit()\r\n1\r\n\r\njulia> " @@ -1085,16 +1119,18 @@ fake_repl() do stdin_write, stdout_read, repl end @eval Main module TestShowTypeREPL; export TypeA; struct TypeA end; end - write(stdin_write, "TestShowTypeREPL.TypeA\n") - @test endswith(readline(stdout_read), "\r\e[7CTestShowTypeREPL.TypeA\r\e[29C") - readline(stdout_read) - @test readline(stdout_read) == "" + t = @async write(stdin_write, "TestShowTypeREPL.TypeA\n") + s = readuntil(stdout_read, "\n\n") + s2 = rsplit(s, "\n", limit=2)[end] + @test s2 == "\e[0mMain.TestShowTypeREPL.TypeA" + wait(t) @eval Main using .TestShowTypeREPL readuntil(stdout_read, "julia> ", keep=true) - write(stdin_write, "TypeA\n") - @test endswith(readline(stdout_read), "\r\e[7CTypeA\r\e[12C") - readline(stdout_read) - @test readline(stdout_read) == "" + t = @async write(stdin_write, "TypeA\n") + s = readuntil(stdout_read, "\n\n") + s2 = rsplit(s, "\n", limit=2)[end] + @test s2 == "\e[0mTypeA" + wait(t) # Close REPL ^D readuntil(stdout_read, "julia> ", keep=true) @@ -1112,19 +1148,18 @@ fake_repl() do stdin_write, stdout_read, repl REPL.run_repl(repl) end - write(stdin_write, "(123, Base.Fix1)\n") - @test occursin("julia> ", split(readline(stdout_read), "Base.Fix1")[2]) - @test occursin("(123, Base.Fix1)", readline(stdout_read)) - readline(stdout_read) + write(stdin_write, " ( 123 , Base.Fix1 , ) \n") + s = readuntil(stdout_read, "\n\n") + @test endswith(s, "(123, Base.Fix1)") repl.mistate.active_module = Base # simulate activate_module(Base) - write(stdin_write, "(456, Base.Fix2)\n") - @test occursin("(Base) julia> ", split(readline(stdout_read), "Base.Fix2")[2]) + write(stdin_write, " ( 456 , Base.Fix2 , ) \n") + s = readuntil(stdout_read, "\n\n") # ".Base" prefix not shown here - @test occursin("(456, Fix2)", readline(stdout_read)) - readline(stdout_read) + @test endswith(s, "(456, Fix2)") # Close REPL ^D + readuntil(stdout_read, "julia> ", keep=true) write(stdin_write, '\x04') Base.wait(repltask) end @@ -1253,15 +1288,18 @@ fake_repl() do stdin_write, stdout_read, repl repltask = @async begin REPL.run_repl(repl) end - write(stdin_write, "Expr(:call, GlobalRef(Base.Math, :float), Core.SlotNumber(1))\n") + t = @async write(stdin_write, "Expr(:call, GlobalRef(Base.Math, :float), Core.SlotNumber(1))\n") readline(stdout_read) - @test readline(stdout_read) == "\e[0m:(Base.Math.float(_1))" - @test readline(stdout_read) == "" + s = readuntil(stdout_read, "\n\n") + @test endswith(s, "\e[0m:(Base.Math.float(_1))") + wait(t) + readuntil(stdout_read, "julia> ", keep=true) - write(stdin_write, "ans\n") + t = @async write(stdin_write, "ans\n") readline(stdout_read) - @test readline(stdout_read) == "\e[0m:(Base.Math.float(_1))" - @test readline(stdout_read) == "" + s = readuntil(stdout_read, "\n\n") + @test endswith(s, "\e[0m:(Base.Math.float(_1))") + wait(t) readuntil(stdout_read, "julia> ", keep=true) write(stdin_write, '\x04') Base.wait(repltask) @@ -1272,17 +1310,21 @@ fake_repl() do stdin_write, stdout_read, repl repltask = @async begin REPL.run_repl(repl) end - write(stdin_write, "struct Errs end\n") - readline(stdout_read) + t = @async write(stdin_write, "struct Errs end\n") + readuntil(stdout_read, "\e[0m") readline(stdout_read) + wait(t) readuntil(stdout_read, "julia> ", keep=true) - write(stdin_write, "Base.show(io::IO, ::Errs) = throw(Errs())\n") + t = @async write(stdin_write, "Base.show(io::IO, ::Errs) = throw(Errs())\n") readline(stdout_read) + readuntil(stdout_read, "\e[0m") readline(stdout_read) + wait(t) readuntil(stdout_read, "julia> ", keep=true) 
- write(stdin_write, "Errs()\n") - readline(stdout_read) + t = @async write(stdin_write, "Errs()\n") readline(stdout_read) + readuntil(stdout_read, "\n\n") + wait(t) readuntil(stdout_read, "julia> ", keep=true) write(stdin_write, '\x04') wait(repltask) @@ -1296,7 +1338,8 @@ fake_repl() do stdin_write, stdout_read, repl end write(stdin_write, "?;\n") readline(stdout_read) - @test endswith(readline(stdout_read), "search: ;") + s = readline(stdout_read) + @test endswith(s, "search: ;") readuntil(stdout_read, "julia> ", keep=true) write(stdin_write, '\x04') Base.wait(repltask) @@ -1437,14 +1480,14 @@ fake_repl() do stdin_write, stdout_read, repl REPL.run_repl(repl) end # initialize `err` to `nothing` + t = @async (readline(stdout_read); readuntil(stdout_read, "\e[0m\n")) write(stdin_write, "global err = nothing\n") - readline(stdout_read) - readline(stdout_read) == "\e[0m" + wait(t) readuntil(stdout_read, "julia> ", keep=true) # generate top-level error write(stdin_write, "foobar\n") readline(stdout_read) - @test readline(stdout_read) == "\e[0mERROR: UndefVarError: foobar not defined" + @test readline(stdout_read) == "\e[0mERROR: UndefVarError: `foobar` not defined" @test readline(stdout_read) == "" readuntil(stdout_read, "julia> ", keep=true) # check that top-level error did not change `err` @@ -1454,18 +1497,21 @@ fake_repl() do stdin_write, stdout_read, repl readuntil(stdout_read, "julia> ", keep=true) # generate deeper error write(stdin_write, "foo() = foobar\n") + readuntil(stdout_read, "\n\e[0m", keep=true) readline(stdout_read) readuntil(stdout_read, "julia> ", keep=true) write(stdin_write, "foo()\n") readline(stdout_read) - @test readline(stdout_read) == "\e[0mERROR: UndefVarError: foobar not defined" + @test readline(stdout_read) == "\e[0mERROR: UndefVarError: `foobar` not defined" readuntil(stdout_read, "julia> ", keep=true) # check that deeper error did set `err` write(stdin_write, "err\n") readline(stdout_read) @test readline(stdout_read) == "\e[0m1-element ExceptionStack:" - @test readline(stdout_read) == "UndefVarError: foobar not defined" + @test readline(stdout_read) == "UndefVarError: `foobar` not defined" @test readline(stdout_read) == "Stacktrace:" + readuntil(stdout_read, "\n\n", keep=true) + readuntil(stdout_read, "julia> ", keep=true) write(stdin_write, '\x04') Base.wait(repltask) end @@ -1565,24 +1611,35 @@ fake_repl() do stdin_write, stdout_read, repl REPL.ipython_mode!(repl, backend) - global c = Condition() - sendrepl2(cmd) = write(stdin_write, "$cmd\n notify($(curmod_prefix)c)\n") + global c = Base.Event(true) + function sendrepl2(cmd, txt) + t = @async write(stdin_write, "$cmd\n notify($(curmod_prefix)c); \"done\"\n") + r = readuntil(stdout_read, txt, keep=true) + readuntil(stdout_read, "\"done\"\n\n", keep=true) + wait(c) + wait(t) + return r + end - sendrepl2("\"z\" * \"z\"\n") - wait(c) - s = String(readuntil(stdout_read, "\"zz\""; keep=true)) + s = sendrepl2("\"z\" * \"z\"\n", "\"zz\"") @test contains(s, "In [1]") - @test contains(s, "Out[1]: \"zz\"") - - sendrepl2("\"y\" * \"y\"\n") - wait(c) - s = String(readuntil(stdout_read, "\"yy\""; keep=true)) - @test contains(s, "Out[3]: \"yy\"") - - sendrepl2("Out[1] * Out[3]\n") - wait(c) - s = String(readuntil(stdout_read, "\"zzyy\""; keep=true)) - @test contains(s, "Out[5]: \"zzyy\"") + @test endswith(s, "Out[1]: \"zz\"") + + s = sendrepl2("\"y\" * \"y\"\n", "\"yy\"") + @test endswith(s, "Out[3]: \"yy\"") + + s = sendrepl2("Out[1] * Out[3]\n", "\"zzyy\"") + @test endswith(s, "Out[5]: \"zzyy\"") + + # test a top-level 
expression + s = sendrepl2("import REPL\n", "In [8]") + @test !contains(s, "ERROR") + @test !contains(s, "[6]") + @test !contains(s, "Out[7]:") + @test contains(s, "In [7]: ") + @test contains(s, "import REPL") + s = sendrepl2("REPL\n", "In [10]") + @test contains(s, "Out[9]: REPL") write(stdin_write, '\x04') Base.wait(repltask) diff --git a/stdlib/REPL/test/replcompletions.jl b/stdlib/REPL/test/replcompletions.jl index 721ea06854a80..21c8743f90802 100644 --- a/stdlib/REPL/test/replcompletions.jl +++ b/stdlib/REPL/test/replcompletions.jl @@ -1099,6 +1099,9 @@ let s, c, r s = "\"~" @test "tmpfoobar/" in c c,r = test_complete(s) + s = "\"~user" + c, r = test_complete(s) + @test isempty(c) rm(dir) end end diff --git a/stdlib/Random/src/Random.jl b/stdlib/Random/src/Random.jl index b9adb5ae39f54..95125422eeee5 100644 --- a/stdlib/Random/src/Random.jl +++ b/stdlib/Random/src/Random.jl @@ -434,4 +434,10 @@ true """ seed!(rng::AbstractRNG, ::Nothing) = seed!(rng) +# Randomize quicksort pivot selection. This code is here because of bootstrapping: +# we need to sort things before we load this standard library. +# TODO move this into Sort.jl +Base.delete_method(only(methods(Base.Sort.select_pivot))) +Base.Sort.select_pivot(lo::Integer, hi::Integer) = rand(lo:hi) + end # module diff --git a/stdlib/Random/src/generation.jl b/stdlib/Random/src/generation.jl index 61e722a7719db..6fe5b585b088f 100644 --- a/stdlib/Random/src/generation.jl +++ b/stdlib/Random/src/generation.jl @@ -210,7 +210,7 @@ SamplerRangeFast(r::AbstractUnitRange{T}) where T<:BitInteger = SamplerRangeFast(r, uint_sup(T)) function SamplerRangeFast(r::AbstractUnitRange{T}, ::Type{U}) where {T,U} - isempty(r) && throw(ArgumentError("range must be non-empty")) + isempty(r) && throw(ArgumentError("collection must be non-empty")) m = (last(r) - first(r)) % unsigned(T) % U # % unsigned(T) to not propagate sign bit bw = (sizeof(U) << 3 - leading_zeros(m)) % UInt # bit-width mask = ((1 % U) << bw) - (1 % U) @@ -284,7 +284,7 @@ SamplerRangeInt(r::AbstractUnitRange{T}) where T<:BitInteger = SamplerRangeInt(r, uint_sup(T)) function SamplerRangeInt(r::AbstractUnitRange{T}, ::Type{U}) where {T,U} - isempty(r) && throw(ArgumentError("range must be non-empty")) + isempty(r) && throw(ArgumentError("collection must be non-empty")) a = first(r) m = (last(r) - first(r)) % unsigned(T) % U k = m + one(U) @@ -330,7 +330,7 @@ struct SamplerRangeNDL{U<:Unsigned,T} <: Sampler{T} end function SamplerRangeNDL(r::AbstractUnitRange{T}) where {T} - isempty(r) && throw(ArgumentError("range must be non-empty")) + isempty(r) && throw(ArgumentError("collection must be non-empty")) a = first(r) U = uint_sup(T) s = (last(r) - first(r)) % unsigned(T) % U + one(U) # overflow ok @@ -369,7 +369,7 @@ end function SamplerBigInt(::Type{RNG}, r::AbstractUnitRange{BigInt}, N::Repetition=Val(Inf) ) where {RNG<:AbstractRNG} m = last(r) - first(r) - m.size < 0 && throw(ArgumentError("range must be non-empty")) + m.size < 0 && throw(ArgumentError("collection must be non-empty")) nlimbs = Int(m.size) hm = nlimbs == 0 ? 
Limb(0) : GC.@preserve m unsafe_load(m.d, nlimbs) highsp = Sampler(RNG, Limb(0):hm, N) diff --git a/stdlib/Serialization/src/Serialization.jl b/stdlib/Serialization/src/Serialization.jl index 98bf7d447b3ec..ebf3bb1319b31 100644 --- a/stdlib/Serialization/src/Serialization.jl +++ b/stdlib/Serialization/src/Serialization.jl @@ -520,10 +520,11 @@ function serialize_typename(s::AbstractSerializer, t::Core.TypeName) serialize(s, t.mt.name) serialize(s, collect(Base.MethodList(t.mt))) serialize(s, t.mt.max_args) - if isdefined(t.mt, :kwsorter) - serialize(s, t.mt.kwsorter) - else + kws = collect(methods(Core.kwcall, (Any, t.wrapper, Vararg))) + if isempty(kws) writetag(s.io, UNDEFREF_TAG) + else + serialize(s, kws) end else writetag(s.io, UNDEFREF_TAG) @@ -1355,7 +1356,15 @@ function deserialize_typename(s::AbstractSerializer, number) if tag != UNDEFREF_TAG kws = handle_deserialize(s, tag) if makenew - tn.mt.kwsorter = kws + if kws isa Vector{Method} + for def in kws + kwmt = typeof(Core.kwcall).name.mt + ccall(:jl_method_table_insert, Cvoid, (Any, Any, Ptr{Cvoid}), mt, def, C_NULL) + end + else + # old object format -- try to forward from old to new + @eval Core.kwcall(kwargs, f::$ty, args...) = $kws(kwargs, f, args...) + end end end elseif makenew diff --git a/stdlib/Serialization/test/runtests.jl b/stdlib/Serialization/test/runtests.jl index 0d438040a4cd0..46749d4375538 100644 --- a/stdlib/Serialization/test/runtests.jl +++ b/stdlib/Serialization/test/runtests.jl @@ -317,18 +317,23 @@ main_ex = quote using Serialization $create_serialization_stream() do s local g() = :magic_token_anon_fun_test + local gkw(; kw=:thekw) = kw serialize(s, g) serialize(s, g) + serialize(s, gkw) seekstart(s) ds = Serializer(s) local g2 = deserialize(ds) - Base.invokelatest() do - $Test.@test g2 !== g - $Test.@test g2() === :magic_token_anon_fun_test - $Test.@test g2() === :magic_token_anon_fun_test - $Test.@test deserialize(ds) === g2 - end + @test g2 !== g + $Test.@test Base.invokelatest(g2) === :magic_token_anon_fun_test + $Test.@test Base.invokelatest(g2) === :magic_token_anon_fun_test + deserialize(ds) === g2 + + local gkw2 = deserialize(s) + $Test.@test gkw2 !== gkw + $Test.@test Base.invokelatest(gkw2) === :thekw + $Test.@test Base.invokelatest(gkw2, kw="kwtest") === "kwtest" # issue #21793 y = x -> (() -> x) @@ -336,10 +341,10 @@ main_ex = quote serialize(s, y) seekstart(s) y2 = deserialize(s) - Base.invokelatest() do + $Test.@test Base.invokelatest() do x2 = y2(2) - $Test.@test x2() == 2 - end + x2() + end === 2 end end # This needs to be run on `Main` since the serializer treats it differently. diff --git a/stdlib/SharedArrays/src/SharedArrays.jl b/stdlib/SharedArrays/src/SharedArrays.jl index 90de5fbac75be..f9f701c61fcea 100644 --- a/stdlib/SharedArrays/src/SharedArrays.jl +++ b/stdlib/SharedArrays/src/SharedArrays.jl @@ -328,7 +328,7 @@ procs(S::SharedArray) = S.pids """ indexpids(S::SharedArray) -Returns the current worker's index in the list of workers +Return the current worker's index in the list of workers mapping the `SharedArray` (i.e. in the same list returned by `procs(S)`), or 0 if the `SharedArray` is not mapped locally. """ @@ -337,7 +337,7 @@ indexpids(S::SharedArray) = S.pidx """ sdata(S::SharedArray) -Returns the actual `Array` object backing `S`. +Return the actual `Array` object backing `S`. 
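For example (a sketch rather than a doctest, since `SharedArray` allocation depends on OS shared memory):

```julia
using SharedArrays
S = SharedArray{Float64}(4)
sdata(S) isa Vector{Float64}   # true: the local `Array` backing `S`
```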
""" sdata(S::SharedArray) = S.s sdata(A::AbstractArray) = A @@ -345,7 +345,7 @@ sdata(A::AbstractArray) = A """ localindices(S::SharedArray) -Returns a range describing the "default" indices to be handled by the +Return a range describing the "default" indices to be handled by the current process. This range should be interpreted in the sense of linear indexing, i.e., as a sub-range of `1:length(S)`. In multi-process contexts, returns an empty range in the parent process diff --git a/stdlib/Sockets/src/IPAddr.jl b/stdlib/Sockets/src/IPAddr.jl index 1792008620981..04710e400fe87 100644 --- a/stdlib/Sockets/src/IPAddr.jl +++ b/stdlib/Sockets/src/IPAddr.jl @@ -31,7 +31,7 @@ end """ IPv4(host::Integer) -> IPv4 -Returns an IPv4 object from ip address `host` formatted as an [`Integer`](@ref). +Return an IPv4 object from ip address `host` formatted as an [`Integer`](@ref). # Examples ```jldoctest @@ -84,7 +84,7 @@ end """ IPv6(host::Integer) -> IPv6 -Returns an IPv6 object from ip address `host` formatted as an [`Integer`](@ref). +Return an IPv6 object from ip address `host` formatted as an [`Integer`](@ref). # Examples ```jldoctest diff --git a/stdlib/Sockets/src/Sockets.jl b/stdlib/Sockets/src/Sockets.jl index 9f4678fa3e06b..33767c2153211 100644 --- a/stdlib/Sockets/src/Sockets.jl +++ b/stdlib/Sockets/src/Sockets.jl @@ -727,7 +727,7 @@ function listenany(host::IPAddr, default_port; backlog::Integer=BACKLOG_DEFAULT) return (addr.port, sock) end close(sock) - addr = InetAddr(addr.host, addr.port + 1) + addr = InetAddr(addr.host, addr.port + UInt16(1)) if addr.port == default_port error("no ports available") end diff --git a/stdlib/SparseArrays.version b/stdlib/SparseArrays.version index 060aac7e89342..190e1d8a7be1e 100644 --- a/stdlib/SparseArrays.version +++ b/stdlib/SparseArrays.version @@ -1,4 +1,4 @@ SPARSEARRAYS_BRANCH = main -SPARSEARRAYS_SHA1 = 1bae96dc8f9a8ca8b7879eef4cf71e186598e982 +SPARSEARRAYS_SHA1 = 3c2b65f9ba6afb3c6c5dc76c03d897a6647e9dd7 SPARSEARRAYS_GIT_URL := https://github.com/JuliaSparse/SparseArrays.jl.git SPARSEARRAYS_TAR_URL = https://api.github.com/repos/JuliaSparse/SparseArrays.jl/tarball/$1 diff --git a/stdlib/Statistics.version b/stdlib/Statistics.version index a9830fcd8759b..362aec5bdc1f3 100644 --- a/stdlib/Statistics.version +++ b/stdlib/Statistics.version @@ -1,4 +1,4 @@ STATISTICS_BRANCH = master -STATISTICS_SHA1 = 0588f2cf9e43f9f72af5802feaf0af4b652c3257 +STATISTICS_SHA1 = 20fbe576ec406180b1dddf4c7fbe16458a7aef21 STATISTICS_GIT_URL := https://github.com/JuliaStats/Statistics.jl.git STATISTICS_TAR_URL = https://api.github.com/repos/JuliaStats/Statistics.jl/tarball/$1 diff --git a/stdlib/Tar.version b/stdlib/Tar.version index 8af321176ea5d..a6b7cf053523d 100644 --- a/stdlib/Tar.version +++ b/stdlib/Tar.version @@ -1,4 +1,4 @@ TAR_BRANCH = master -TAR_SHA1 = 951955b7fbe0d79e4e8a1405b6816e4081a6976d +TAR_SHA1 = 6bfc11475a80b752e70518047c3c3463f56bbc1d TAR_GIT_URL := https://github.com/JuliaIO/Tar.jl.git TAR_TAR_URL = https://api.github.com/repos/JuliaIO/Tar.jl/tarball/$1 diff --git a/stdlib/Test/src/Test.jl b/stdlib/Test/src/Test.jl index 8a3d028d51769..c19d131781b8f 100644 --- a/stdlib/Test/src/Test.jl +++ b/stdlib/Test/src/Test.jl @@ -1689,7 +1689,7 @@ end """ get_testset_depth() -Returns the number of active test sets, not including the default test set +Return the number of active test sets, not including the default test set """ function get_testset_depth() testsets = get(task_local_storage(), :__BASETESTNEXT__, AbstractTestSet[]) @@ 
-1819,7 +1819,7 @@ end ambiguous_bottom=false, allowed_undefineds=nothing) -Returns a vector of `(Method,Method)` pairs of ambiguous methods +Return a vector of `(Method,Method)` pairs of ambiguous methods defined in the specified modules. Use `recursive=true` to test in all submodules. @@ -1896,7 +1896,7 @@ end """ detect_unbound_args(mod1, mod2...; recursive=false, allowed_undefineds=nothing) -Returns a vector of `Method`s which may have unbound type parameters. +Return a vector of `Method`s which may have unbound type parameters. Use `recursive=true` to test in all submodules. By default, any undefined symbols trigger a warning. This warning can diff --git a/stdlib/Unicode/src/Unicode.jl b/stdlib/Unicode/src/Unicode.jl index 0467a8d50aa6b..58b9ab41b790a 100644 --- a/stdlib/Unicode/src/Unicode.jl +++ b/stdlib/Unicode/src/Unicode.jl @@ -120,7 +120,7 @@ normalize(s::AbstractString; kwargs...) = Base.Unicode.normalize(s; kwargs...) """ Unicode.isassigned(c) -> Bool -Returns `true` if the given char or integer is an assigned Unicode code point. +Return `true` if the given char or integer is an assigned Unicode code point. # Examples ```jldoctest @@ -136,7 +136,7 @@ isassigned(c) = Base.Unicode.isassigned(c) """ graphemes(s::AbstractString) -> GraphemeIterator -Returns an iterator over substrings of `s` that correspond to the extended graphemes in the +Return an iterator over substrings of `s` that correspond to the extended graphemes in the string, as defined by Unicode UAX #29. (Roughly, these are what users would perceive as single characters, even though they may contain more than one codepoint; for example a letter combined with an accent mark is a single grapheme.) diff --git a/stdlib/Zlib_jll/Project.toml b/stdlib/Zlib_jll/Project.toml index 77e1da5f9c22e..575863062d8bb 100644 --- a/stdlib/Zlib_jll/Project.toml +++ b/stdlib/Zlib_jll/Project.toml @@ -1,6 +1,6 @@ name = "Zlib_jll" uuid = "83775a58-1f1d-513f-b197-d71354ab007a" -version = "1.2.12+3" +version = "1.2.13+0" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/stdlib/Zlib_jll/test/runtests.jl b/stdlib/Zlib_jll/test/runtests.jl index cc9e64188a0aa..f04f9c70a7054 100644 --- a/stdlib/Zlib_jll/test/runtests.jl +++ b/stdlib/Zlib_jll/test/runtests.jl @@ -3,5 +3,5 @@ using Test, Zlib_jll @testset "Zlib_jll" begin - @test VersionNumber(unsafe_string(ccall((:zlibVersion, libz), Cstring, ()))) == v"1.2.12" + @test VersionNumber(unsafe_string(ccall((:zlibVersion, libz), Cstring, ()))) == v"1.2.13" end diff --git a/test/abstractarray.jl b/test/abstractarray.jl index 604470b50e686..ba7aa8a14d256 100644 --- a/test/abstractarray.jl +++ b/test/abstractarray.jl @@ -525,10 +525,6 @@ function test_primitives(::Type{T}, shape, ::Type{TestAbstractArray}) where T @test_throws MethodError convert(Union{}, X) end -@testset "CanonicalIndexError is a Exception" begin - @test Base.CanonicalIndexError <: Exception -end - mutable struct TestThrowNoGetindex{T} <: AbstractVector{T} end @testset "ErrorException if getindex is not defined" begin Base.length(::TestThrowNoGetindex) = 2 diff --git a/test/bitarray.jl b/test/bitarray.jl index c1c596dc5d7d6..05abd610682a2 100644 --- a/test/bitarray.jl +++ b/test/bitarray.jl @@ -1494,6 +1494,51 @@ timesofar("reductions") C17970 = map(x -> x ? 
false : true, A17970) @test C17970::BitArray{1} == map(~, A17970) end + + #= + |<----------------dest----------(original_tail)->| + |<------------------b2(l)------>| extra_l | + |<------------------b3(l)------>| + |<------------------b4(l+extra_l)--------------->| + |<--------------dest_inbetween-------->| extra÷2 | + =# + @testset "Issue #47011, map! over unequal length bitarray" begin + for l = [0, 1, 63, 64, 65, 127, 128, 129, 255, 256, 257, 6399, 6400, 6401] + for extra_l = [10, 63, 64, 65, 127, 128, 129, 255, 256, 257, 6399, 6400, 6401] + + dest = bitrand(l+extra_l) + b2 = bitrand(l) + original_tail = last(dest, extra_l) + for op in (!, ~) + map!(op, dest, b2) + @test first(dest, l) == map(op, b2) + # check we didn't change bits we're not supposed to + @test last(dest, extra_l) == original_tail + end + + b3 = bitrand(l) + b4 = bitrand(l+extra_l) + # when dest is longer than one source but shorter than the other + dest_inbetween = bitrand(l + extra_l÷2) + original_tail_inbetween = last(dest_inbetween, extra_l÷2) + for op in (|, ⊻) + map!(op, dest, b2, b3) + @test first(dest, l) == map(op, b2, b3) + # check we didn't change bits we're not supposed to + @test last(dest, extra_l) == original_tail + + map!(op, dest, b2, b4) + @test first(dest, l) == map(op, b2, b4) + # check we didn't change bits we're not supposed to + @test last(dest, extra_l) == original_tail + + map!(op, dest_inbetween, b2, b4) + @test first(dest_inbetween, l) == map(op, b2, b4) + @test last(dest_inbetween, extra_l÷2) == original_tail_inbetween + end + end + end + end end ## Filter ## diff --git a/test/client.jl b/test/client.jl index 195743b1d6208..0649ab3241d62 100644 --- a/test/client.jl +++ b/test/client.jl @@ -12,7 +12,7 @@ nested_error_pattern = r""" ERROR: DivideError: integer division error Stacktrace:.* - caused by: UndefVarError: __not_a_binding__ not defined + caused by: UndefVarError: `__not_a_binding__` not defined Stacktrace:.* """s @@ -31,7 +31,7 @@ nested_error_pattern = r""" DivideError: integer division error Stacktrace:.* - caused by: UndefVarError: __not_a_binding__ not defined + caused by: UndefVarError: `__not_a_binding__` not defined Stacktrace:.* """s, sprint(show, excs)) end diff --git a/test/cmdlineargs.jl b/test/cmdlineargs.jl index cc76c6fcfb0c8..7063f1f87bf68 100644 --- a/test/cmdlineargs.jl +++ b/test/cmdlineargs.jl @@ -226,7 +226,7 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` @test errors_not_signals(`$exename --cpu-target=invalidtarget`) # -t, --threads - code = "print(Threads.nthreads())" + code = "print(Threads.threadpoolsize())" cpu_threads = ccall(:jl_effective_threads, Int32, ()) @test string(cpu_threads) == read(`$exename --threads auto -e $code`, String) == @@ -254,7 +254,7 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` # Combining --threads and --procs: --threads does propagate withenv("JULIA_NUM_THREADS" => nothing) do - code = "print(sum(remotecall_fetch(Threads.nthreads, x) for x in procs()))" + code = "print(sum(remotecall_fetch(Threads.threadpoolsize, x) for x in procs()))" @test read(`$exename -p2 -t2 -e $code`, String) == "6" end diff --git a/test/compiler/AbstractInterpreter.jl b/test/compiler/AbstractInterpreter.jl index 5407772cb88a4..cfb26d714db9f 100644 --- a/test/compiler/AbstractInterpreter.jl +++ b/test/compiler/AbstractInterpreter.jl @@ -5,6 +5,8 @@ const CC = Core.Compiler import Core: MethodInstance, CodeInstance import .CC: WorldRange, WorldView +include("irutils.jl") + # define new `AbstractInterpreter` that satisfies 
the minimum interface requirements # while managing its cache independently macro newinterp(name) @@ -193,7 +195,7 @@ function CC.tmerge(𝕃::AnyTaintLattice, @nospecialize(typea), @nospecialize(ty if isa(typea, T) if isa(typeb, T) return T( - tmerge(widenlattice(𝕃), typea.typ, typeb), + tmerge(widenlattice(𝕃), typea.typ, typeb.typ), typea.slots ∪ typeb.slots) else typea = typea.typ @@ -238,3 +240,114 @@ end @test CC.tmerge(typeinf_lattice(TaintInterpreter()), Taint(Int, 1), Taint(Int, 2)) == Taint(Int, BitSet(1:2)) # code_typed(ifelse, (Bool, Int, Int); interp=TaintInterpreter()) + +# CallInfo × inlining +# =================== + +import .CC: CallInfo + +struct NoinlineInterpreterCache + dict::IdDict{MethodInstance,CodeInstance} +end + +""" + NoinlineInterpreter(noinline_modules::Set{Module}) <: AbstractInterpreter + +An `AbstractInterpreter` that has additional inlineability rules based on caller module context. +""" +struct NoinlineInterpreter <: CC.AbstractInterpreter + noinline_modules::Set{Module} + interp::CC.NativeInterpreter + cache::NoinlineInterpreterCache + NoinlineInterpreter(noinline_modules::Set{Module}, world = Base.get_world_counter(); + interp = CC.NativeInterpreter(world), + cache = NoinlineInterpreterCache(IdDict{MethodInstance,CodeInstance}()) + ) = new(noinline_modules, interp, cache) +end +CC.InferenceParams(interp::NoinlineInterpreter) = CC.InferenceParams(interp.interp) +CC.OptimizationParams(interp::NoinlineInterpreter) = CC.OptimizationParams(interp.interp) +CC.get_world_counter(interp::NoinlineInterpreter) = CC.get_world_counter(interp.interp) +CC.get_inference_cache(interp::NoinlineInterpreter) = CC.get_inference_cache(interp.interp) +CC.code_cache(interp::NoinlineInterpreter) = WorldView(interp.cache, WorldRange(CC.get_world_counter(interp))) +CC.get(wvc::WorldView{<:NoinlineInterpreterCache}, mi::MethodInstance, default) = get(wvc.cache.dict, mi, default) +CC.getindex(wvc::WorldView{<:NoinlineInterpreterCache}, mi::MethodInstance) = getindex(wvc.cache.dict, mi) +CC.haskey(wvc::WorldView{<:NoinlineInterpreterCache}, mi::MethodInstance) = haskey(wvc.cache.dict, mi) +CC.setindex!(wvc::WorldView{<:NoinlineInterpreterCache}, ci::CodeInstance, mi::MethodInstance) = setindex!(wvc.cache.dict, ci, mi) + +struct NoinlineCallInfo <: CallInfo + info::CallInfo # wrapped call +end +CC.nsplit_impl(info::NoinlineCallInfo) = CC.nsplit(info.info) +CC.getsplit_impl(info::NoinlineCallInfo, idx::Int) = CC.getsplit(info.info, idx) +CC.getresult_impl(info::NoinlineCallInfo, idx::Int) = CC.getresult(info.info, idx) + +function CC.abstract_call(interp::NoinlineInterpreter, + arginfo::CC.ArgInfo, si::CC.StmtInfo, sv::CC.InferenceState, max_methods::Union{Int,Nothing}) + ret = @invoke CC.abstract_call(interp::CC.AbstractInterpreter, + arginfo::CC.ArgInfo, si::CC.StmtInfo, sv::CC.InferenceState, max_methods::Union{Int,Nothing}) + if sv.mod in interp.noinline_modules + return CC.CallMeta(ret.rt, ret.effects, NoinlineCallInfo(ret.info)) + end + return ret +end +function CC.inlining_policy(interp::NoinlineInterpreter, + @nospecialize(src), @nospecialize(info::CallInfo), stmt_flag::UInt8, mi::MethodInstance, + argtypes::Vector{Any}) + if isa(info, NoinlineCallInfo) + return nothing + end + return @invoke CC.inlining_policy(interp::CC.AbstractInterpreter, + src::Any, info::CallInfo, stmt_flag::UInt8, mi::MethodInstance, + argtypes::Vector{Any}) +end + +@inline function inlined_usually(x, y, z) + return x * y + z +end + +# check if the inlining algorithm works as expected +let src = 
code_typed1((Float64,Float64,Float64)) do x, y, z + inlined_usually(x, y, z) + end + @test count(isinvoke(:inlined_usually), src.code) == 0 + @test count(iscall((src, inlined_usually)), src.code) == 0 +end +let NoinlineModule = Module() + interp = NoinlineInterpreter(Set((NoinlineModule,))) + + # this anonymous function's context is Main -- it should be inlined as usual + let src = code_typed1((Float64,Float64,Float64); interp) do x, y, z + inlined_usually(x, y, z) + end + @test count(isinvoke(:inlined_usually), src.code) == 0 + @test count(iscall((src, inlined_usually)), src.code) == 0 + end + + # it should work for cached results + method = only(methods(inlined_usually, (Float64,Float64,Float64,))) + mi = CC.specialize_method(method, Tuple{typeof(inlined_usually),Float64,Float64,Float64}, Core.svec()) + @test haskey(interp.cache.dict, mi) + let src = code_typed1((Float64,Float64,Float64); interp) do x, y, z + inlined_usually(x, y, z) + end + @test count(isinvoke(:inlined_usually), src.code) == 0 + @test count(iscall((src, inlined_usually)), src.code) == 0 + end + + # now the context module is `NoinlineModule` -- it should not be inlined + let src = @eval NoinlineModule $code_typed1((Float64,Float64,Float64); interp=$interp) do x, y, z + $inlined_usually(x, y, z) + end + @test count(isinvoke(:inlined_usually), src.code) == 1 + @test count(iscall((src, inlined_usually)), src.code) == 0 + end + + # the context module is totally irrelevant -- it should be inlined as usual + OtherModule = Module() + let src = @eval OtherModule $code_typed1((Float64,Float64,Float64); interp=$interp) do x, y, z + $inlined_usually(x, y, z) + end + @test count(isinvoke(:inlined_usually), src.code) == 0 + @test count(iscall((src, inlined_usually)), src.code) == 0 + end +end diff --git a/test/compiler/codegen.jl b/test/compiler/codegen.jl index d7e87e00f1dac..2d7962351865c 100644 --- a/test/compiler/codegen.jl +++ b/test/compiler/codegen.jl @@ -15,9 +15,12 @@ function libjulia_codegen_name() is_debug_build ? "libjulia-codegen-debug" : "libjulia-codegen" end -# `_dump_function` might be more efficient but it doesn't really matter here... -get_llvm(@nospecialize(f), @nospecialize(t), raw=true, dump_module=false, optimize=true) = - sprint(code_llvm, f, t, raw, dump_module, optimize) +# The tests below assume a certain format and safepoint_on_entry=true breaks that. 
+function get_llvm(@nospecialize(f), @nospecialize(t), raw=true, dump_module=false, optimize=true) + params = Base.CodegenParams(safepoint_on_entry=false) + d = InteractiveUtils._dump_function(f, t, false, false, !raw, dump_module, :att, optimize, :none, false, params) + sprint(print, d) +end if !is_debug_build && opt_level > 0 # Make sure getptls call is removed at IR level with optimization on diff --git a/test/compiler/datastructures.jl b/test/compiler/datastructures.jl index 0e75cfb7ace81..c16d968328d18 100644 --- a/test/compiler/datastructures.jl +++ b/test/compiler/datastructures.jl @@ -53,3 +53,48 @@ end end end end + +@testset "searchsorted" begin + @test Core.Compiler.searchsorted([1, 1, 2, 2, 3, 3], 0) === Core.Compiler.UnitRange(1, 0) + @test Core.Compiler.searchsorted([1, 1, 2, 2, 3, 3], 1) === Core.Compiler.UnitRange(1, 2) + @test Core.Compiler.searchsorted([1, 1, 2, 2, 3, 3], 2) === Core.Compiler.UnitRange(3, 4) + @test Core.Compiler.searchsorted([1, 1, 2, 2, 3, 3], 4) === Core.Compiler.UnitRange(7, 6) + @test Core.Compiler.searchsorted([1, 1, 2, 2, 3, 3], 2.5; lt=<) === Core.Compiler.UnitRange(5, 4) + + @test Core.Compiler.searchsorted(Core.Compiler.UnitRange(1, 3), 0) === Core.Compiler.UnitRange(1, 0) + @test Core.Compiler.searchsorted(Core.Compiler.UnitRange(1, 3), 1) === Core.Compiler.UnitRange(1, 1) + @test Core.Compiler.searchsorted(Core.Compiler.UnitRange(1, 3), 2) === Core.Compiler.UnitRange(2, 2) + @test Core.Compiler.searchsorted(Core.Compiler.UnitRange(1, 3), 4) === Core.Compiler.UnitRange(4, 3) + + @test Core.Compiler.searchsorted([1:10;], 1, by=(x -> x >= 5)) === Core.Compiler.UnitRange(1, 4) + @test Core.Compiler.searchsorted([1:10;], 10, by=(x -> x >= 5)) === Core.Compiler.UnitRange(5, 10) + @test Core.Compiler.searchsorted([1:5; 1:5; 1:5], 1, 6, 10, Core.Compiler.Forward) === Core.Compiler.UnitRange(6, 6) + @test Core.Compiler.searchsorted(fill(1, 15), 1, 6, 10, Core.Compiler.Forward) === Core.Compiler.UnitRange(6, 10) + + for (rg,I) in Any[(Core.Compiler.UnitRange(49, 57), 47:59), + (Core.Compiler.StepRange(1, 2, 17), -1:19)] + rg_r = Core.Compiler.reverse(rg) + rgv, rgv_r = Core.Compiler.collect(rg), Core.Compiler.collect(rg_r) + for i = I + @test Core.Compiler.searchsorted(rg,i) === Core.Compiler.searchsorted(rgv,i) + @test Core.Compiler.searchsorted(rg_r,i,rev=true) === Core.Compiler.searchsorted(rgv_r,i,rev=true) + end + end +end + +@testset "basic sort" begin + v = [3,1,2] + @test v == [3,1,2] + @test Core.Compiler.sort!(v) === v == [1,2,3] + @test Core.Compiler.sort!(v, by = x -> -x) === v == [3,2,1] + @test Core.Compiler.sort!(v, by = x -> -x, < = >) === v == [1,2,3] +end + +@testset "randomized sorting tests" begin + for n in [0, 1, 3, 10, 30, 100, 300], k in [0, 30, 2n] + v = rand(-1:k, n) + for by in [identity, x -> -x, x -> x^2 + .1x], lt in [<, >] + @test sort(v; by, lt) == Core.Compiler.sort!(copy(v); by, < = lt) + end + end +end diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl index 7aa8ea2b2b20e..01d835772a569 100644 --- a/test/compiler/inference.jl +++ b/test/compiler/inference.jl @@ -718,7 +718,8 @@ end f_infer_abstract_fieldtype() = fieldtype(HasAbstractlyTypedField, :x) @test Base.return_types(f_infer_abstract_fieldtype, ()) == Any[Type{Union{Int,String}}] let fieldtype_tfunc = Core.Compiler.fieldtype_tfunc, - fieldtype_nothrow = Core.Compiler.fieldtype_nothrow + fieldtype_nothrow(@nospecialize(s0), @nospecialize(name)) = Core.Compiler.fieldtype_nothrow( + Core.Compiler.OptimizerLattice(), s0, name) @test 
fieldtype_tfunc(Union{}, :x) == Union{} @test fieldtype_tfunc(Union{Type{Int32}, Int32}, Const(:x)) == Union{} @test fieldtype_tfunc(Union{Type{Base.RefValue{T}}, Type{Int32}} where {T<:Array}, Const(:x)) == Type{<:Array} @@ -748,6 +749,19 @@ let fieldtype_tfunc = Core.Compiler.fieldtype_tfunc, @test TypeVar <: fieldtype_tfunc(Any, Any) end +import Core.Compiler: MaybeUndef, builtin_nothrow +let 𝕃ₒ = Core.Compiler.OptimizerLattice() + @test !builtin_nothrow(𝕃ₒ, setfield!, Any[Base.RefValue{String}, Core.Const(:x), MaybeUndef(String)], Any) + @test !builtin_nothrow(𝕃ₒ, setfield!, Any[Base.RefValue{String}, Core.Const(:x), MaybeUndef(String), Core.Const(:not_atomic)], Any) + @test !builtin_nothrow(𝕃ₒ, isdefined, Any[Any,MaybeUndef(Symbol)], Bool) + @test !builtin_nothrow(𝕃ₒ, fieldtype, Any[MaybeUndef(Any),Symbol], Any) + @test !builtin_nothrow(𝕃ₒ, isa, Any[Type,MaybeUndef(Type)], Any) + @test !builtin_nothrow(𝕃ₒ, <:, Any[MaybeUndef(Any),MaybeUndef(Any)], Any) + @test !builtin_nothrow(𝕃ₒ, Core.ifelse, Any[MaybeUndef(Bool),Any,Any], Any) + @test !builtin_nothrow(𝕃ₒ, typeassert, Any[MaybeUndef(Any),Type{Symbol}], Any) + @test !builtin_nothrow(𝕃ₒ, Core.get_binding_type, Any[Module,MaybeUndef(Symbol)], Any) +end + # issue #11480 @noinline f11480(x,y) = x let A = Ref @@ -1526,6 +1540,11 @@ end @test nfields_tfunc(Tuple{Int, Vararg{Int}}) === Int @test nfields_tfunc(Tuple{Int, Integer}) === Const(2) @test nfields_tfunc(Union{Tuple{Int, Float64}, Tuple{Int, Int}}) === Const(2) +@test nfields_tfunc(@NamedTuple{a::Int,b::Integer}) === Const(2) +@test nfields_tfunc(NamedTuple{(:a,:b),T} where T<:Tuple{Int,Integer}) === Const(2) +@test nfields_tfunc(NamedTuple{(:a,:b)}) === Const(2) +@test nfields_tfunc(NamedTuple{names,Tuple{Any,Any}} where names) === Const(2) +@test nfields_tfunc(Union{NamedTuple{(:a,:b)},NamedTuple{(:c,:d)}}) === Const(2) using Core.Compiler: typeof_tfunc @test typeof_tfunc(Tuple{Vararg{Int}}) == Type{Tuple{Vararg{Int,N}}} where N @@ -1752,44 +1771,46 @@ end @test setfield!_tfunc(ABCDconst, Const(1), Any) === Union{} @test setfield!_tfunc(ABCDconst, Const(2), Any) === Union{} @test setfield!_tfunc(ABCDconst, Const(4), Any) === Union{} -@test setfield!_nothrow(Base.RefValue{Int}, Const(:x), Int) -@test setfield!_nothrow(Base.RefValue{Int}, Const(1), Int) -@test setfield!_nothrow(Base.RefValue{Any}, Const(:x), Int) -@test setfield!_nothrow(Base.RefValue{Any}, Const(1), Int) -@test setfield!_nothrow(XY{Any,Any}, Const(:x), Int) -@test setfield!_nothrow(XY{Any,Any}, Const(:x), Any) -@test setfield!_nothrow(XY{Int,Float64}, Const(:x), Int) -@test setfield!_nothrow(ABCDconst, Const(:c), Any) -@test setfield!_nothrow(ABCDconst, Const(3), Any) -@test !setfield!_nothrow(XY{Int,Float64}, Symbol, Any) -@test !setfield!_nothrow(XY{Int,Float64}, Int, Any) -@test !setfield!_nothrow(Base.RefValue{Int}, Const(:x), Any) -@test !setfield!_nothrow(Base.RefValue{Int}, Const(1), Any) -@test !setfield!_nothrow(Any[Base.RefValue{Any}, Const(:x), Int, Symbol]) -@test !setfield!_nothrow(Base.RefValue{Any}, Symbol, Int) -@test !setfield!_nothrow(Base.RefValue{Any}, Int, Int) -@test !setfield!_nothrow(XY{Int,Float64}, Const(:y), Int) -@test !setfield!_nothrow(XY{Int,Float64}, Symbol, Int) -@test !setfield!_nothrow(XY{Int,Float64}, Int, Int) -@test !setfield!_nothrow(ABCDconst, Const(:a), Any) -@test !setfield!_nothrow(ABCDconst, Const(:b), Any) -@test !setfield!_nothrow(ABCDconst, Const(:d), Any) -@test !setfield!_nothrow(ABCDconst, Symbol, Any) -@test !setfield!_nothrow(ABCDconst, Const(1), Any) -@test 
!setfield!_nothrow(ABCDconst, Const(2), Any) -@test !setfield!_nothrow(ABCDconst, Const(4), Any) -@test !setfield!_nothrow(ABCDconst, Int, Any) -@test !setfield!_nothrow(Union{Base.RefValue{Any},Some{Any}}, Const(:x), Int) -@test !setfield!_nothrow(Union{Base.RefValue,Some{Any}}, Const(:x), Int) -@test !setfield!_nothrow(Union{Base.RefValue{Any},Some{Any}}, Const(1), Int) -@test !setfield!_nothrow(Union{Base.RefValue,Some{Any}}, Const(1), Int) -@test !setfield!_nothrow(Union{Base.RefValue{Any},Some{Any}}, Symbol, Int) -@test !setfield!_nothrow(Union{Base.RefValue,Some{Any}}, Symbol, Int) -@test !setfield!_nothrow(Union{Base.RefValue{Any},Some{Any}}, Int, Int) -@test !setfield!_nothrow(Union{Base.RefValue,Some{Any}}, Int, Int) -@test !setfield!_nothrow(Any, Symbol, Int) -@test !setfield!_nothrow(Any, Int, Int) -@test !setfield!_nothrow(Any, Any, Int) +let 𝕃ₒ = Core.Compiler.OptimizerLattice() + @test setfield!_nothrow(𝕃ₒ, Base.RefValue{Int}, Const(:x), Int) + @test setfield!_nothrow(𝕃ₒ, Base.RefValue{Int}, Const(1), Int) + @test setfield!_nothrow(𝕃ₒ, Base.RefValue{Any}, Const(:x), Int) + @test setfield!_nothrow(𝕃ₒ, Base.RefValue{Any}, Const(1), Int) + @test setfield!_nothrow(𝕃ₒ, XY{Any,Any}, Const(:x), Int) + @test setfield!_nothrow(𝕃ₒ, XY{Any,Any}, Const(:x), Any) + @test setfield!_nothrow(𝕃ₒ, XY{Int,Float64}, Const(:x), Int) + @test setfield!_nothrow(𝕃ₒ, ABCDconst, Const(:c), Any) + @test setfield!_nothrow(𝕃ₒ, ABCDconst, Const(3), Any) + @test !setfield!_nothrow(𝕃ₒ, XY{Int,Float64}, Symbol, Any) + @test !setfield!_nothrow(𝕃ₒ, XY{Int,Float64}, Int, Any) + @test !setfield!_nothrow(𝕃ₒ, Base.RefValue{Int}, Const(:x), Any) + @test !setfield!_nothrow(𝕃ₒ, Base.RefValue{Int}, Const(1), Any) + @test !setfield!_nothrow(𝕃ₒ, Base.RefValue{Any}, Const(:x), Int, Symbol) + @test !setfield!_nothrow(𝕃ₒ, Base.RefValue{Any}, Symbol, Int) + @test !setfield!_nothrow(𝕃ₒ, Base.RefValue{Any}, Int, Int) + @test !setfield!_nothrow(𝕃ₒ, XY{Int,Float64}, Const(:y), Int) + @test !setfield!_nothrow(𝕃ₒ, XY{Int,Float64}, Symbol, Int) + @test !setfield!_nothrow(𝕃ₒ, XY{Int,Float64}, Int, Int) + @test !setfield!_nothrow(𝕃ₒ, ABCDconst, Const(:a), Any) + @test !setfield!_nothrow(𝕃ₒ, ABCDconst, Const(:b), Any) + @test !setfield!_nothrow(𝕃ₒ, ABCDconst, Const(:d), Any) + @test !setfield!_nothrow(𝕃ₒ, ABCDconst, Symbol, Any) + @test !setfield!_nothrow(𝕃ₒ, ABCDconst, Const(1), Any) + @test !setfield!_nothrow(𝕃ₒ, ABCDconst, Const(2), Any) + @test !setfield!_nothrow(𝕃ₒ, ABCDconst, Const(4), Any) + @test !setfield!_nothrow(𝕃ₒ, ABCDconst, Int, Any) + @test !setfield!_nothrow(𝕃ₒ, Union{Base.RefValue{Any},Some{Any}}, Const(:x), Int) + @test !setfield!_nothrow(𝕃ₒ, Union{Base.RefValue,Some{Any}}, Const(:x), Int) + @test !setfield!_nothrow(𝕃ₒ, Union{Base.RefValue{Any},Some{Any}}, Const(1), Int) + @test !setfield!_nothrow(𝕃ₒ, Union{Base.RefValue,Some{Any}}, Const(1), Int) + @test !setfield!_nothrow(𝕃ₒ, Union{Base.RefValue{Any},Some{Any}}, Symbol, Int) + @test !setfield!_nothrow(𝕃ₒ, Union{Base.RefValue,Some{Any}}, Symbol, Int) + @test !setfield!_nothrow(𝕃ₒ, Union{Base.RefValue{Any},Some{Any}}, Int, Int) + @test !setfield!_nothrow(𝕃ₒ, Union{Base.RefValue,Some{Any}}, Int, Int) + @test !setfield!_nothrow(𝕃ₒ, Any, Symbol, Int) + @test !setfield!_nothrow(𝕃ₒ, Any, Int, Int) + @test !setfield!_nothrow(𝕃ₒ, Any, Any, Int) +end struct Foo_22708 x::Ptr{Foo_22708} @@ -2336,6 +2357,13 @@ end # Equivalence of Const(T.instance) and T for singleton types @test Const(nothing) ⊑ Nothing && Nothing ⊑ Const(nothing) +# `apply_type_tfunc` should always return 
accurate result for empty NamedTuple case +import Core: Const +import Core.Compiler: apply_type_tfunc +@test apply_type_tfunc(Const(NamedTuple), Const(()), Type{T} where T<:Tuple{}) === Const(typeof((;))) +@test apply_type_tfunc(Const(NamedTuple), Const(()), Type{T} where T<:Tuple) === Const(typeof((;))) +@test apply_type_tfunc(Const(NamedTuple), Tuple{Vararg{Symbol}}, Type{Tuple{}}) === Const(typeof((;))) + # Don't pessimize apply_type to anything worse than Type and yield Bottom for invalid Unions @test Core.Compiler.return_type(Core.apply_type, Tuple{Type{Union}}) == Type{Union{}} @test Core.Compiler.return_type(Core.apply_type, Tuple{Type{Union},Any}) == Type @@ -3054,9 +3082,12 @@ const DenseIdx = Union{IntRange,Integer} # Non uniformity in expressions with PartialTypeVar @test Core.Compiler.:⊑(Core.Compiler.PartialTypeVar(TypeVar(:N), true, true), TypeVar) let N = TypeVar(:N) - @test Core.Compiler.apply_type_nothrow([Core.Compiler.Const(NTuple), + 𝕃 = Core.Compiler.OptimizerLattice() + argtypes = Any[Core.Compiler.Const(NTuple), Core.Compiler.PartialTypeVar(N, true, true), - Core.Compiler.Const(Any)], Type{Tuple{Vararg{Any,N}}}) + Core.Compiler.Const(Any)] + rt = Type{Tuple{Vararg{Any,N}}} + @test Core.Compiler.apply_type_nothrow(𝕃, argtypes, rt) end # issue #33768 @@ -3921,10 +3952,6 @@ end +(UnhandledVarargCond(false), xs...) end |> only === Int - @test (Base.return_types((Vector{Any},)) do xs - Core.kwfunc(xs...) - end; true) - @test Base.return_types((Vector{Vector{Int}},)) do xs Tuple(xs...) end |> only === Tuple{Vararg{Int}} @@ -4233,3 +4260,14 @@ function unused_apply_iterate() return nothing end @test fully_eliminated(unused_apply_iterate, ()) + +@testset "#45956: non-linearized cglobal needs special treatment for stmt effects" begin + function foo() + cglobal((a, )) + ccall(0, Cvoid, (Nothing,), b) + end + @test only(code_typed() do + cglobal((a, )) + ccall(0, Cvoid, (Nothing,), b) + end)[2] === Nothing +end diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl index 0e406693b21b3..ab3dd451f82d2 100644 --- a/test/compiler/inline.jl +++ b/test/compiler/inline.jl @@ -120,9 +120,7 @@ f29083(;μ,σ) = μ + σ*randn() g29083() = f29083(μ=2.0,σ=0.1) let c = code_typed(g29083, ())[1][1].code # make sure no call to kwfunc remains - @test !any(e->(isa(e,Expr) && ((e.head === :invoke && e.args[1].def.name === :kwfunc) || - (e.head === :foreigncall && e.args[1] === QuoteNode(:jl_get_keyword_sorter)))), - c) + @test !any(e->(isa(e,Expr) && (e.head === :invoke && e.args[1].def.name === :kwfunc)), c) end @testset "issue #19122: [no]inline of short func. def. 
with return type annotation" begin @@ -1597,44 +1595,44 @@ end # @inline, @noinline, @constprop let @inline f(::Any; x::Int=1) = 2x @test is_inlineable(only(methods(f)).source) - @test is_inlineable(only(methods(Core.kwfunc(f))).source) + @test is_inlineable(only(methods(Core.kwcall, (Any, typeof(f), Vararg))).source) end let @noinline f(::Any; x::Int=1) = 2x @test !is_inlineable(only(methods(f)).source) - @test !is_inlineable(only(methods(Core.kwfunc(f))).source) + @test !is_inlineable(only(methods(Core.kwcall, (Any, typeof(f), Vararg))).source) end let Base.@constprop :aggressive f(::Any; x::Int=1) = 2x @test Core.Compiler.is_aggressive_constprop(only(methods(f))) - @test Core.Compiler.is_aggressive_constprop(only(methods(Core.kwfunc(f)))) + @test Core.Compiler.is_aggressive_constprop(only(methods(Core.kwcall, (Any, typeof(f), Vararg)))) end let Base.@constprop :none f(::Any; x::Int=1) = 2x @test Core.Compiler.is_no_constprop(only(methods(f))) - @test Core.Compiler.is_no_constprop(only(methods(Core.kwfunc(f)))) + @test Core.Compiler.is_no_constprop(only(methods(Core.kwcall, (Any, typeof(f), Vararg)))) end # @nospecialize let f(@nospecialize(A::Any); x::Int=1) = 2x @test only(methods(f)).nospecialize == 1 - @test only(methods(Core.kwfunc(f))).nospecialize == 4 + @test only(methods(Core.kwcall, (Any, typeof(f), Vararg))).nospecialize == 4 end let f(::Any; x::Int=1) = (@nospecialize; 2x) @test only(methods(f)).nospecialize == -1 - @test only(methods(Core.kwfunc(f))).nospecialize == -1 + @test only(methods(Core.kwcall, (Any, typeof(f), Vararg))).nospecialize == -1 end # Base.@assume_effects let Base.@assume_effects :notaskstate f(::Any; x::Int=1) = 2x @test Core.Compiler.decode_effects_override(only(methods(f)).purity).notaskstate - @test Core.Compiler.decode_effects_override(only(methods(Core.kwfunc(f))).purity).notaskstate + @test Core.Compiler.decode_effects_override(only(methods(Core.kwcall, (Any, typeof(f), Vararg))).purity).notaskstate end # propagate multiple metadata also let @inline Base.@assume_effects :notaskstate Base.@constprop :aggressive f(::Any; x::Int=1) = (@nospecialize; 2x) @test is_inlineable(only(methods(f)).source) @test Core.Compiler.is_aggressive_constprop(only(methods(f))) - @test is_inlineable(only(methods(Core.kwfunc(f))).source) - @test Core.Compiler.is_aggressive_constprop(only(methods(Core.kwfunc(f)))) + @test is_inlineable(only(methods(Core.kwcall, (Any, typeof(f), Vararg))).source) + @test Core.Compiler.is_aggressive_constprop(only(methods(Core.kwcall, (Any, typeof(f), Vararg)))) @test only(methods(f)).nospecialize == -1 - @test only(methods(Core.kwfunc(f))).nospecialize == -1 + @test only(methods(Core.kwcall, (Any, typeof(f), Vararg))).nospecialize == -1 @test Core.Compiler.decode_effects_override(only(methods(f)).purity).notaskstate - @test Core.Compiler.decode_effects_override(only(methods(Core.kwfunc(f))).purity).notaskstate + @test Core.Compiler.decode_effects_override(only(methods(Core.kwcall, (Any, typeof(f), Vararg))).purity).notaskstate end end @@ -1756,8 +1754,85 @@ let interp = Core.Compiler.NativeInterpreter() # ok, now delete the callsite flag, and see the second inlining pass can inline the call @eval Core.Compiler $ir.stmts[$i][:flag] &= ~IR_FLAG_NOINLINE inlining = Core.Compiler.InliningState(Core.Compiler.OptimizationParams(interp), nothing, - Core.Compiler.code_cache(interp), interp) + Core.Compiler.get_world_counter(interp), interp) ir = Core.Compiler.ssa_inlining_pass!(ir, inlining, false) @test count(isinvoke(:*), ir.stmts.inst) == 0 @test 
count(iscall((ir, Core.Intrinsics.mul_int)), ir.stmts.inst) == 1 end + +# Test special purpose inliner for Core.ifelse +f_ifelse_1(a, b) = Core.ifelse(true, a, b) +f_ifelse_2(a, b) = Core.ifelse(false, a, b) +f_ifelse_3(a, b) = Core.ifelse(a, true, b) + +@test fully_eliminated(f_ifelse_1, Tuple{Any, Any}; retval=Core.Argument(2)) +@test fully_eliminated(f_ifelse_2, Tuple{Any, Any}; retval=Core.Argument(3)) +@test !fully_eliminated(f_ifelse_3, Tuple{Any, Any}) + +# inline_splatnew for abstract `NamedTuple` +@eval construct_splatnew(T, fields) = $(Expr(:splatnew, :T, :fields)) +for tt = Any[(Int,Int), (Integer,Integer), (Any,Any)] + let src = code_typed1(tt) do a, b + construct_splatnew(NamedTuple{(:a,:b),typeof((a,b))}, (a,b)) + end + @test count(issplatnew, src.code) == 0 + @test count(isnew, src.code) == 1 + end +end + +# optimize away `NamedTuple`s used for handling `@nospecialize`d keyword-argument +# https://github.com/JuliaLang/julia/pull/47059 +abstract type CallInfo end +struct NewInstruction + stmt::Any + type::Any + info::CallInfo + line::Int32 + flag::UInt8 + function NewInstruction(@nospecialize(stmt), @nospecialize(type), @nospecialize(info::CallInfo), + line::Int32, flag::UInt8) + return new(stmt, type, info, line, flag) + end +end +@nospecialize +function NewInstruction(newinst::NewInstruction; + stmt=newinst.stmt, + type=newinst.type, + info::CallInfo=newinst.info, + line::Int32=newinst.line, + flag::UInt8=newinst.flag) + return NewInstruction(stmt, type, info, line, flag) +end +@specialize +let src = code_typed1((NewInstruction,Any,Any,CallInfo)) do newinst, stmt, type, info + NewInstruction(newinst; stmt, type, info) + end + @test count(issplatnew, src.code) == 0 + @test count(iscall((src,NamedTuple)), src.code) == 0 + @test count(isnew, src.code) == 1 +end + +# Test that inlining can still use nothrow information from concrete-eval +# even if the result itself is too big to be inlined, and nothrow is not +# known without concrete-eval +const THE_BIG_TUPLE = ntuple(identity, 1024) +function return_the_big_tuple(err::Bool) + err && error("BAD") + return THE_BIG_TUPLE +end +@noinline function return_the_big_tuple_noinline(err::Bool) + err && error("BAD") + return THE_BIG_TUPLE +end +big_tuple_test1() = return_the_big_tuple(false)[1] +big_tuple_test2() = return_the_big_tuple_noinline(false)[1] + +@test fully_eliminated(big_tuple_test2, Tuple{}) +# Currently we don't run these cleanup passes, but let's make sure that +# if we did, inlining would be able to remove this +let ir = Base.code_ircode(big_tuple_test1, Tuple{})[1][1] + ir = Core.Compiler.compact!(ir, true) + ir = Core.Compiler.cfg_simplify!(ir) + ir = Core.Compiler.compact!(ir, true) + @test length(ir.stmts) == 1 +end diff --git a/test/compiler/interpreter_exec.jl b/test/compiler/interpreter_exec.jl index 27143c17052cc..a310a2740131d 100644 --- a/test/compiler/interpreter_exec.jl +++ b/test/compiler/interpreter_exec.jl @@ -106,3 +106,17 @@ let m = Meta.@lower 1 + 1 global test29262 = false @test :b === @eval $m end + +# https://github.com/JuliaLang/julia/issues/47065 +# `Core.Compiler.sort!` should be able to handle a big list +let n = 1000 + ex = :(return 1) + for _ in 1:n + ex = :(rand() < .1 && $(ex)) + end + @eval global function f_1000_blocks() + $ex + return 0 + end +end +@test f_1000_blocks() == 0 diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl index 1e5948182adb1..a92dd17e1b6eb 100644 --- a/test/compiler/irpasses.jl +++ b/test/compiler/irpasses.jl @@ -743,6 +743,94 @@ let m 
= Meta.@lower 1 + 1 @test length(ir.cfg.blocks) == 1 && Core.Compiler.length(ir.stmts) == 1 end +# Test cfg_simplify in complicated sequences of dropped and merged bbs +using Core.Compiler: Argument, IRCode, GotoNode, GotoIfNot, ReturnNode, NoCallInfo, BasicBlock, StmtRange, SSAValue +bb_term(ir, bb) = Core.Compiler.getindex(ir, SSAValue(Core.Compiler.last(ir.cfg.blocks[bb].stmts)))[:inst] + +function each_stmt_a_bb(stmts, preds, succs) + ir = IRCode() + empty!(ir.stmts.inst) + append!(ir.stmts.inst, stmts) + empty!(ir.stmts.type); append!(ir.stmts.type, [Nothing for _ = 1:length(stmts)]) + empty!(ir.stmts.flag); append!(ir.stmts.flag, [0x0 for _ = 1:length(stmts)]) + empty!(ir.stmts.line); append!(ir.stmts.line, [Int32(0) for _ = 1:length(stmts)]) + empty!(ir.stmts.info); append!(ir.stmts.info, [NoCallInfo() for _ = 1:length(stmts)]) + empty!(ir.cfg.blocks); append!(ir.cfg.blocks, [BasicBlock(StmtRange(i, i), preds[i], succs[i]) for i = 1:length(stmts)]) + Core.Compiler.verify_ir(ir) + return ir +end + +for gotoifnot in (false, true) + stmts = [ + # BB 1 + GotoIfNot(Argument(1), 8), + # BB 2 + GotoIfNot(Argument(2), 4), + # BB 3 + GotoNode(9), + # BB 4 + GotoIfNot(Argument(3), 10), + # BB 5 + GotoIfNot(Argument(4), 11), + # BB 6 + GotoIfNot(Argument(5), 12), + # BB 7 + GotoNode(13), + # BB 8 + ReturnNode(1), + # BB 9 + nothing, + # BB 10 + nothing, + # BB 11 + gotoifnot ? GotoIfNot(Argument(6), 13) : GotoNode(13), + # BB 12 + ReturnNode(2), + # BB 13 + ReturnNode(3), + ] + preds = Vector{Int}[Int[], [1], [2], [2], [4], [5], [6], [1], [3], [4, 9], [5, 10], gotoifnot ? [6,11] : [6], [7, 11]] + succs = Vector{Int}[[2, 8], [3, 4], [9], [5, 10], [6, 11], [7, 12], [13], Int[], [10], [11], gotoifnot ? [12, 13] : [13], Int[], Int[]] + ir = each_stmt_a_bb(stmts, preds, succs) + ir = Core.Compiler.cfg_simplify!(ir) + Core.Compiler.verify_ir(ir) + + if gotoifnot + let term4 = bb_term(ir, 4), term5 = bb_term(ir, 5) + @test isa(term4, GotoIfNot) && bb_term(ir, term4.dest).val == 3 + @test isa(term5, ReturnNode) && term5.val == 2 + end + else + @test length(ir.cfg.blocks) == 10 + let term = bb_term(ir, 3) + @test isa(term, GotoNode) && bb_term(ir, term.label).val == 3 + end + end +end + +let stmts = [ + # BB 1 + GotoIfNot(Argument(1), 4), + # BB 2 + GotoIfNot(Argument(2), 5), + # BB 3 + GotoNode(5), + # BB 4 + ReturnNode(1), + # BB 5 + ReturnNode(2) + ] + preds = Vector{Int}[Int[], [1], [2], [1], [2, 3]] + succs = Vector{Int}[[2, 4], [3, 5], [5], Int[], Int[]] + ir = each_stmt_a_bb(stmts, preds, succs) + ir = Core.Compiler.cfg_simplify!(ir) + Core.Compiler.verify_ir(ir) + + @test length(ir.cfg.blocks) == 4 + terms = map(i->bb_term(ir, i), 1:length(ir.cfg.blocks)) + @test Set(term.val for term in terms if isa(term, ReturnNode)) == Set([1,2]) +end + let m = Meta.@lower 1 + 1 # Test that CFG simplify doesn't mess up when chaining past return blocks @assert Meta.isexpr(m, :thunk) @@ -796,6 +884,21 @@ let m = Meta.@lower 1 + 1 @test length(ir.cfg.blocks) == 1 end +# `cfg_simplify!` shouldn't error in a presence of `try/catch` block +let ir = Base.code_ircode(; optimize_until="slot2ssa") do + v = try + catch + end + v + end |> only |> first + Core.Compiler.verify_ir(ir) + nb = length(ir.cfg.blocks) + ir = Core.Compiler.cfg_simplify!(ir) + Core.Compiler.verify_ir(ir) + na = length(ir.cfg.blocks) + @test na < nb +end + # Issue #29213 function f_29213() while true @@ -1065,3 +1168,40 @@ let sroa_no_forward() = begin end @test sroa_no_forward() == (1, 2.0) end + +@noinline function foo_defined_last_iter(n::Int) 
+ local x + for i = 1:n + if i == 5 + x = 1 + end + end + if n > 2 + return x + n + end + return 0 +end +const_call_defined_last_iter() = foo_defined_last_iter(3) +@test foo_defined_last_iter(2) == 0 +@test_throws UndefVarError foo_defined_last_iter(3) +@test_throws UndefVarError const_call_defined_last_iter() +@test foo_defined_last_iter(6) == 7 + +let src = code_typed1(foo_defined_last_iter, Tuple{Int}) + for i = 1:length(src.code) + e = src.code[i] + if isexpr(e, :throw_undef_if_not) + @assert !isa(e.args[2], Bool) + end + end +end + +# Issue #47180, incorrect phi counts in CmdRedirect +function a47180(b; stdout ) + c = setenv(b, b.env) + if true + c = pipeline(c, stdout) + end + c +end +@test isa(a47180(``; stdout), Base.AbstractCmd) diff --git a/test/compiler/irutils.jl b/test/compiler/irutils.jl index 76f883d6cea2c..ef8fe3efbb315 100644 --- a/test/compiler/irutils.jl +++ b/test/compiler/irutils.jl @@ -8,6 +8,7 @@ get_code(args...; kwargs...) = code_typed1(args...; kwargs...).code # check if `x` is a statement with a given `head` isnew(@nospecialize x) = isexpr(x, :new) +issplatnew(@nospecialize x) = isexpr(x, :splatnew) isreturn(@nospecialize x) = isa(x, ReturnNode) # check if `x` is a dynamic call of a given function diff --git a/test/compiler/ssair.jl b/test/compiler/ssair.jl index ddb2fd2f13e09..a33121eeca443 100644 --- a/test/compiler/ssair.jl +++ b/test/compiler/ssair.jl @@ -173,7 +173,17 @@ let ci = make_ci([ ]) ir = Core.Compiler.inflate_ir(ci) ir = Core.Compiler.compact!(ir, true) - @test Core.Compiler.verify_ir(ir) == nothing + @test Core.Compiler.verify_ir(ir) === nothing +end + +# Test that the verifier doesn't choke on cglobals (which aren't linearized) +let ci = make_ci([ + Expr(:call, GlobalRef(Main, :cglobal), + Expr(:call, Core.tuple, :(:c)), Nothing), + Core.Compiler.ReturnNode() + ]) + ir = Core.Compiler.inflate_ir(ci) + @test Core.Compiler.verify_ir(ir) === nothing end # Test that GlobalRef in value position is non-canonical @@ -496,3 +506,47 @@ end @test show(devnull, ir) === nothing end + +@testset "IncrementalCompact statefulness" begin + foo(i) = i == 1 ? 1 : 2 + ir = only(Base.code_ircode(foo, (Int,)))[1] + compact = Core.Compiler.IncrementalCompact(ir) + + # set up first iterator + x = Core.Compiler.iterate(compact) + x = Core.Compiler.iterate(compact, x[2]) + + # set up second iterator + x = Core.Compiler.iterate(compact) + + # consume remainder + while x !== nothing + x = Core.Compiler.iterate(compact, x[2]) + end + + ir = Core.Compiler.complete(compact) + @test Core.Compiler.verify_ir(ir) === nothing +end + +# insert_node! for pending node +import Core: SSAValue +import Core.Compiler: NewInstruction, insert_node! 
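Editor's note (not part of the patch): the test below drives `insert_node!` with a pending node. As a minimal sketch of the API it exercises, assuming a Julia build where `Base.code_ircode` and the `Core.Compiler` internals imported above are available (`pow2` is a hypothetical helper introduced only for this illustration):

```julia
using Core: SSAValue
using Core.Compiler: NewInstruction, insert_node!

pow2(a, b) = a^b  # hypothetical helper, defined only for this sketch

# Take unoptimized IR for pow2(::Int, ::Int), then request a new
# `println(%1)` statement attached *after* statement 1.
ir = Base.code_ircode(pow2, (Int, Int); optimize_until="inlining") |> only |> first
newssa = insert_node!(ir, SSAValue(1),
                      NewInstruction(Expr(:call, println, SSAValue(1)), Nothing),
                      #=attach_after=#true)
ir = Core.Compiler.compact!(ir)  # materializes the pending node into ir.stmts
```

Until `compact!` runs, the new statement appears to live only in the IR's pending-node buffer, which is the state the second `insert_node!` call in the test targets.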
+let ir = Base.code_ircode((Int,Int); optimize_until="inlining") do a, b
+        a^b
+    end |> only |> first
+    @test length(ir.stmts) == 2
+    @test Meta.isexpr(ir.stmts[1][:inst], :invoke)
+
+    newssa = insert_node!(ir, SSAValue(1), NewInstruction(Expr(:call, println, SSAValue(1)), Nothing), #=attach_after=#true)
+    newssa = insert_node!(ir, newssa, NewInstruction(Expr(:call, println, newssa), Nothing), #=attach_after=#true)
+
+    ir = Core.Compiler.compact!(ir)
+    @test length(ir.stmts) == 4
+    @test Meta.isexpr(ir.stmts[1][:inst], :invoke)
+    call1 = ir.stmts[2][:inst]
+    @test iscall((ir,println), call1)
+    @test call1.args[2] === SSAValue(1)
+    call2 = ir.stmts[3][:inst]
+    @test iscall((ir,println), call2)
+    @test call2.args[2] === SSAValue(2)
+end
diff --git a/test/core.jl b/test/core.jl
index 35b029f93da44..801058a0b87eb 100644
--- a/test/core.jl
+++ b/test/core.jl
@@ -14,7 +14,7 @@ include("testenv.jl")
 # sanity tests that our built-in types are marked correctly for const fields
 for (T, c) in (
         (Core.CodeInfo, []),
-        (Core.CodeInstance, [:def, :min_world, :max_world, :rettype, :rettype_const, :ipo_purity_bits, :argescapes]),
+        (Core.CodeInstance, [:def, :rettype, :rettype_const, :ipo_purity_bits, :argescapes]),
         (Core.Method, [#=:name, :module, :file, :line, :primary_world, :sig, :slot_syms, :external_mt, :nargs, :called, :nospecialize, :nkw, :isva, :pure, :is_for_opaque_closure, :constprop=#]),
         (Core.MethodInstance, [#=:def, :specTypes, :sparam_vals=#]),
         (Core.MethodTable, [:module]),
@@ -4859,6 +4859,13 @@ let a = Any[]
     @test a == [10, 2]
 end
 
+# issue 47209
+struct f47209
+    x::Int
+    f47209()::Nothing = new(1)
+end
+@test_throws MethodError f47209()
+
 # issue #12096
 let a = Val{Val{TypeVar(:_, Int)}},
     b = Val{Val{x} where x<:Int}
@@ -7837,3 +7844,32 @@ fvarargN(x::Tuple{Vararg{Int, N}}) where {N} = N
fvarargN(args...)
= fvarargN(args) finvokevarargN() = Base.inferencebarrier(fvarargN)(1, 2, 3) @test finvokevarargN() == 3 + +# Make sure that @specialize actually overrides a module annotation +module SpecializeModuleTest + @nospecialize + f(@specialize(x), y) = 2 + @specialize +end +@test methods(SpecializeModuleTest.f)[1].nospecialize & 0b11 == 0b10 + +let # https://github.com/JuliaLang/julia/issues/46918 + # jl_binding_type shouldn't be unstable + code = quote + res1 = ccall(:jl_binding_type, Any, (Any, Any), Main, :stderr) + + stderr + + res2 = ccall(:jl_binding_type, Any, (Any, Any), Main, :stderr) + + res3 = ccall(:jl_binding_type, Any, (Any, Any), Main, :stderr) + + print(stdout, res1, " ", res2, " ", res3) + end |> x->join(x.args, ';') + cmd = `$(Base.julia_cmd()) -e $code` # N.B make sure not to pass this code as `:block` + stdout = IOBuffer() + stderr = IOBuffer() + @test success(pipeline(Cmd(cmd); stdout, stderr)) + @test isempty(String(take!(stderr))) # make sure no error has happened + @test String(take!(stdout)) == "nothing IO IO" +end diff --git a/test/error.jl b/test/error.jl index eaf77c5d53912..e9cdfa100bc81 100644 --- a/test/error.jl +++ b/test/error.jl @@ -99,3 +99,27 @@ end @test s == "MethodError: no method matching f44319(::Int$(Sys.WORD_SIZE))\n\nClosest candidates are:\n f44319()\n @ $curmod_str none:0\n" end end + +@testset "All types ending with Exception or Error subtype Exception" begin + function test_exceptions(mod, visited=Set{Module}()) + if mod ∉ visited + push!(visited, mod) + for name in names(mod, all=true) + isdefined(mod, name) || continue + value = getfield(mod, name) + + if value isa Module + test_exceptions(value, visited) + elseif value isa Type + str = string(value) + if endswith(str, "Exception") || endswith(str, "Error") + @test value <: Exception + end + end + end + end + visited + end + visited = test_exceptions(Base) + test_exceptions(Core, visited) +end diff --git a/test/errorshow.jl b/test/errorshow.jl index c31f7d902a0d8..e081695f2f15d 100644 --- a/test/errorshow.jl +++ b/test/errorshow.jl @@ -343,7 +343,7 @@ let undefvar err_str = @except_str Vector{Any}(undef, 1)[1] UndefRefError @test err_str == "UndefRefError: access to undefined reference" err_str = @except_str undefvar UndefVarError - @test err_str == "UndefVarError: undefvar not defined" + @test err_str == "UndefVarError: `undefvar` not defined" err_str = @except_str read(IOBuffer(), UInt8) EOFError @test err_str == "EOFError: read end of file" err_str = @except_str Dict()[:doesnotexist] KeyError diff --git a/test/keywordargs.jl b/test/keywordargs.jl index 0e651cf7f4531..366f14393a94f 100644 --- a/test/keywordargs.jl +++ b/test/keywordargs.jl @@ -297,7 +297,7 @@ end @test_throws UndefKeywordError f34516() @test_throws UndefKeywordError f34516(1) g34516(@nospecialize(x); k=0) = 0 - @test first(methods(Core.kwfunc(g34516))).nospecialize != 0 + @test only(methods(Core.kwcall, (Any, typeof(g34516), Vararg))).nospecialize != 0 end @testset "issue #21518" begin a = 0 diff --git a/test/llvmpasses/alloc-opt-unsized.ll b/test/llvmpasses/alloc-opt-unsized.ll new file mode 100644 index 0000000000000..f7ea31fde6b05 --- /dev/null +++ b/test/llvmpasses/alloc-opt-unsized.ll @@ -0,0 +1,35 @@ +; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s + +source_filename = "text" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13" +target triple = "x86_64-linux-gnu" + +declare {}*** @julia.get_pgcstack() + +declare {} 
addrspace(10)* @julia.gc_alloc_obj({}**, i64, {} addrspace(10)*) + +declare void @julia.write_barrier({} addrspace(10)*, ...) + +define void @diffejulia_objective__1864_inner_1wrap({} addrspace(10)* %arg, i64 %iv.i) { +entry: + %i5 = call {}*** @julia.get_pgcstack() + %i13 = bitcast {}*** %i5 to {}** + %i14 = getelementptr inbounds {}*, {}** %i13, i64 -12 + %i18 = call noalias nonnull dereferenceable(8000) dereferenceable_or_null(8000) {} addrspace(10)* @julia.gc_alloc_obj({}** %i14, i64 8000, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 139756155247504 to {}*) to {} addrspace(10)*)) + %_malloccache.i = bitcast {} addrspace(10)* %i18 to {} addrspace(10)* addrspace(10)* + %i23 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %_malloccache.i, i64 %iv.i + store {} addrspace(10)* %arg, {} addrspace(10)* addrspace(10)* %i23, align 8 + %i24 = bitcast {} addrspace(10)* addrspace(10)* %_malloccache.i to {} addrspace(10)* + call void ({} addrspace(10)*, ...) @julia.write_barrier({} addrspace(10)* %i24, {} addrspace(10)* %arg) + %l = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %i23 + ret void +} + +; CHECK: %[[i0:.+]] = alloca {} addrspace(10)*, i64 1000, align 16 +; CHECK: %[[i1:.+]] = bitcast {} addrspace(10)** %[[i0]] to i8* +; CHECK: %i18 = bitcast i8* %[[i1]] to {}* +; CHECK: %_malloccache.i = bitcast {}* %i18 to {} addrspace(10)** +; CHECK: %i23 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %_malloccache.i, i64 %iv.i +; CHECK: store {} addrspace(10)* %arg, {} addrspace(10)** %i23, align 8 +; CHECK: %i24 = bitcast {} addrspace(10)** %_malloccache.i to {}* +; CHECK: %l = load {} addrspace(10)*, {} addrspace(10)** %i23, align 8 diff --git a/test/llvmpasses/late-lower-gc.ll b/test/llvmpasses/late-lower-gc.ll index cbc7c1c6726a8..65a67c78d7810 100644 --- a/test/llvmpasses/late-lower-gc.ll +++ b/test/llvmpasses/late-lower-gc.ll @@ -148,6 +148,25 @@ define {} addrspace(10)* @gclift_switch({} addrspace(13)* addrspace(10)* %input, ret {} addrspace(10)* %ret } +define void @decayar([2 x {} addrspace(10)* addrspace(11)*] %ar) { + %v2 = call {}*** @julia.get_pgcstack() + %e0 = extractvalue [2 x {} addrspace(10)* addrspace(11)*] %ar, 0 + %l0 = load {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %e0 + %e1 = extractvalue [2 x {} addrspace(10)* addrspace(11)*] %ar, 1 + %l1 = load {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %e1 + %r = call i32 @callee_root({} addrspace(10)* %l0, {} addrspace(10)* %l1) + ret void +} + +; CHECK-LABEL: @decayar +; CHECK: %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2) +; CHECK: %1 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 1) +; CHECK: store {} addrspace(10)* %l0, {} addrspace(10)** %1, align 8 +; CHECK: %2 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 0) +; CHECK: store {} addrspace(10)* %l1, {} addrspace(10)** %2, align 8 +; CHECK: %r = call i32 @callee_root({} addrspace(10)* %l0, {} addrspace(10)* %l1) +; CHECK: call void @julia.pop_gc_frame({} addrspace(10)** %gcframe) + !0 = !{i64 0, i64 23} !1 = !{!1} !2 = !{!7} ; scope list diff --git a/test/operators.jl b/test/operators.jl index 7ca958aa24fa3..6a93f70cc21f0 100644 --- a/test/operators.jl +++ b/test/operators.jl @@ -184,7 +184,7 @@ end @test (@inferred g(1)) == ntuple(Returns(1), 13) h = (-) ∘ (-) ∘ (-) ∘ (-) ∘ (-) ∘ (-) ∘ sum @test (@inferred h((1, 2, 3); init = 0.0)) == 6.0 - issue_45877 = reduce(∘, fill(sin,500)) + issue_45877 = reduce(∘, fill(sin, 50)) 
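Editor's note (not part of the patch): for context on `issue_45877` just above, `reduce(∘, fill(sin, 50))` folds 50 copies of `sin` into one deeply nested `Base.ComposedFunction`, and the assertions that follow check that the compiler still treats `Base.unwrap_composed` on it as foldable. A small sketch of the object being built (three layers instead of fifty):

```julia
# `∘` wraps its operands in nested ComposedFunction objects, so reducing
# over a vector of functions builds one deeply nested callable.
h = reduce(∘, fill(sin, 3))      # sin ∘ sin ∘ sin
h isa Base.ComposedFunction      # true: nested ComposedFunction wrappers
h(1.0) == sin(sin(sin(1.0)))     # true; 50 layers behave the same way
```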
@test Core.Compiler.is_foldable(Base.infer_effects(Base.unwrap_composed, (typeof(issue_45877),))) @test fully_eliminated() do issue_45877(1.0) diff --git a/test/precompile.jl b/test/precompile.jl index fc73231a3e308..098d1ffbba231 100644 --- a/test/precompile.jl +++ b/test/precompile.jl @@ -159,10 +159,9 @@ precompile_test_harness(false) do dir # issue 16529 (adding a method to a type with no instances) (::Task)(::UInt8, ::UInt16, ::UInt32) = 2 - # issue 16471 (capturing references to a kwfunc) - Test.@test !isdefined(typeof(sin).name.mt, :kwsorter) + # issue 16471 Base.sin(::UInt8, ::UInt16, ::UInt32; x = 52) = x - const sinkw = Core.kwfunc(Base.sin) + const sinkw = Core.kwcall # issue 16908 (some complicated types and external method definitions) abstract type CategoricalPool{T, R <: Integer, V} end @@ -253,9 +252,6 @@ precompile_test_harness(false) do dir Base.@ccallable Cint f35014(x::Cint) = x+Cint(1) end """) - # make sure `sin` didn't have a kwfunc (which would invalidate the attempted test) - @test !isdefined(typeof(sin).name.mt, :kwsorter) - # Issue #12623 @test __precompile__(false) === nothing @@ -387,7 +383,7 @@ precompile_test_harness(false) do dir @test current_task()(0x01, 0x4000, 0x30031234) == 2 @test sin(0x01, 0x4000, 0x30031234) == 52 @test sin(0x01, 0x4000, 0x30031234; x = 9142) == 9142 - @test Foo.sinkw === Core.kwfunc(Base.sin) + @test Foo.sinkw === Core.kwcall @test Foo.NominalValue() == 1 @test Foo.OrdinalValue() == 1 @@ -642,16 +638,11 @@ precompile_test_harness("code caching") do dir msize = which(size, (Vector{<:Any},)) hasspec = false for i = 1:length(msize.specializations) - if isassigned(msize.specializations, i) - mi = msize.specializations[i] - if isa(mi, Core.MethodInstance) - tt = Base.unwrap_unionall(mi.specTypes) - if tt.parameters[2] == Vector{Cacheb8321416e8a3e2f1.X} - if isdefined(mi, :cache) && isa(mi.cache, Core.CodeInstance) && mi.cache.max_world == typemax(UInt) && mi.cache.inferred !== nothing - hasspec = true - break - end - end + mi = msize.specializations[i] + if isa(mi, Core.MethodInstance) && mi.specTypes == Tuple{typeof(size),Vector{Cacheb8321416e8a3e2f1.X}} + if isdefined(mi, :cache) && isa(mi.cache, Core.CodeInstance) && mi.cache.max_world == typemax(UInt) && mi.cache.inferred !== nothing + hasspec = true + break end end end @@ -671,7 +662,7 @@ precompile_test_harness("code caching") do dir # Check that internal methods and their roots are accounted appropriately minternal = which(M.getelsize, (Vector,)) mi = minternal.specializations[1] - @test Base.unwrap_unionall(mi.specTypes).parameters[2] == Vector{Int32} + @test mi.specTypes == Tuple{typeof(M.getelsize),Vector{Int32}} ci = mi.cache @test ci.relocatability == 1 @test ci.inferred !== nothing @@ -787,7 +778,7 @@ precompile_test_harness("code caching") do dir end end - # Invalidations (this test is adapted from from SnoopCompile) + # Invalidations (this test is adapted from SnoopCompile) function hasvalid(mi, world) isdefined(mi, :cache) || return false ci = mi.cache @@ -898,26 +889,26 @@ precompile_test_harness("code caching") do dir # Reporting test @test all(i -> isassigned(invalidations, i), eachindex(invalidations)) - idxs = findall(==("insert_backedges"), invalidations) m = only(methods(MB.call_nbits)) - idxsbits = filter(idxs) do i - mi = invalidations[i-1] - mi.def == m - end - idx = only(idxsbits) for mi in m.specializations mi === nothing && continue hv = hasvalid(mi, world) @test mi.specTypes.parameters[end] === Integer ? 
!hv : hv
    end
+    setglobal!(Main, :inval, invalidations)
+    idxs = findall(==("verify_methods"), invalidations)
+    idxsbits = filter(idxs) do i
+        mi = invalidations[i-1]
+        mi.def == m
+    end
+    idx = only(idxsbits)
     tagbad = invalidations[idx+1]
-    buildid = invalidations[idx+2]
-    @test isa(buildid, UInt64)
+    @test isa(tagbad, Int32)
     j = findfirst(==(tagbad), invalidations)
-    @test invalidations[j+1] == buildid
-    @test isa(invalidations[j-2], Type)
     @test invalidations[j-1] == "insert_backedges_callee"
+    @test isa(invalidations[j-2], Type)
+    @test isa(invalidations[j+1], Vector{Any}) # [nbits(::UInt8)]
 
     m = only(methods(MB.map_nbits))
     @test !hasvalid(m.specializations[1], world+1) # insert_backedges invalidations also trigger their backedges
diff --git a/test/reduce.jl b/test/reduce.jl
index c03013f880013..84d93b12913e4 100644
--- a/test/reduce.jl
+++ b/test/reduce.jl
@@ -160,12 +160,14 @@ plus(x,y) = x + y
 sum3(A) = reduce(plus, A)
 sum4(itr) = invoke(reduce, Tuple{Function, Any}, plus, itr)
 sum5(A) = reduce(plus, A; init=0)
-sum6(itr) = invoke(Core.kwfunc(reduce), Tuple{NamedTuple{(:init,), Tuple{Int}}, typeof(reduce), Function, Any}, (init=0,), reduce, plus, itr)
+sum6(itr) = invoke(Core.kwcall, Tuple{NamedTuple{(:init,), Tuple{Int}}, typeof(reduce), Function, Any}, (init=0,), reduce, plus, itr)
+sum61(itr) = invoke(reduce, Tuple{Function, Any}, init=0, plus, itr)
 sum7(A) = mapreduce(x->x, plus, A)
 sum8(itr) = invoke(mapreduce, Tuple{Function, Function, Any}, x->x, plus, itr)
 sum9(A) = mapreduce(x->x, plus, A; init=0)
-sum10(itr) = invoke(Core.kwfunc(mapreduce), Tuple{NamedTuple{(:init,),Tuple{Int}}, typeof(mapreduce), Function, Function, Any}, (init=0,), mapreduce, x->x, plus, itr)
-for f in (sum2, sum5, sum6, sum9, sum10)
+sum10(itr) = invoke(Core.kwcall, Tuple{NamedTuple{(:init,),Tuple{Int}}, typeof(mapreduce), Function, Function, Any}, (init=0,), mapreduce, x->x, plus, itr)
+sum11(itr) = invoke(mapreduce, Tuple{Function, Function, Any}, init=0, x->x, plus, itr)
+for f in (sum2, sum5, sum6, sum61, sum9, sum10, sum11)
     @test sum(z) == f(z)
     @test sum(Int[]) == f(Int[]) == 0
     @test sum(Int[7]) == f(Int[7]) == 7
diff --git a/test/runtests.jl b/test/runtests.jl
index 4c9ac1cfd869c..3227804cf7b47 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -127,7 +127,7 @@ cd(@__DIR__) do
         println("""
         Running parallel tests with:
           nworkers() = $(nworkers())
-          nthreads() = $(Threads.nthreads())
+          nthreads() = $(Threads.threadpoolsize())
           Sys.CPU_THREADS = $(Sys.CPU_THREADS)
           Sys.total_memory() = $(Base.format_bytes(Sys.total_memory()))
           Sys.free_memory() = $(Base.format_bytes(Sys.free_memory()))
diff --git a/test/show.jl b/test/show.jl
index 30c06817bff54..459a5d5ba3abb 100644
--- a/test/show.jl
+++ b/test/show.jl
@@ -2036,21 +2036,17 @@ let src = code_typed(my_fun28173, (Int,), debuginfo=:source)[1][1]
     lines2 = split(repr(ir), '\n')
     @test all(isspace, pop!(lines2))
     @test popfirst!(lines2) == "2 1 ── $(QuoteNode(1))"
-    @test popfirst!(lines2) == " │ $(QuoteNode(2))" # TODO: this should print after the next statement
     let line1 = popfirst!(lines1)
         line2 = popfirst!(lines2)
         @test startswith(line1, "2 1 ── ")
         @test startswith(line2, " │ ")
         @test line1[12:end] == line2[12:end]
     end
-    let line1 = pop!(lines1)
-        line2 = pop!(lines2)
-        @test startswith(line1, "17 ")
-        @test startswith(line2, " ")
-        @test line1[3:end] == line2[3:end]
-    end
-    @test pop!(lines2) == " │ \$(QuoteNode(4))"
-    @test pop!(lines2) == "17 │ \$(QuoteNode(3))" # TODO: this should print after the next statement
+    @test popfirst!(lines2) == " │ 
$(QuoteNode(2))" + @test pop!(lines2) == " └─── \$(QuoteNode(4))" + @test pop!(lines1) == "17 └─── return %18" + @test pop!(lines2) == " │ return %18" + @test pop!(lines2) == "17 │ \$(QuoteNode(3))" @test lines1 == lines2 # verbose linetable @@ -2479,3 +2475,119 @@ end ir = Core.Compiler.complete(compact) @test lines_shown(compact) == instructions + 1 end + +@testset "#46424: IncrementalCompact displays wrong basic-block boundaries" begin + # get some cfg + function foo(i) + j = i+42 + j == 1 ? 1 : 2 + end + ir = only(Base.code_ircode(foo, (Int,)))[1] + + # at every point we should be able to observe these three basic blocks + function verify_display(ir) + str = sprint(io->show(io, ir)) + @test contains(str, "1 ─ %1 = ") + @test contains(str, r"2 ─ \s+ return 1") + @test contains(str, r"3 ─ \s+ return 2") + end + verify_display(ir) + + # insert some instructions + for i in 1:3 + inst = Core.Compiler.NewInstruction(Expr(:call, :identity, i), Int) + Core.Compiler.insert_node!(ir, 2, inst) + end + + # compact + compact = Core.Compiler.IncrementalCompact(ir) + verify_display(compact) + + # Compact the first instruction + state = Core.Compiler.iterate(compact) + + # Insert some instructions here + for i in 1:2 + inst = Core.Compiler.NewInstruction(Expr(:call, :identity, i), Int, Int32(1)) + Core.Compiler.insert_node_here!(compact, inst) + verify_display(compact) + end + + while state !== nothing + state = Core.Compiler.iterate(compact, state[2]) + verify_display(compact) + end + + # complete + ir = Core.Compiler.complete(compact) + verify_display(ir) +end + +@testset "IRCode: CFG display" begin + # get a cfg + function foo(i) + j = i+42 + j == 1 ? 1 : 2 + end + ir = only(Base.code_ircode(foo, (Int,)))[1] + cfg = ir.cfg + + str = sprint(io->show(io, cfg)) + @test contains(str, r"CFG with \d+ blocks") + @test contains(str, r"bb 1 \(stmt.+\) → bb.*") +end + +@testset "IncrementalCompact: correctly display attach-after nodes" begin + # set some IR + function foo(i) + j = i+42 + return j + end + ir = only(Base.code_ircode(foo, (Int,)))[1] + + # insert a bunch of nodes, inserting both before and after instruction 1 + inst = Core.Compiler.NewInstruction(Expr(:call, :identity, 1), Int) + Core.Compiler.insert_node!(ir, 1, inst) + inst = Core.Compiler.NewInstruction(Expr(:call, :identity, 2), Int) + Core.Compiler.insert_node!(ir, 1, inst) + inst = Core.Compiler.NewInstruction(Expr(:call, :identity, 3), Int) + Core.Compiler.insert_node!(ir, 1, inst, true) + inst = Core.Compiler.NewInstruction(Expr(:call, :identity, 4), Int) + Core.Compiler.insert_node!(ir, 1, inst, true) + + # at every point we should be able to observe these instructions (in order) + function verify_display(ir) + str = sprint(io->show(io, ir)) + lines = split(str, '\n') + patterns = ["identity(1)", + "identity(2)", + "add_int", + "identity(3)", + "identity(4)", + "return"] + line_idx = 1 + pattern_idx = 1 + while pattern_idx <= length(patterns) && line_idx <= length(lines) + # we test pattern-per-pattern, in order, + # so that we skip e.g. 
the compaction boundary + if contains(lines[line_idx], patterns[pattern_idx]) + pattern_idx += 1 + end + line_idx += 1 + end + @test pattern_idx > length(patterns) + end + verify_display(ir) + + compact = Core.Compiler.IncrementalCompact(ir) + verify_display(compact) + + state = Core.Compiler.iterate(compact) + while state !== nothing + verify_display(compact) + state = Core.Compiler.iterate(compact, state[2]) + end + + ir = Core.Compiler.complete(compact) + verify_display(ir) +end diff --git a/test/sorting.jl b/test/sorting.jl index 9766ee99ce751..acb628406581e 100644 --- a/test/sorting.jl +++ b/test/sorting.jl @@ -78,6 +78,14 @@ end @test sort(Union{}[]) == Union{}[] # issue #45280 end +@testset "stability" begin + for Alg in [InsertionSort, MergeSort, QuickSort, Base.Sort.AdaptiveSort, Base.DEFAULT_STABLE, + PartialQuickSort(missing, 1729), PartialQuickSort(1729, missing)] + @test issorted(sort(1:2000, alg=Alg, by=x->0)) + @test issorted(sort(1:2000, alg=Alg, by=x->x÷100)) + end +end + @testset "partialsort" begin @test partialsort([3,6,30,1,9],3) == 6 @test partialsort([3,6,30,1,9],3:4) == [6,9] @@ -120,9 +128,11 @@ Base.step(r::ConstantRange) = 0 @test searchsortedlast(r, 1.0, Forward) == 5 @test searchsortedlast(r, 1, Forward) == 5 @test searchsortedlast(r, UInt(1), Forward) == 5 +end +@testset "Each sorting algorithm individually" begin a = rand(1:10000, 1000) - for alg in [InsertionSort, MergeSort, Base.DEFAULT_STABLE] + for alg in [InsertionSort, MergeSort, QuickSort, Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] b = sort(a, alg=alg) @test issorted(b) @@ -187,18 +197,16 @@ Base.step(r::ConstantRange) = 0 @test b == c end - @testset "unstable algorithms" begin - for alg in [QuickSort, Base.DEFAULT_UNSTABLE] - b = sort(a, alg=alg) - @test issorted(b) - @test last(b) == last(sort(a, alg=PartialQuickSort(length(a)))) - b = sort(a, alg=alg, rev=true) - @test issorted(b, rev=true) - @test last(b) == last(sort(a, alg=PartialQuickSort(length(a)), rev=true)) - b = sort(a, alg=alg, by=x->1/x) - @test issorted(b, by=x->1/x) - @test last(b) == last(sort(a, alg=PartialQuickSort(length(a)), by=x->1/x)) - end + @testset "PartialQuickSort" begin + b = sort(a) + @test issorted(b) + @test last(b) == last(sort(a, alg=PartialQuickSort(length(a)))) + b = sort(a, rev=true) + @test issorted(b, rev=true) + @test last(b) == last(sort(a, alg=PartialQuickSort(length(a)), rev=true)) + b = sort(a, by=x->1/x) + @test issorted(b, by=x->1/x) + @test last(b) == last(sort(a, alg=PartialQuickSort(length(a)), by=x->1/x)) end end @testset "insorted" begin @@ -259,8 +267,8 @@ end @testset "PartialQuickSort" begin a = rand(1:10000, 1000) # test PartialQuickSort only does a partial sort - let alg = PartialQuickSort(1:div(length(a), 10)) - k = alg.k + let k = 1:div(length(a), 10) + alg = PartialQuickSort(k) b = sort(a, alg=alg) c = sort(a, alg=alg, by=x->1/x) d = sort(a, alg=alg, rev=true) @@ -271,8 +279,8 @@ end @test !issorted(c, by=x->1/x) @test !issorted(d, rev=true) end - let alg = PartialQuickSort(div(length(a), 10)) - k = alg.k + let k = div(length(a), 10) + alg = PartialQuickSort(k) b = sort(a, alg=alg) c = sort(a, alg=alg, by=x->1/x) d = sort(a, alg=alg, rev=true) @@ -289,6 +297,7 @@ end @test partialsortperm([3,6,30,1,9], 2, rev=true) == 5 @test partialsortperm([3,6,30,1,9], 2, by=x->1/x) == 5 end + ## more advanced sorting tests ## randnans(n) = reinterpret(Float64,[rand(UInt64)|0x7ff8000000000000 for i=1:n]) @@ -324,7 +333,7 @@ end @test c == v # stable algorithms - for alg in [MergeSort, Base.DEFAULT_STABLE] + 
for alg in [MergeSort, QuickSort, PartialQuickSort(1:n), Base.DEFAULT_STABLE]
         p = sortperm(v, alg=alg, rev=rev)
         p2 = sortperm(float(v), alg=alg, rev=rev)
         @test p == p2
@@ -334,6 +343,10 @@ end
         @test s == si
         invpermute!(s, p)
         @test s == v
+
+        # Ensure stability, even with reverse short circuit
+        @test all(sort!(Real[fill(2.0, 15); fill(2, 15); fill(1.0, 15); fill(1, 15)])
+                  .=== Real[fill(1.0, 15); fill(1, 15); fill(2.0, 15); fill(2, 15)])
     end

     # unstable algorithms
@@ -368,8 +381,7 @@ end
     end

     v = randn_with_nans(n,0.1)
-    # TODO: alg = PartialQuickSort(n) fails here
-    for alg in [InsertionSort, QuickSort, MergeSort, Base.DEFAULT_UNSTABLE, Base.DEFAULT_STABLE],
+    for alg in [InsertionSort, MergeSort, QuickSort, PartialQuickSort(n), Base.DEFAULT_UNSTABLE, Base.DEFAULT_STABLE],
         rev in [false,true]
         alg === InsertionSort && n >= 3000 && continue
         # test float sorting with NaNs
@@ -431,7 +443,7 @@ end
         @test all(issorted, [sp[inds.==x] for x in 1:200])
     end

-    for alg in [InsertionSort, MergeSort, Base.DEFAULT_STABLE]
+    for alg in [InsertionSort, MergeSort, QuickSort, Base.DEFAULT_STABLE]
         sp = sortperm(inds, alg=alg)
         @test all(issorted, [sp[inds.==x] for x in 1:200])
     end
@@ -682,6 +694,52 @@ end
     @test Base.Sort.UIntMappable(Union{Int, UInt}, Base.Forward) === nothing # issue #45280
 end

+@testset "invalid lt (#11429)" begin
+    # lt must be a total linear order (e.g. < not <=) so this usage is
+    # not allowed. Consequently, none of the behavior tested in this
+    # testset is guaranteed to work in future minor versions of Julia.
+
+    n = 1000
+    v = rand(1:5, n);
+    s = sort(v);
+
+    # Nevertheless, it still works...
+    for alg in [InsertionSort, MergeSort, QuickSort,
+                Base.Sort.AdaptiveSort, Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE]
+        @test sort(v, alg=alg, lt = <=) == s
+    end
+    @test partialsort(v, 172, lt = <=) == s[172]
+    @test partialsort(v, 315:415, lt = <=) == s[315:415]
+
+    # ...and it is consistently reverse stable. All these algorithms swap v[i] and v[j]
+    # where i < j if and only if lt(o, v[j], v[i]). This invariant holds even for
+    # this invalid lt order.
+    perm = reverse(sortperm(v, rev=true))
+    for alg in [InsertionSort, MergeSort, QuickSort,
+                Base.Sort.AdaptiveSort, Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE]
+        @test sort(1:n, alg=alg, lt = (i,j) -> v[i]<=v[j]) == perm
+    end
+    @test partialsort(1:n, 172, lt = (i,j) -> v[i]<=v[j]) == perm[172]
+    @test partialsort(1:n, 315:415, lt = (i,j) -> v[i]<=v[j]) == perm[315:415]
+
+    # lt can be very poorly behaved and sort will still permute its input in some way.
+ for alg in [InsertionSort, MergeSort, QuickSort, + Base.Sort.AdaptiveSort, Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] + @test sort!(sort(v, alg=alg, lt = (x,y) -> rand([false, true]))) == s + end + @test partialsort(v, 172, lt = (x,y) -> rand([false, true])) ∈ 1:5 + @test all(partialsort(v, 315:415, lt = (x,y) -> rand([false, true])) .∈ (1:5,)) + + # issue #32675 + k = [38, 18, 38, 38, 3, 37, 26, 26, 6, 29, 38, 36, 38, 1, 38, 36, 38, 38, 38, 36, 36, + 36, 28, 34, 35, 38, 25, 20, 38, 1, 1, 5, 38, 38, 3, 34, 16, 38, 4, 10, 35, 37, 38, + 38, 2, 38, 25, 35, 38, 1, 35, 36, 20, 33, 36, 18, 38, 1, 24, 4, 38, 18, 12, 38, 34, + 35, 36, 38, 26, 31, 36, 38, 38, 30, 36, 35, 35, 7, 22, 35, 38, 35, 30, 21, 37] + idx = sortperm(k; lt=!isless) + @test issorted(k[idx], rev=true) +end + +# This testset is at the end of the file because it is slow @testset "sort(x; buffer)" begin for n in [1,10,100,1000] v = rand(n) diff --git a/test/spawn.jl b/test/spawn.jl index a8a2af40643ff..0241c65573886 100644 --- a/test/spawn.jl +++ b/test/spawn.jl @@ -5,7 +5,7 @@ ################################### using Random, Sockets -using Downloads: download +using Downloads: Downloads, download valgrind_off = ccall(:jl_running_on_valgrind, Cint, ()) == 0 @@ -20,8 +20,33 @@ shcmd = `sh` sleepcmd = `sleep` lscmd = `ls` havebb = false + +function _tryonce_download_from_cache(desired_url::AbstractString) + cache_url = "https://cache.julialang.org/foo/$(desired_url)" + cache_output_filename = joinpath(mktempdir(), "myfile") + cache_response = Downloads.request( + cache_url; + output = cache_output_filename, + throw = false, + timeout = 60, + ) + if cache_response isa Downloads.Response + if Downloads.status_ok(cache_response.proto, cache_response.status) + return cache_output_filename + end + end + return Downloads.download(desired_url; timeout = 60) +end + +function download_from_cache(desired_url::AbstractString) + f = () -> _tryonce_download_from_cache(desired_url) + delays = Float64[30, 30, 60, 60, 60] + g = retry(f; delays) + return g() +end + if Sys.iswindows() - busybox = download("https://cache.julialang.org/https://frippery.org/files/busybox/busybox.exe", joinpath(tempdir(), "busybox.exe")) + busybox = download_from_cache("https://frippery.org/files/busybox/busybox.exe") havebb = try # use busybox-w32 on windows, if available success(`$busybox`) true diff --git a/test/subarray.jl b/test/subarray.jl index 98335cb257110..884a36670a31e 100644 --- a/test/subarray.jl +++ b/test/subarray.jl @@ -288,7 +288,8 @@ if testfull end let B = copy(reshape(1:13^3, 13, 13, 13)) - @testset "spot checks: $oind" for oind in ((:,:,:), + @testset "spot checks: $oind" for oind in ( + (:,:,:), (:,:,6), (:,6,:), (6,:,:), @@ -296,7 +297,6 @@ let B = copy(reshape(1:13^3, 13, 13, 13)) (3:7,:,:), (3:7,6,:), (3:7,6,0x6), - (6,UInt(3):UInt(7),3:7), (13:-2:1,:,:), ([8,4,6,12,5,7],:,3:7), (6,CartesianIndex.(6,[8,4,6,12,5,7])), @@ -307,7 +307,29 @@ let B = copy(reshape(1:13^3, 13, 13, 13)) (3,reshape(2:11,5,2),4), (3,reshape(2:2:13,3,2),4), (view(1:13,[9,12,4,13,1]),2:6,4), - ([1:5 2:6 3:7 4:8 5:9], :, 3)) + ([1:5 2:6 3:7 4:8 5:9], :, 3), + ) + runsubarraytests(B, oind...) + viewB = view(B, oind...) 
+ runviews(viewB, index5, index25, index125) + end +end + +let B = copy(reshape(1:13^3, 13, 13, 13)) + @testset "spot checks (other BitIntegers): $oind" for oind in ( + (:,:,0x6), + (:,0x00000006,:), + (0x0006,:,:), + (:,0x00000003:0x00000007,:), + (0x0000000000000003:0x0000000000000007,:,:), + (0x0003:0x0007,0x6,:), + (6,UInt(3):UInt(7),3:7), + (Int16(3):Int16(7),Int16(6),:), + (CartesianIndex(0xD,0x6),UInt8[8,4,6,12,5,7]), + (Int8(1),:,view(1:13,[9,12,4,13,1])), + (view(1:13,Int16[9,12,4,13,1]),UInt8(2):UInt16(6),Int8(4)), + (Int8[1:5 2:6 3:7 4:8 5:9],:,UInt64(3)), + ) runsubarraytests(B, oind...) viewB = view(B, oind...) runviews(viewB, index5, index25, index125) diff --git a/test/subtype.jl b/test/subtype.jl index 9a4a5cce5e323..23aabf38e4fa1 100644 --- a/test/subtype.jl +++ b/test/subtype.jl @@ -2249,3 +2249,5 @@ T46784{B<:Val, M<:AbstractMatrix} = Tuple{<:Union{B, <:Val{<:B}}, M, Union{Abstr # issue 21153 @test_broken (Tuple{T1,T1} where T1<:(Val{T2} where T2)) <: (Tuple{Val{S},Val{S}} where S) end + +@test !(Tuple{Any, Any, Any} <: Tuple{Any, Vararg{T}} where T) diff --git a/test/syntax.jl b/test/syntax.jl index 2acfe131eea1d..47f09b32ab914 100644 --- a/test/syntax.jl +++ b/test/syntax.jl @@ -848,6 +848,14 @@ end @test c8925 == 3 && isconst(@__MODULE__, :c8925) @test d8925 == 4 && isconst(@__MODULE__, :d8925) +# issue #47168 +let t47168 = (;a47168 = 1, b47168 = 2); + global const (;a47168, b47168) = t47168 + @test a47168 == 1 && isconst(@__MODULE__, :a47168) + @test b47168 == 2 && isconst(@__MODULE__, :b47168) +end +@test (let x = (;x=1); let (;x) = x; x; end, x; end) == (1, (x = 1,)) + # issue #18754: parse ccall as a regular function @test Meta.parse("ccall([1], 2)[3]") == Expr(:ref, Expr(:call, :ccall, Expr(:vect, 1), 2), 3) @test Meta.parse("ccall(a).member") == Expr(:., Expr(:call, :ccall, :a), QuoteNode(:member)) diff --git a/test/testdefs.jl b/test/testdefs.jl index 0f8ef610d02c8..4aac988cda7fb 100644 --- a/test/testdefs.jl +++ b/test/testdefs.jl @@ -57,7 +57,11 @@ function runtests(name, path, isolate=true; seed=nothing) testset_name = name, testset_path = path, ) - error(msg) + throw_error_str = get(ENV, "JULIA_TEST_CHECK_MUTATED_ENV", "true") + throw_error_b = parse(Bool, throw_error_str) + if throw_error_b + error(msg) + end end end rss = Sys.maxrss() diff --git a/test/threads.jl b/test/threads.jl index 09e802757062b..fb684b275e864 100644 --- a/test/threads.jl +++ b/test/threads.jl @@ -124,7 +124,7 @@ end function get_nthreads(options = ``; cpus = nothing) cmd = `$(Base.julia_cmd()) --startup-file=no $(options)` - cmd = `$cmd -e "print(Threads.nthreads())"` + cmd = `$cmd -e "print(Threads.threadpoolsize())"` cmd = addenv(cmd, "JULIA_EXCLUSIVE" => "0", "JULIA_NUM_THREADS" => "auto") if cpus !== nothing cmd = setcpuaffinity(cmd, cpus) diff --git a/test/threads_exec.jl b/test/threads_exec.jl index 4bce3ebd71b41..68ba9377cf955 100644 --- a/test/threads_exec.jl +++ b/test/threads_exec.jl @@ -2,7 +2,7 @@ using Test using Base.Threads -using Base.Threads: SpinLock +using Base.Threads: SpinLock, threadpoolsize # for cfunction_closure include("testenv.jl") @@ -27,9 +27,12 @@ end # (expected test duration is about 18-180 seconds) Timer(t -> killjob("KILLING BY THREAD TEST WATCHDOG\n"), 1200) +@test Threads.threadid() == 1 +@test 1 <= threadpoolsize() <= Threads.maxthreadid() + # basic lock check -if nthreads() > 1 - let lk = Base.Threads.SpinLock() +if threadpoolsize() > 1 + let lk = SpinLock() c1 = Base.Event() c2 = Base.Event() @test trylock(lk) @@ -50,7 +53,7 @@ end # threading 
constructs -let a = zeros(Int, 2 * nthreads()) +let a = zeros(Int, 2 * threadpoolsize()) @threads for i = 1:length(a) @sync begin @async begin @@ -70,7 +73,7 @@ end # parallel loop with parallel atomic addition function threaded_loop(a, r, x) - counter = Threads.Atomic{Int}(min(Threads.nthreads(), length(r))) + counter = Threads.Atomic{Int}(min(threadpoolsize(), length(r))) @threads for i in r # synchronize the start given that each partition is started sequentially, # meaning that without the wait, if the loop is too fast the iteration can happen in order @@ -208,7 +211,7 @@ function threaded_gc_locked(::Type{LockT}) where LockT end threaded_gc_locked(SpinLock) -threaded_gc_locked(Threads.ReentrantLock) +threaded_gc_locked(ReentrantLock) # Issue 33159 # Make sure that a Threads.Condition can't be used without being locked, on any thread. @@ -423,7 +426,7 @@ end for T in intersect((Int32, Int64, Float32, Float64), Base.Threads.atomictypes) var = Atomic{T}() nloops = 1000 - di = nthreads() + di = threadpoolsize() @threads for i in 1:di test_atomic_cas!(var, i:di:nloops) end @@ -513,7 +516,7 @@ function test_thread_cfunction() @test cfs[1] == cf1 @test cfs[2] == cf(fs[2]) @test length(unique(cfs)) == 1000 - ok = zeros(Int, nthreads()) + ok = zeros(Int, threadpoolsize()) @threads :static for i in 1:10000 i = mod1(i, 1000) fi = fs[i] @@ -529,14 +532,14 @@ if cfunction_closure end function test_thread_range() - a = zeros(Int, nthreads()) + a = zeros(Int, threadpoolsize()) @threads for i in 1:threadid() a[i] = 1 end for i in 1:threadid() @test a[i] == 1 end - for i in (threadid() + 1):nthreads() + for i in (threadid() + 1):threadpoolsize() @test a[i] == 0 end end @@ -576,17 +579,17 @@ test_nested_loops() function test_thread_too_few_iters() x = Atomic() - a = zeros(Int, nthreads()+2) - threaded_loop(a, 1:nthreads()-1, x) - found = zeros(Bool, nthreads()+2) - for i=1:nthreads()-1 + a = zeros(Int, threadpoolsize()+2) + threaded_loop(a, 1:threadpoolsize()-1, x) + found = zeros(Bool, threadpoolsize()+2) + for i=1:threadpoolsize()-1 found[a[i]] = true end - @test x[] == nthreads()-1 + @test x[] == threadpoolsize()-1 # Next test checks that all loop iterations ran, # and were unique (via pigeon-hole principle). 
- @test !(false in found[1:nthreads()-1]) - @test !(true in found[nthreads():end]) + @test !(false in found[1:threadpoolsize()-1]) + @test !(true in found[threadpoolsize():end]) end test_thread_too_few_iters() @@ -728,10 +731,10 @@ function _atthreads_with_error(a, err) end a end -@test_throws CompositeException _atthreads_with_error(zeros(nthreads()), true) -let a = zeros(nthreads()) +@test_throws CompositeException _atthreads_with_error(zeros(threadpoolsize()), true) +let a = zeros(threadpoolsize()) _atthreads_with_error(a, false) - @test a == [1:nthreads();] + @test a == [1:threadpoolsize();] end # static schedule @@ -742,11 +745,11 @@ function _atthreads_static_schedule(n) end return ids end -@test _atthreads_static_schedule(nthreads()) == 1:nthreads() +@test _atthreads_static_schedule(threadpoolsize()) == 1:threadpoolsize() @test _atthreads_static_schedule(1) == [1;] @test_throws( "`@threads :static` cannot be used concurrently or nested", - @threads(for i = 1:1; _atthreads_static_schedule(nthreads()); end), + @threads(for i = 1:1; _atthreads_static_schedule(threadpoolsize()); end), ) # dynamic schedule @@ -759,35 +762,35 @@ function _atthreads_dynamic_schedule(n) end return inc[], flags end -@test _atthreads_dynamic_schedule(nthreads()) == (nthreads(), ones(nthreads())) +@test _atthreads_dynamic_schedule(threadpoolsize()) == (threadpoolsize(), ones(threadpoolsize())) @test _atthreads_dynamic_schedule(1) == (1, ones(1)) @test _atthreads_dynamic_schedule(10) == (10, ones(10)) -@test _atthreads_dynamic_schedule(nthreads() * 2) == (nthreads() * 2, ones(nthreads() * 2)) +@test _atthreads_dynamic_schedule(threadpoolsize() * 2) == (threadpoolsize() * 2, ones(threadpoolsize() * 2)) # nested dynamic schedule function _atthreads_dynamic_dynamic_schedule() inc = Threads.Atomic{Int}(0) - Threads.@threads :dynamic for _ = 1:nthreads() - Threads.@threads :dynamic for _ = 1:nthreads() + Threads.@threads :dynamic for _ = 1:threadpoolsize() + Threads.@threads :dynamic for _ = 1:threadpoolsize() Threads.atomic_add!(inc, 1) end end return inc[] end -@test _atthreads_dynamic_dynamic_schedule() == nthreads() * nthreads() +@test _atthreads_dynamic_dynamic_schedule() == threadpoolsize() * threadpoolsize() function _atthreads_static_dynamic_schedule() - ids = zeros(Int, nthreads()) + ids = zeros(Int, threadpoolsize()) inc = Threads.Atomic{Int}(0) - Threads.@threads :static for i = 1:nthreads() + Threads.@threads :static for i = 1:threadpoolsize() ids[i] = Threads.threadid() - Threads.@threads :dynamic for _ = 1:nthreads() + Threads.@threads :dynamic for _ = 1:threadpoolsize() Threads.atomic_add!(inc, 1) end end return ids, inc[] end -@test _atthreads_static_dynamic_schedule() == (1:nthreads(), nthreads() * nthreads()) +@test _atthreads_static_dynamic_schedule() == (1:threadpoolsize(), threadpoolsize() * threadpoolsize()) # errors inside @threads :dynamic function _atthreads_dynamic_with_error(a) @@ -796,7 +799,7 @@ function _atthreads_dynamic_with_error(a) end a end -@test_throws "user error in the loop body" _atthreads_dynamic_with_error(zeros(nthreads())) +@test_throws "user error in the loop body" _atthreads_dynamic_with_error(zeros(threadpoolsize())) try @macroexpand @threads(for i = 1:10, j = 1:10; end) @@ -1025,7 +1028,7 @@ function check_sync_end_race() nnotscheduled += y === :notscheduled end # Useful for tuning the test: - @debug "`check_sync_end_race` done" nthreads() ncompleted nnotscheduled nerror + @debug "`check_sync_end_race` done" threadpoolsize() ncompleted nnotscheduled nerror finally 
done[] = true end @@ -1039,21 +1042,21 @@ end # issue #41546, thread-safe package loading @testset "package loading" begin - ch = Channel{Bool}(nthreads()) + ch = Channel{Bool}(threadpoolsize()) barrier = Base.Event() old_act_proj = Base.ACTIVE_PROJECT[] try pushfirst!(LOAD_PATH, "@") Base.ACTIVE_PROJECT[] = joinpath(@__DIR__, "TestPkg") @sync begin - for _ in 1:nthreads() + for _ in 1:threadpoolsize() Threads.@spawn begin put!(ch, true) wait(barrier) @eval using TestPkg end end - for _ in 1:nthreads() + for _ in 1:threadpoolsize() take!(ch) end notify(barrier) diff --git a/test/tuple.jl b/test/tuple.jl index 39491a249f696..9bd7d3fb57963 100644 --- a/test/tuple.jl +++ b/test/tuple.jl @@ -757,3 +757,25 @@ g42457(a, b) = Base.isequal(a, b) ? 1 : 2.0 # issue #46049: setindex(::Tuple) regression @inferred Base.setindex((1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16), 42, 1) + +# issue #47326 +function fun1_47326(args...) + head..., tail = args + head +end +function fun2_47326(args...) + head, tail... = args + tail +end +@test @inferred(fun1_47326(1,2,3)) === (1, 2) +@test @inferred(fun2_47326(1,2,3)) === (2, 3) + +f47326(x::Union{Tuple, NamedTuple}) = Base.split_rest(x, 1) +tup = (1, 2, 3) +namedtup = (;a=1, b=2, c=3) +@test only(Base.return_types(f47326, (typeof(tup),))) == Tuple{Tuple{Int, Int}, Tuple{Int}} +@test only(Base.return_types(f47326, (typeof(namedtup),))) == + Tuple{ + NamedTuple{(:a, :b), Tuple{Int, Int}}, + NamedTuple{(:c,), Tuple{Int}}, + } diff --git a/test/worlds.jl b/test/worlds.jl index 3c60f006faef2..39a9dc4d9a788 100644 --- a/test/worlds.jl +++ b/test/worlds.jl @@ -355,7 +355,7 @@ inner(s::Union{Vector,Dict}; kw=false) = inneri(s, kwi=maximum(s), kwb=kw) inneri(s, args...; kwargs...) = inneri(IOBuffer(), s, args...; kwargs...) inneri(io::IO, s::Union{Vector,Dict}; kwi=0, kwb=false) = (print(io, first(s), " "^kwi, kwb); String(take!(io))) @test outer(Ref{Any}([1,2,3])) == "1 false" -mi = method_instance(Core.kwfunc(inneri), (NamedTuple{(:kwi,:kwb),TT} where TT<:Tuple{Any,Bool}, typeof(inneri), Vector{T} where T)) +mi = method_instance(Core.kwcall, (NamedTuple{(:kwi,:kwb),TT} where TT<:Tuple{Any,Bool}, typeof(inneri), Vector{T} where T)) w = worlds(mi) abstract type Container{T} end Base.eltype(::Type{C}) where {T,C<:Container{T}} = T
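Editor's note (not part of the patch): the `Core.kwfunc` to `Core.kwcall` substitutions running through this patch (test/compiler/inline.jl, test/keywordargs.jl, test/precompile.jl, test/reduce.jl, and the test/worlds.jl hunk above) all reflect the same lowering change: keyword calls now dispatch through the single generic function `Core.kwcall`, with the keyword `NamedTuple` as the leading argument, instead of through a per-function keyword sorter. A minimal sketch of the new convention (`f` is a hypothetical example function):

```julia
f(x; scale = 2) = scale * x

# A call like f(3; scale = 4) now lowers to Core.kwcall with the keyword
# NamedTuple as the leading argument:
@assert Core.kwcall((scale = 4,), f, 3) == f(3; scale = 4) == 12

# Keyword-accepting methods are therefore methods of Core.kwcall itself,
# which is how the tests above look them up:
m = only(methods(Core.kwcall, (Any, typeof(f), Vararg)))
```

This is also why the patch can drop the old `kwsorter`-based checks: there is no longer a per-function keyword-sorter object to capture or invalidate.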