diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl
index 68b8394b72c3d..96355f2a6b5dd 100644
--- a/base/compiler/abstractinterpretation.jl
+++ b/base/compiler/abstractinterpretation.jl
@@ -47,223 +47,210 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
     matches = find_method_matches(interp, argtypes, atype; max_methods)
     if isa(matches, FailedMethodMatch)
         add_remark!(interp, sv, matches.reason)
-        return CallMeta(Any, Any, Effects(), NoCallInfo())
+        return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
     end
 
     (; valid_worlds, applicable, info) = matches
     update_valid_age!(sv, valid_worlds)
-    napplicable = length(applicable)
+
+    # final result
+    gfresult = Future{CallMeta}()
+    # intermediate work for computing gfresult
     rettype = exctype = Bottom
     edges = MethodInstance[]
     conditionals = nothing # keeps refinement information of call argument types when the return type is boolean
-    seen = 0               # number of signatures actually inferred
+    seenall = true
     const_results = nothing # or const_results::Vector{Union{Nothing,ConstResult}} if any const results are available
-    multiple_matches = napplicable > 1
     fargs = arginfo.fargs
     all_effects = EFFECTS_TOTAL
     slotrefinements = nothing # keeps refinement information on slot types obtained from call signature
 
-    for i in 1:napplicable
-        match = applicable[i]::MethodMatch
-        method = match.method
-        sig = match.spec_types
-        if bail_out_toplevel_call(interp, InferenceLoopState(sig, rettype, all_effects), sv)
-            # only infer concrete call sites in top-level expressions
-            add_remark!(interp, sv, "Refusing to infer non-concrete call site in top-level expression")
-            break
-        end
-        this_rt = Bottom
-        this_exct = Bottom
-        splitunions = false
-        # TODO: this used to trigger a bug in inference recursion detection, and is unmaintained now
-        # sigtuple = unwrap_unionall(sig)::DataType
-        # splitunions = 1 < unionsplitcost(sigtuple.parameters) * napplicable <= InferenceParams(interp).max_union_splitting
-        if splitunions
-            splitsigs = switchtupleunion(sig)
-            for sig_n in splitsigs
-                result = abstract_call_method(interp, method, sig_n, svec(), multiple_matches, si, sv)
-                (; rt, exct, edge, effects, volatile_inf_result) = result
+    # split the for loop off into a function, so that we can pause and restart it at will
+    i::Int = 1
+    f = Core.Box(f)
+    atype = Core.Box(atype)
+    function infercalls(interp, sv)
+        napplicable = length(applicable)
+        multiple_matches = napplicable > 1
+        while i <= napplicable
+            match = applicable[i]::MethodMatch
+            method = match.method
+            sig = match.spec_types
+            if bail_out_toplevel_call(interp, InferenceLoopState(sig, rettype, all_effects), sv)
+                # only infer concrete call sites in top-level expressions
+                add_remark!(interp, sv, "Refusing to infer non-concrete call site in top-level expression")
+                seenall = false
+                break
+            end
+            # TODO: this is unmaintained now as it didn't seem to improve things, though it does avoid hard-coding the union split at the higher level,
+            # it also can hurt infer-ability of some constrained parameter types (e.g. quacks like a duck)
+            # sigtuple = unwrap_unionall(sig)::DataType
+            # splitunions = 1 < unionsplitcost(sigtuple.parameters) * napplicable <= InferenceParams(interp).max_union_splitting
+            #if splitunions
+            #    splitsigs = switchtupleunion(sig)
+            #    for sig_n in splitsigs
+            #        result = abstract_call_method(interp, method, sig_n, svec(), multiple_matches, si, sv)::Future
+            #        handle1(...)
+            #    end
+            #end
+            mresult = abstract_call_method(interp, method, sig, match.sparams, multiple_matches, si, sv)::Future
+            function handle1(interp, sv)
+                local (; rt, exct, edge, effects, volatile_inf_result) = mresult[]
+                this_conditional = ignorelimited(rt)
+                this_rt = widenwrappedconditional(rt)
+                this_exct = exct
+                # try constant propagation with argtypes for this match
+                # this is in preparation for inlining, or improving the return result
                 this_argtypes = isa(matches, MethodMatches) ? argtypes : matches.applicable_argtypes[i]
                 this_arginfo = ArgInfo(fargs, this_argtypes)
                 const_call_result = abstract_call_method_with_const_args(interp,
-                    result, f, this_arginfo, si, match, sv)
+                    mresult[], f.contents, this_arginfo, si, match, sv)
                 const_result = volatile_inf_result
                 if const_call_result !== nothing
-                    if const_call_result.rt ⊑ₚ rt
-                        rt = const_call_result.rt
+                    this_const_conditional = ignorelimited(const_call_result.rt)
+                    this_const_rt = widenwrappedconditional(const_call_result.rt)
+                    if this_const_rt ⊑ₚ this_rt
+                        # As long as the const-prop result we have is not *worse* than
+                        # what we found out on types, we'd like to use it. Even if the
+                        # end result is exactly equivalent, it is likely that the IR
+                        # we produced while constproping is better than that with
+                        # generic types.
+                        # Return type of const-prop' inference can be wider than that of non const-prop' inference
+                        # e.g. in cases when there are cycles but cached result is still accurate
+                        this_conditional = this_const_conditional
+                        this_rt = this_const_rt
                         (; effects, const_result, edge) = const_call_result
                     elseif is_better_effects(const_call_result.effects, effects)
                         (; effects, const_result, edge) = const_call_result
                     else
                         add_remark!(interp, sv, "[constprop] Discarded because the result was wider than inference")
                     end
-                    if const_call_result.exct ⋤ exct
-                        (; exct, const_result, edge) = const_call_result
+                    # Treat the exception type separately. Currently, constprop often cannot determine the exception type
+                    # because consistent-cy does not apply to exceptions.
+                    if const_call_result.exct ⋤ this_exct
+                        this_exct = const_call_result.exct
+                        (; const_result, edge) = const_call_result
                     else
                         add_remark!(interp, sv, "[constprop] Discarded exception type because result was wider than inference")
                     end
                 end
+
                 all_effects = merge_effects(all_effects, effects)
                 if const_result !== nothing
                     if const_results === nothing
-                        const_results = fill!(Vector{Union{Nothing,ConstResult}}(undef, #=TODO=#napplicable), nothing)
+                        const_results = fill!(Vector{Union{Nothing,ConstResult}}(undef, napplicable), nothing)
                     end
                     const_results[i] = const_result
                 end
                 edge === nothing || push!(edges, edge)
-                this_rt = this_rt ⊔ₚ rt
-                this_exct = this_exct ⊔ₚ exct
-                if bail_out_call(interp, this_rt, sv)
-                    break
+                @assert !(this_conditional isa Conditional || this_rt isa MustAlias) "invalid lattice element returned from inter-procedural context"
+                if can_propagate_conditional(this_conditional, argtypes)
+                    # The only case where we need to keep this in rt is where
+                    # we can directly propagate the conditional to a slot argument
+                    # that is not one of our arguments, otherwise we keep all the
+                    # relevant information in `conditionals` below.
+                    this_rt = this_conditional
                 end
-            end
-            this_conditional = ignorelimited(this_rt)
-            this_rt = widenwrappedconditional(this_rt)
-        else
-            result = abstract_call_method(interp, method, sig, match.sparams, multiple_matches, si, sv)
-            (; rt, exct, edge, effects, volatile_inf_result) = result
-            this_conditional = ignorelimited(rt)
-            this_rt = widenwrappedconditional(rt)
-            this_exct = exct
-            # try constant propagation with argtypes for this match
-            # this is in preparation for inlining, or improving the return result
-            this_argtypes = isa(matches, MethodMatches) ? argtypes : matches.applicable_argtypes[i]
-            this_arginfo = ArgInfo(fargs, this_argtypes)
-            const_call_result = abstract_call_method_with_const_args(interp,
-                result, f, this_arginfo, si, match, sv)
-            const_result = volatile_inf_result
-            if const_call_result !== nothing
-                this_const_conditional = ignorelimited(const_call_result.rt)
-                this_const_rt = widenwrappedconditional(const_call_result.rt)
-                if this_const_rt ⊑ₚ this_rt
-                    # As long as the const-prop result we have is not *worse* than
-                    # what we found out on types, we'd like to use it. Even if the
-                    # end result is exactly equivalent, it is likely that the IR
-                    # we produced while constproping is better than that with
-                    # generic types.
-                    # Return type of const-prop' inference can be wider than that of non const-prop' inference
-                    # e.g. in cases when there are cycles but cached result is still accurate
-                    this_conditional = this_const_conditional
-                    this_rt = this_const_rt
-                    (; effects, const_result, edge) = const_call_result
-                elseif is_better_effects(const_call_result.effects, effects)
-                    (; effects, const_result, edge) = const_call_result
-                else
-                    add_remark!(interp, sv, "[constprop] Discarded because the result was wider than inference")
+
+                rettype = rettype ⊔ₚ this_rt
+                exctype = exctype ⊔ₚ this_exct
+                if has_conditional(𝕃ₚ, sv) && this_conditional !== Bottom && is_lattice_bool(𝕃ₚ, rettype) && fargs !== nothing
+                    if conditionals === nothing
+                        conditionals = Any[Bottom for _ in 1:length(argtypes)],
+                                       Any[Bottom for _ in 1:length(argtypes)]
+                    end
+                    for i = 1:length(argtypes)
+                        cnd = conditional_argtype(𝕃ᵢ, this_conditional, sig, argtypes, i)
+                        conditionals[1][i] = conditionals[1][i] ⊔ᵢ cnd.thentype
+                        conditionals[2][i] = conditionals[2][i] ⊔ᵢ cnd.elsetype
+                    end
                 end
-                # Treat the exception type separately. Currently, constprop often cannot determine the exception type
-                # because consistent-cy does not apply to exceptions.
-                if const_call_result.exct ⋤ this_exct
-                    this_exct = const_call_result.exct
-                    (; const_result, edge) = const_call_result
-                else
-                    add_remark!(interp, sv, "[constprop] Discarded exception type because result was wider than inference")
+                if i < napplicable && bail_out_call(interp, InferenceLoopState(sig, rettype, all_effects), sv)
+                    add_remark!(interp, sv, "Call inference reached maximally imprecise information. Bailing on.")
+                    seenall = false
+                    i = napplicable # break in outer function
                 end
+                i += 1
+                return true
             end
-            all_effects = merge_effects(all_effects, effects)
-            if const_result !== nothing
-                if const_results === nothing
-                    const_results = fill!(Vector{Union{Nothing,ConstResult}}(undef, napplicable), nothing)
-                end
-                const_results[i] = const_result
+            if isready(mresult) && handle1(interp, sv)
+                continue
+            else
+                push!(sv.tasks, handle1)
+                return false
             end
-            edge === nothing || push!(edges, edge)
-        end
-        @assert !(this_conditional isa Conditional || this_rt isa MustAlias) "invalid lattice element returned from inter-procedural context"
-        seen += 1
+        end # while
 
-        if can_propagate_conditional(this_conditional, argtypes)
-            # The only case where we need to keep this in rt is where
-            # we can directly propagate the conditional to a slot argument
-            # that is not one of our arguments, otherwise we keep all the
-            # relevant information in `conditionals` below.
-            this_rt = this_conditional
+        if const_results !== nothing
+            @assert napplicable == nmatches(info) == length(const_results)
+            info = ConstCallInfo(info, const_results)
         end
 
-        rettype = rettype ⊔ₚ this_rt
-        exctype = exctype ⊔ₚ this_exct
-        if has_conditional(𝕃ₚ, sv) && this_conditional !== Bottom && is_lattice_bool(𝕃ₚ, rettype) && fargs !== nothing
-            if conditionals === nothing
-                conditionals = Any[Bottom for _ in 1:length(argtypes)],
-                               Any[Bottom for _ in 1:length(argtypes)]
+        if seenall
+            if !fully_covering(matches) || any_ambig(matches)
+                # Account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature.
+                all_effects = Effects(all_effects; nothrow=false)
+                exctype = exctype ⊔ₚ MethodError
             end
-            for i = 1:length(argtypes)
-                cnd = conditional_argtype(𝕃ᵢ, this_conditional, sig, argtypes, i)
-                conditionals[1][i] = conditionals[1][i] ⊔ᵢ cnd.thentype
-                conditionals[2][i] = conditionals[2][i] ⊔ᵢ cnd.elsetype
+            if sv isa InferenceState && fargs !== nothing
+                slotrefinements = collect_slot_refinements(𝕃ᵢ, applicable, argtypes, fargs, sv)
             end
-        end
-        if bail_out_call(interp, InferenceLoopState(sig, rettype, all_effects), sv)
-            add_remark!(interp, sv, "Call inference reached maximally imprecise information. Bailing on.")
-            break
-        end
-    end
-
-    if const_results !== nothing
-        @assert napplicable == nmatches(info) == length(const_results)
-        info = ConstCallInfo(info, const_results)
-    end
-
-    if seen ≠ napplicable
-        # there is unanalyzed candidate, widen type and effects to the top
-        rettype = exctype = Any
-        all_effects = Effects()
-    else
-        if !fully_covering(matches) || any_ambig(matches)
-            # Account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature.
-            all_effects = Effects(all_effects; nothrow=false)
-            exctype = exctype ⊔ₚ MethodError
-        end
-        if sv isa InferenceState && fargs !== nothing
-            slotrefinements = collect_slot_refinements(𝕃ᵢ, applicable, argtypes, fargs, sv)
-        end
-    end
-
-    rettype = from_interprocedural!(interp, rettype, sv, arginfo, conditionals)
-
-    # Also considering inferring the compilation signature for this method, so
-    # it is available to the compiler in case it ends up needing it.
-    if (isa(sv, InferenceState) && infer_compilation_signature(interp) &&
-        (1 == seen == napplicable) && rettype !== Any && rettype !== Bottom &&
-        !is_removable_if_unused(all_effects))
-        match = applicable[1]::MethodMatch
-        method = match.method
-        sig = match.spec_types
-        mi = specialize_method(match; preexisting=true)
-        if mi !== nothing && !const_prop_methodinstance_heuristic(interp, mi, arginfo, sv)
-            csig = get_compileable_sig(method, sig, match.sparams)
-            if csig !== nothing && csig !== sig
-                abstract_call_method(interp, method, csig, match.sparams, multiple_matches, StmtInfo(false), sv)
+        else
+            # there is unanalyzed candidate, widen type and effects to the top
+            rettype = exctype = Any
+            all_effects = Effects()
+        end
+
+        rettype = from_interprocedural!(interp, rettype, sv, arginfo, conditionals)
+
+        # Also considering inferring the compilation signature for this method, so
+        # it is available to the compiler in case it ends up needing it.
+        if (isa(sv, InferenceState) && infer_compilation_signature(interp) &&
+            (seenall && 1 == napplicable) && rettype !== Any && rettype !== Bottom &&
+            !is_removable_if_unused(all_effects))
+            match = applicable[1]::MethodMatch
+            method = match.method
+            sig = match.spec_types
+            mi = specialize_method(match; preexisting=true)
+            if mi !== nothing && !const_prop_methodinstance_heuristic(interp, mi, arginfo, sv)
+                csig = get_compileable_sig(method, sig, match.sparams)
+                if csig !== nothing && csig !== sig
+                    abstract_call_method(interp, method, csig, match.sparams, multiple_matches, StmtInfo(false), sv)::Future
+                end
             end
         end
-    end
 
-    if call_result_unused(si) && !(rettype === Bottom)
-        add_remark!(interp, sv, "Call result type was widened because the return value is unused")
-        # We're mainly only here because the optimizer might want this code,
-        # but we ourselves locally don't typically care about it locally
-        # (beyond checking if it always throws).
-        # So avoid adding an edge, since we don't want to bother attempting
-        # to improve our result even if it does change (to always throw),
-        # and avoid keeping track of a more complex result type.
-        rettype = Any
-    end
-    any_slot_refined = slotrefinements !== nothing
-    add_call_backedges!(interp, rettype, all_effects, any_slot_refined, edges, matches, atype, sv)
-    if isa(sv, InferenceState)
-        # TODO (#48913) implement a proper recursion handling for irinterp:
-        # This works just because currently the `:terminate` condition guarantees that
-        # irinterp doesn't fail into unresolved cycles, but it's not a good solution.
-        # We should revisit this once we have a better story for handling cycles in irinterp.
-        if !isempty(sv.pclimitations) # remove self, if present
-            delete!(sv.pclimitations, sv)
-            for caller in callers_in_cycle(sv)
-                delete!(sv.pclimitations, caller)
+        if call_result_unused(si) && !(rettype === Bottom)
+            add_remark!(interp, sv, "Call result type was widened because the return value is unused")
+            # We're mainly only here because the optimizer might want this code,
+            # but we ourselves locally don't typically care about it locally
+            # (beyond checking if it always throws).
+            # So avoid adding an edge, since we don't want to bother attempting
+            # to improve our result even if it does change (to always throw),
+            # and avoid keeping track of a more complex result type.
+            rettype = Any
+        end
+        any_slot_refined = slotrefinements !== nothing
+        add_call_backedges!(interp, rettype, all_effects, any_slot_refined, edges, matches, atype.contents, sv)
+        if isa(sv, InferenceState)
+            # TODO (#48913) implement a proper recursion handling for irinterp:
+            # This works just because currently the `:terminate` condition guarantees that
+            # irinterp doesn't fail into unresolved cycles, but it's not a good solution.
+            # We should revisit this once we have a better story for handling cycles in irinterp.
+            if !isempty(sv.pclimitations) # remove self, if present
+                delete!(sv.pclimitations, sv)
+                for caller in callers_in_cycle(sv)
+                    delete!(sv.pclimitations, caller)
+                end
             end
         end
-    end
 
-    return CallMeta(rettype, exctype, all_effects, info, slotrefinements)
+        gfresult[] = CallMeta(rettype, exctype, all_effects, info, slotrefinements)
+        return true
+    end # infercalls
+    # start making progress on the first call
+    infercalls(interp, sv) || push!(sv.tasks, infercalls)
+    return gfresult
 end
 
 struct FailedMethodMatch
@@ -607,9 +594,9 @@ function abstract_call_method(interp::AbstractInterpreter,
                               hardlimit::Bool, si::StmtInfo, sv::AbsIntState)
     sigtuple = unwrap_unionall(sig)
     sigtuple isa DataType ||
-        return MethodCallResult(Any, Any, false, false, nothing, Effects())
+        return Future(MethodCallResult(Any, Any, false, false, nothing, Effects()))
     all(@nospecialize(x) -> valid_as_lattice(unwrapva(x), true), sigtuple.parameters) ||
-        return MethodCallResult(Union{}, Any, false, false, nothing, EFFECTS_THROWS) # catch bad type intersections early
+        return Future(MethodCallResult(Union{}, Any, false, false, nothing, EFFECTS_THROWS)) # catch bad type intersections early
 
     if is_nospecializeinfer(method)
         sig = get_nospecializeinfer_sig(method, sig, sparams)
@@ -634,7 +621,7 @@ function abstract_call_method(interp::AbstractInterpreter,
                     # we have a self-cycle in the call-graph, but not in the inference graph (typically):
                     # break this edge now (before we record it) by returning early
                     # (non-typically, this means that we lose the ability to detect a guaranteed StackOverflow in some cases)
-                    return MethodCallResult(Any, Any, true, true, nothing, Effects())
+                    return Future(MethodCallResult(Any, Any, true, true, nothing, Effects()))
                 end
                 topmost = nothing
                 edgecycle = true
@@ -689,7 +676,7 @@ function abstract_call_method(interp::AbstractInterpreter,
                 # since it's very unlikely that we'll try to inline this,
                 # or want make an invoke edge to its calling convention return type.
                 # (non-typically, this means that we lose the ability to detect a guaranteed StackOverflow in some cases)
-                return MethodCallResult(Any, Any, true, true, nothing, Effects())
+                return Future(MethodCallResult(Any, Any, true, true, nothing, Effects()))
             end
             add_remark!(interp, sv, washardlimit ? RECURSION_MSG_HARDLIMIT : RECURSION_MSG)
             # TODO (#48913) implement a proper recursion handling for irinterp:
@@ -745,31 +732,7 @@ function abstract_call_method(interp::AbstractInterpreter,
         sparams = recomputed[2]::SimpleVector
     end
 
-    (; rt, exct, edge, effects, volatile_inf_result) = typeinf_edge(interp, method, sig, sparams, sv)
-
-    if edge === nothing
-        edgecycle = edgelimited = true
-    end
-
-    # we look for the termination effect override here as well, since the :terminates effect
-    # may have been tainted due to recursion at this point even if it's overridden
-    if is_effect_overridden(sv, :terminates_globally)
-        # this frame is known to terminate
-        effects = Effects(effects, terminates=true)
-    elseif is_effect_overridden(method, :terminates_globally)
-        # this edge is known to terminate
-        effects = Effects(effects; terminates=true)
-    elseif edgecycle
-        # Some sort of recursion was detected.
-        if edge !== nothing && !edgelimited && !is_edge_recursed(edge, sv)
-            # no `MethodInstance` cycles -- don't taint :terminate
-        else
-            # we cannot guarantee that the call will terminate
-            effects = Effects(effects; terminates=false)
-        end
-    end
-
-    return MethodCallResult(rt, exct, edgecycle, edgelimited, edge, effects, volatile_inf_result)
+    return typeinf_edge(interp, method, sig, sparams, sv, edgecycle, edgelimited)
 end
 
 function edge_matches_sv(interp::AbstractInterpreter, frame::AbsIntState,
@@ -1331,7 +1294,7 @@ const_prop_result(inf_result::InferenceResult) =
                      inf_result.ipo_effects, inf_result.linfo)
 
 # return cached result of constant analysis
-return_cached_result(::AbstractInterpreter, inf_result::InferenceResult, ::AbsIntState) =
+return_localcache_result(::AbstractInterpreter, inf_result::InferenceResult, ::AbsIntState) =
     const_prop_result(inf_result)
 
 function compute_forwarded_argtypes(interp::AbstractInterpreter, arginfo::ArgInfo, sv::AbsIntState)
@@ -1361,7 +1324,7 @@ function const_prop_call(interp::AbstractInterpreter,
             return nothing
         end
         @assert inf_result.linfo === mi "MethodInstance for cached inference result does not match"
-        return return_cached_result(interp, inf_result, sv)
+        return return_localcache_result(interp, inf_result, sv)
     end
     overridden_by_const = falses(length(argtypes))
     for i = 1:length(argtypes)
@@ -1375,7 +1338,7 @@ function const_prop_call(interp::AbstractInterpreter,
     end
     # perform fresh constant prop'
     inf_result = InferenceResult(mi, argtypes, overridden_by_const)
-    frame = InferenceState(inf_result, #=cache_mode=#:local, interp)
+    frame = InferenceState(inf_result, #=cache_mode=#:local, interp) # TODO: this should also be converted to a stackless Future
     if frame === nothing
         add_remark!(interp, sv, "[constprop] Could not retrieve the source")
         return nothing # this is probably a bad generated function (unsound), but just ignore it
@@ -1517,9 +1480,9 @@ function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft)
         widet = typ.typ
         if isa(widet, DataType)
             if widet.name === Tuple.name
-                return AbstractIterationResult(typ.fields, nothing)
+                return Future(AbstractIterationResult(typ.fields, nothing))
             elseif widet.name === _NAMEDTUPLE_NAME
-                return AbstractIterationResult(typ.fields, nothing)
+                return Future(AbstractIterationResult(typ.fields, nothing))
             end
         end
     end
@@ -1527,7 +1490,7 @@ function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft)
     if isa(typ, Const)
         val = typ.val
         if isa(val, SimpleVector) || isa(val, Tuple) || isa(val, NamedTuple)
-            return AbstractIterationResult(Any[ Const(val[i]) for i in 1:length(val) ], nothing) # avoid making a tuple Generator here!
+            return Future(AbstractIterationResult(Any[ Const(val[i]) for i in 1:length(val) ], nothing)) # avoid making a tuple Generator here!
         end
     end
 
@@ -1544,18 +1507,18 @@ function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft)
         # refine the Union to remove elements that are not valid tags for objects
         filter!(@nospecialize(x) -> valid_as_lattice(x, true), utis)
         if length(utis) == 0
-            return AbstractIterationResult(Any[], nothing) # oops, this statement was actually unreachable
+            return Future(AbstractIterationResult(Any[], nothing)) # oops, this statement was actually unreachable
         elseif length(utis) == 1
             tti = utis[1]
             tti0 = rewrap_unionall(tti, tti0)
         else
             if any(@nospecialize(t) -> !isa(t, DataType) || !(t <: Tuple) || !isknownlength(t), utis)
-                return AbstractIterationResult(Any[Vararg{Any}], nothing, Effects())
+                return Future(AbstractIterationResult(Any[Vararg{Any}], nothing, Effects()))
             end
             ltp = length((utis[1]::DataType).parameters)
             for t in utis
                 if length((t::DataType).parameters) != ltp
-                    return AbstractIterationResult(Any[Vararg{Any}], nothing)
+                    return Future(AbstractIterationResult(Any[Vararg{Any}], nothing))
                 end
             end
             result = Any[ Union{} for _ in 1:ltp ]
@@ -1566,14 +1529,14 @@ function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft)
                     result[j] = tmerge(result[j], rewrap_unionall(tps[j], tti0))
                 end
             end
-            return AbstractIterationResult(result, nothing)
+            return Future(AbstractIterationResult(result, nothing))
         end
     end
     if tti0 <: Tuple
         if isa(tti0, DataType)
-            return AbstractIterationResult(Any[ p for p in tti0.parameters ], nothing)
+            return Future(AbstractIterationResult(Any[ p for p in tti0.parameters ], nothing))
         elseif !isa(tti, DataType)
-            return AbstractIterationResult(Any[Vararg{Any}], nothing)
+            return Future(AbstractIterationResult(Any[Vararg{Any}], nothing))
         else
             len = length(tti.parameters)
             last = tti.parameters[len]
@@ -1586,17 +1549,17 @@ function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft)
                     elts[len] = Vararg{elts[len]}
                 end
             end
-            return AbstractIterationResult(elts, nothing)
+            return Future(AbstractIterationResult(elts, nothing))
         end
     elseif tti0 === SimpleVector
-        return AbstractIterationResult(Any[Vararg{Any}], nothing)
+        return Future(AbstractIterationResult(Any[Vararg{Any}], nothing))
     elseif tti0 === Any
-        return AbstractIterationResult(Any[Vararg{Any}], nothing, Effects())
+        return Future(AbstractIterationResult(Any[Vararg{Any}], nothing, Effects()))
     elseif tti0 <: Array || tti0 <: GenericMemory
         if eltype(tti0) === Union{}
-            return AbstractIterationResult(Any[], nothing)
+            return Future(AbstractIterationResult(Any[], nothing))
         end
-        return AbstractIterationResult(Any[Vararg{eltype(tti0)}], nothing)
+        return Future(AbstractIterationResult(Any[Vararg{eltype(tti0)}], nothing))
     else
         return abstract_iteration(interp, itft, typ, sv)
     end
@@ -1607,95 +1570,144 @@ function abstract_iteration(interp::AbstractInterpreter, @nospecialize(itft), @n
     if isa(itft, Const)
         iteratef = itft.val
     else
-        return AbstractIterationResult(Any[Vararg{Any}], nothing, Effects())
+        return Future(AbstractIterationResult(Any[Vararg{Any}], nothing, Effects()))
     end
     @assert !isvarargtype(itertype)
-    call = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[itft, itertype]), StmtInfo(true), sv)
-    stateordonet = call.rt
-    info = call.info
-    # Return Bottom if this is not an iterator.
-    # WARNING: Changes to the iteration protocol must be reflected here,
-    # this is not just an optimization.
-    # TODO: this doesn't realize that Array, GenericMemory, SimpleVector, Tuple, and NamedTuple do not use the iterate protocol
-    stateordonet === Bottom && return AbstractIterationResult(Any[Bottom], AbstractIterationInfo(CallMeta[CallMeta(Bottom, Any, call.effects, info)], true))
-    valtype = statetype = Bottom
-    ret = Any[]
-    calls = CallMeta[call]
-    stateordonet_widened = widenconst(stateordonet)
-    𝕃ᵢ = typeinf_lattice(interp)
 
-    # Try to unroll the iteration up to max_tuple_splat, which covers any finite
-    # length iterators, or interesting prefix
-    while true
-        if stateordonet_widened === Nothing
-            return AbstractIterationResult(ret, AbstractIterationInfo(calls, true))
-        end
-        if Nothing <: stateordonet_widened || length(ret) >= InferenceParams(interp).max_tuple_splat
-            break
-        end
-        if !isa(stateordonet_widened, DataType) || !(stateordonet_widened <: Tuple) || isvatuple(stateordonet_widened) || length(stateordonet_widened.parameters) != 2
-            break
-        end
-        nstatetype = getfield_tfunc(𝕃ᵢ, stateordonet, Const(2))
-        # If there's no new information in this statetype, don't bother continuing,
-        # the iterator won't be finite.
-        if ⊑(𝕃ᵢ, nstatetype, statetype)
-            return AbstractIterationResult(Any[Bottom], AbstractIterationInfo(calls, false), EFFECTS_THROWS)
-        end
-        valtype = getfield_tfunc(𝕃ᵢ, stateordonet, Const(1))
-        push!(ret, valtype)
-        statetype = nstatetype
-        call = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), StmtInfo(true), sv)
-        stateordonet = call.rt
+    iterateresult = Future{AbstractIterationResult}()
+    call1future = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[itft, itertype]), StmtInfo(true), sv)::Future
+    function inferiterate(interp, sv)
+        call1 = call1future[]
+        stateordonet = call1.rt
+        # Return Bottom if this is not an iterator.
+        # WARNING: Changes to the iteration protocol must be reflected here,
+        # this is not just an optimization.
+        # TODO: this doesn't realize that Array, GenericMemory, SimpleVector, Tuple, and NamedTuple do not use the iterate protocol
+        if stateordonet === Bottom
+            iterateresult[] = AbstractIterationResult(Any[Bottom], AbstractIterationInfo(CallMeta[CallMeta(Bottom, Any, call1.effects, call1.info)], true))
+            return true
+        end
         stateordonet_widened = widenconst(stateordonet)
-        push!(calls, call)
-    end
-    # From here on, we start asking for results on the widened types, rather than
-    # the precise (potentially const) state type
-    # statetype and valtype are reinitialized in the first iteration below from the
-    # (widened) stateordonet, which has not yet been fully analyzed in the loop above
-    valtype = statetype = Bottom
-    may_have_terminated = Nothing <: stateordonet_widened
-    while valtype !== Any
-        nounion = typeintersect(stateordonet_widened, Tuple{Any,Any})
-        if nounion !== Union{} && !isa(nounion, DataType)
-            # nounion is of a type we cannot handle
-            valtype = Any
-            break
-        end
-        if nounion === Union{} || (nounion.parameters[1] <: valtype && nounion.parameters[2] <: statetype)
-            # reached a fixpoint or iterator failed/gave invalid answer
-            if !hasintersect(stateordonet_widened, Nothing)
-                # ... but cannot terminate
-                if !may_have_terminated
-                    #  ... and cannot have terminated prior to this loop
-                    return AbstractIterationResult(Any[Bottom], AbstractIterationInfo(calls, false), Effects())
-                else
-                    # iterator may have terminated prior to this loop, but not during it
-                    valtype = Bottom
+        calls = CallMeta[call1]
+        valtype = statetype = Bottom
+        ret = Any[]
+        𝕃ᵢ = typeinf_lattice(interp)
+        may_have_terminated = false
+        local call2future::Future{CallMeta}
+
+        nextstate::UInt8 = 0x0
+        function inferiterate_2arg(interp, sv)
+            if nextstate === 0x1
+                nextstate = 0xff
+                @goto state1
+            elseif nextstate === 0x2
+                nextstate = 0xff
+                @goto state2
+            else
+                @assert nextstate === 0x0
+                nextstate = 0xff
+            end
+
+            # Try to unroll the iteration up to max_tuple_splat, which covers any finite
+            # length iterators, or interesting prefix
+            while true
+                if stateordonet_widened === Nothing
+                    iterateresult[] = AbstractIterationResult(ret, AbstractIterationInfo(calls, true))
+                    return true
+                end
+                if Nothing <: stateordonet_widened || length(ret) >= InferenceParams(interp).max_tuple_splat
+                    break
+                end
+                if !isa(stateordonet_widened, DataType) || !(stateordonet_widened <: Tuple) || isvatuple(stateordonet_widened) || length(stateordonet_widened.parameters) != 2
+                    break
+                end
+                nstatetype = getfield_tfunc(𝕃ᵢ, stateordonet, Const(2))
+                # If there's no new information in this statetype, don't bother continuing,
+                # the iterator won't be finite.
+                if ⊑(𝕃ᵢ, nstatetype, statetype)
+                    iterateresult[] = AbstractIterationResult(Any[Bottom], AbstractIterationInfo(calls, false), EFFECTS_THROWS)
+                    return true
+                end
+                valtype = getfield_tfunc(𝕃ᵢ, stateordonet, Const(1))
+                push!(ret, valtype)
+                statetype = nstatetype
+                call2future = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), StmtInfo(true), sv)::Future
+                if !isready(call2future)
+                    nextstate = 0x1
+                    return false
+                    @label state1
+                end
+                let call = call2future[]
+                    push!(calls, call)
+                    stateordonet = call.rt
+                    stateordonet_widened = widenconst(stateordonet)
                 end
             end
-            break
-        end
-        valtype = tmerge(valtype, nounion.parameters[1])
-        statetype = tmerge(statetype, nounion.parameters[2])
-        call = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), StmtInfo(true), sv)
-        push!(calls, call)
-        stateordonet = call.rt
-        stateordonet_widened = widenconst(stateordonet)
-    end
-    if valtype !== Union{}
-        push!(ret, Vararg{valtype})
+            # From here on, we start asking for results on the widened types, rather than
+            # the precise (potentially const) state type
+            # statetype and valtype are reinitialized in the first iteration below from the
+            # (widened) stateordonet, which has not yet been fully analyzed in the loop above
+            valtype = statetype = Bottom
+            may_have_terminated = Nothing <: stateordonet_widened
+            while valtype !== Any
+                nounion = typeintersect(stateordonet_widened, Tuple{Any,Any})
+                if nounion !== Union{} && !isa(nounion, DataType)
+                    # nounion is of a type we cannot handle
+                    valtype = Any
+                    break
+                end
+                if nounion === Union{} || (nounion.parameters[1] <: valtype && nounion.parameters[2] <: statetype)
+                    # reached a fixpoint or iterator failed/gave invalid answer
+                    if !hasintersect(stateordonet_widened, Nothing)
+                        # ... but cannot terminate
+                        if may_have_terminated
+                            # ... and iterator may have terminated prior to this loop, but not during it
+                            valtype = Bottom
+                        else
+                            #  ... or cannot have terminated prior to this loop
+                            iterateresult[] = AbstractIterationResult(Any[Bottom], AbstractIterationInfo(calls, false), Effects())
+                            return true
+                        end
+                    end
+                    break
+                end
+                valtype = tmerge(valtype, nounion.parameters[1])
+                statetype = tmerge(statetype, nounion.parameters[2])
+                call2future = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), StmtInfo(true), sv)::Future
+                if !isready(call2future)
+                    nextstate = 0x2
+                    return false
+                    @label state2
+                end
+                let call = call2future[]
+                    push!(calls, call)
+                    stateordonet = call.rt
+                    stateordonet_widened = widenconst(stateordonet)
+                end
+            end
+            if valtype !== Union{}
+                push!(ret, Vararg{valtype})
+            end
+            iterateresult[] = AbstractIterationResult(ret, AbstractIterationInfo(calls, false))
+            return true
+        end # inferiterate_2arg
+        # continue making progress as much as possible, on iterate(arg, state)
+        inferiterate_2arg(interp, sv) || push!(sv.tasks, inferiterate_2arg)
+        return true
+    end # inferiterate
+    # continue making progress as soon as possible, on iterate(arg)
+    if !(isready(call1future) && inferiterate(interp, sv))
+        push!(sv.tasks, inferiterate)
     end
-    return AbstractIterationResult(ret, AbstractIterationInfo(calls, false))
+    return iterateresult
 end
 
 # do apply(af, fargs...), where af is a function value
 function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, si::StmtInfo,
                         sv::AbsIntState, max_methods::Int=get_max_methods(interp, sv))
-    itft = argtype_by_index(argtypes, 2)
+    itft = Core.Box(argtype_by_index(argtypes, 2))
     aft = argtype_by_index(argtypes, 3)
-    (itft === Bottom || aft === Bottom) && return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())
+    (itft.contents === Bottom || aft === Bottom) && return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()))
     aargtypes = argtype_tail(argtypes, 4)
     aftw = widenconst(aft)
     if !isa(aft, Const) && !isa(aft, PartialOpaque) && (!isType(aftw) || has_free_typevars(aftw))
@@ -1703,100 +1715,155 @@ function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, si::
             add_remark!(interp, sv, "Core._apply_iterate called on a function of a non-concrete type")
             # bail now, since it seems unlikely that abstract_call will be able to do any better after splitting
             # this also ensures we don't call abstract_call_gf_by_type below on an IntrinsicFunction or Builtin
-            return CallMeta(Any, Any, Effects(), NoCallInfo())
+            return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
         end
     end
     res = Union{}
-    nargs = length(aargtypes)
     splitunions = 1 < unionsplitcost(typeinf_lattice(interp), aargtypes) <= InferenceParams(interp).max_apply_union_enum
-    ctypes = [Any[aft]]
-    infos = Vector{MaybeAbstractIterationInfo}[MaybeAbstractIterationInfo[]]
-    effects = EFFECTS_TOTAL
-    for i = 1:nargs
-        ctypes´ = Vector{Any}[]
-        infos′ = Vector{MaybeAbstractIterationInfo}[]
-        for ti in (splitunions ? uniontypes(aargtypes[i]) : Any[aargtypes[i]])
-            if !isvarargtype(ti)
-                (;cti, info, ai_effects) = precise_container_type(interp, itft, ti, sv)
-            else
-                (;cti, info, ai_effects) = precise_container_type(interp, itft, unwrapva(ti), sv)
-                # We can't represent a repeating sequence of the same types,
-                # so tmerge everything together to get one type that represents
-                # everything.
-                argt = cti[end]
-                if isvarargtype(argt)
-                    argt = unwrapva(argt)
+    ctypes::Vector{Vector{Any}} = [Any[aft]]
+    infos::Vector{Vector{MaybeAbstractIterationInfo}} = Vector{MaybeAbstractIterationInfo}[MaybeAbstractIterationInfo[]]
+    all_effects::Effects = EFFECTS_TOTAL
+    retinfos = ApplyCallInfo[]
+    retinfo = UnionSplitApplyCallInfo(retinfos)
+    exctype = Union{}
+    ctypes´ = Vector{Any}[]
+    infos´ = Vector{MaybeAbstractIterationInfo}[]
+    local ti, argtypesi
+    local ctfuture::Future{AbstractIterationResult}
+    local callfuture::Future{CallMeta}
+
+    applyresult = Future{CallMeta}()
+    # split the rest into a resumable state machine
+    i::Int = 1
+    j::Int = 1
+    nextstate::UInt8 = 0x0
+    function infercalls(interp, sv)
+        # n.b. Remember that variables will lose their values across restarts,
+        # so be sure to manually hoist any values that must be preserved and do
+        # not rely on program order.
+        # This is a little more complex than the closure continuations often used elsewhere, but avoids needing to manage all of that indentation
+        if nextstate === 0x1
+            nextstate = 0xff
+            @goto state1
+        elseif nextstate === 0x2
+            nextstate = 0xff
+            @goto state2
+        elseif nextstate === 0x3
+            nextstate = 0xff
+            @goto state3
+        else
+            @assert nextstate === 0x0
+            nextstate = 0xff
+        end
+        while i <= length(aargtypes)
+            argtypesi = (splitunions ? uniontypes(aargtypes[i]) : Any[aargtypes[i]])
+            i += 1
+            j = 1
+            while j <= length(argtypesi)
+                ti = argtypesi[j]
+                j += 1
+                if !isvarargtype(ti)
+                    ctfuture = precise_container_type(interp, itft.contents, ti, sv)::Future
+                    if !isready(ctfuture)
+                        nextstate = 0x1
+                        return false
+                        @label state1
+                    end
+                    (;cti, info, ai_effects) = ctfuture[]
+                else
+                    ctfuture = precise_container_type(interp, itft.contents, unwrapva(ti), sv)::Future
+                    if !isready(ctfuture)
+                        nextstate = 0x2
+                        return false
+                        @label state2
+                    end
+                    (;cti, info, ai_effects) = ctfuture[]
+                    # We can't represent a repeating sequence of the same types,
+                    # so tmerge everything together to get one type that represents
+                    # everything.
+                    argt = cti[end]
+                    if isvarargtype(argt)
+                        argt = unwrapva(argt)
+                    end
+                    for k in 1:(length(cti)-1)
+                        argt = tmerge(argt, cti[k])
+                    end
+                    cti = Any[Vararg{argt}]
                 end
-                for i in 1:(length(cti)-1)
-                    argt = tmerge(argt, cti[i])
+                all_effects = merge_effects(all_effects, ai_effects)
+                if info !== nothing
+                    for call in info.each
+                        all_effects = merge_effects(all_effects, call.effects)
+                    end
+                end
+                if any(@nospecialize(t) -> t === Bottom, cti)
+                    continue
+                end
+                for k = 1:length(ctypes)
+                    ct = ctypes[k]
+                    if isvarargtype(ct[end])
+                        # This is vararg, we're not gonna be able to do any inlining,
+                        # drop the info
+                        info = nothing
+                        tail = tuple_tail_elem(typeinf_lattice(interp), unwrapva(ct[end]), cti)
+                        push!(ctypes´, push!(ct[1:(end - 1)], tail))
+                    else
+                        push!(ctypes´, append!(ct[:], cti))
+                    end
+                    push!(infos´, push!(copy(infos[k]), info))
                 end
-                cti = Any[Vararg{argt}]
             end
-            effects = merge_effects(effects, ai_effects)
-            if info !== nothing
-                for call in info.each
-                    effects = merge_effects(effects, call.effects)
+            # swap for the new array and empty the temporary one
+            ctypes´, ctypes = ctypes, ctypes´
+            infos´, infos = infos, infos´
+            empty!(ctypes´)
+            empty!(infos´)
+        end
+        all_effects.nothrow || (exctype = Any)
+
+        i = 1
+        while i <= length(ctypes)
+            ct = ctypes[i]
+            lct = length(ct)
+            # truncate argument list at the first Vararg
+            for k = 1:lct-1
+                cti = ct[k]
+                if isvarargtype(cti)
+                    ct[k] = tuple_tail_elem(typeinf_lattice(interp), unwrapva(cti), ct[(k+1):lct])
+                    resize!(ct, k)
+                    break
                 end
             end
-            if any(@nospecialize(t) -> t === Bottom, cti)
-                continue
+            callfuture = abstract_call(interp, ArgInfo(nothing, ct), si, sv, max_methods)::Future
+            if !isready(callfuture)
+                nextstate = 0x3
+                return false
+                @label state3
             end
-            for j = 1:length(ctypes)
-                ct = ctypes[j]::Vector{Any}
-                if isvarargtype(ct[end])
-                    # This is vararg, we're not gonna be able to do any inlining,
-                    # drop the info
-                    info = nothing
-                    tail = tuple_tail_elem(typeinf_lattice(interp), unwrapva(ct[end]), cti)
-                    push!(ctypes´, push!(ct[1:(end - 1)], tail))
-                else
-                    push!(ctypes´, append!(ct[:], cti))
+            let (; info, rt, exct, effects) = callfuture[]
+                push!(retinfos, ApplyCallInfo(info, infos[i]))
+                res = tmerge(typeinf_lattice(interp), res, rt)
+                exctype = tmerge(typeinf_lattice(interp), exctype, exct)
+                all_effects = merge_effects(all_effects, effects)
+                if i < length(ctypes) && bail_out_apply(interp, InferenceLoopState(ctypes[i], res, all_effects), sv)
+                    add_remark!(interp, sv, "_apply_iterate inference reached maximally imprecise information. Bailing on.")
+                    # there is unanalyzed candidate, widen type and effects to the top
+                    let retinfo = NoCallInfo() # NOTE this is necessary to prevent the inlining processing
+                        applyresult[] = CallMeta(Any, Any, Effects(), retinfo)
+                        return true
+                    end
                 end
-                push!(infos′, push!(copy(infos[j]), info))
             end
+            i += 1
         end
-        ctypes = ctypes´
-        infos = infos′
-    end
-    retinfos = ApplyCallInfo[]
-    retinfo = UnionSplitApplyCallInfo(retinfos)
-    napplicable = length(ctypes)
-    seen = 0
-    exct = effects.nothrow ? Union{} : Any
-    for i = 1:napplicable
-        ct = ctypes[i]
-        arginfo = infos[i]
-        lct = length(ct)
-        # truncate argument list at the first Vararg
-        for i = 1:lct-1
-            cti = ct[i]
-            if isvarargtype(cti)
-                ct[i] = tuple_tail_elem(typeinf_lattice(interp), unwrapva(cti), ct[(i+1):lct])
-                resize!(ct, i)
-                break
-            end
-        end
-        call = abstract_call(interp, ArgInfo(nothing, ct), si, sv, max_methods)
-        seen += 1
-        push!(retinfos, ApplyCallInfo(call.info, arginfo))
-        res = tmerge(typeinf_lattice(interp), res, call.rt)
-        exct = tmerge(typeinf_lattice(interp), exct, call.exct)
-        effects = merge_effects(effects, call.effects)
-        if bail_out_apply(interp, InferenceLoopState(ct, res, effects), sv)
-            add_remark!(interp, sv, "_apply_iterate inference reached maximally imprecise information. Bailing on.")
-            break
-        end
-    end
-    if seen ≠ napplicable
-        # there is unanalyzed candidate, widen type and effects to the top
-        res = Any
-        exct = Any
-        effects = Effects()
-        retinfo = NoCallInfo() # NOTE this is necessary to prevent the inlining processing
+        # TODO: Add a special info type to capture all the iteration info.
+        # For now, only propagate info if we don't also union-split the iteration
+        applyresult[] = CallMeta(res, exctype, all_effects, retinfo)
+        return true
     end
-    # TODO: Add a special info type to capture all the iteration info.
-    # For now, only propagate info if we don't also union-split the iteration
-    return CallMeta(res, exct, effects, retinfo)
+    # start making progress on the first call
+    infercalls(interp, sv) || push!(sv.tasks, infercalls)
+    return applyresult
 end
 
 function argtype_by_index(argtypes::Vector{Any}, i::Int)
@@ -2135,66 +2202,69 @@ function abstract_invoke(interp::AbstractInterpreter, arginfo::ArgInfo, si::Stmt
     argtypes = arginfo.argtypes
     ft′ = argtype_by_index(argtypes, 2)
     ft = widenconst(ft′)
-    ft === Bottom && return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())
+    ft === Bottom && return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()))
     (types, isexact, isconcrete, istype) = instanceof_tfunc(argtype_by_index(argtypes, 3), false)
-    isexact || return CallMeta(Any, Any, Effects(), NoCallInfo())
+    isexact || return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
     unwrapped = unwrap_unionall(types)
-    types === Bottom && return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())
+    types === Bottom && return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()))
     if !(unwrapped isa DataType && unwrapped.name === Tuple.name)
-        return CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo())
+        return Future(CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo()))
     end
     argtype = argtypes_to_type(argtype_tail(argtypes, 4))
     nargtype = typeintersect(types, argtype)
-    nargtype === Bottom && return CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo())
-    nargtype isa DataType || return CallMeta(Any, Any, Effects(), NoCallInfo()) # other cases are not implemented below
-    isdispatchelem(ft) || return CallMeta(Any, Any, Effects(), NoCallInfo()) # check that we might not have a subtype of `ft` at runtime, before doing supertype lookup below
+    nargtype === Bottom && return Future(CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo()))
+    nargtype isa DataType || return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) # other cases are not implemented below
+    isdispatchelem(ft) || return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) # check that we might not have a subtype of `ft` at runtime, before doing supertype lookup below
     ft = ft::DataType
     lookupsig = rewrap_unionall(Tuple{ft, unwrapped.parameters...}, types)::Type
     nargtype = Tuple{ft, nargtype.parameters...}
     argtype = Tuple{ft, argtype.parameters...}
     match, valid_worlds = findsup(lookupsig, method_table(interp))
-    match === nothing && return CallMeta(Any, Any, Effects(), NoCallInfo())
+    match === nothing && return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
     update_valid_age!(sv, valid_worlds)
     method = match.method
     tienv = ccall(:jl_type_intersection_with_env, Any, (Any, Any), nargtype, method.sig)::SimpleVector
-    ti = tienv[1]; env = tienv[2]::SimpleVector
-    result = abstract_call_method(interp, method, ti, env, false, si, sv)
-    (; rt, exct, edge, effects, volatile_inf_result) = result
+    ti = tienv[1]
+    env = tienv[2]::SimpleVector
+    mresult = abstract_call_method(interp, method, ti, env, false, si, sv)::Future
     match = MethodMatch(ti, env, method, argtype <: method.sig)
-    res = nothing
-    sig = match.spec_types
-    argtypes′ = invoke_rewrite(argtypes)
-    fargs = arginfo.fargs
-    fargs′ = fargs === nothing ? nothing : invoke_rewrite(fargs)
-    arginfo = ArgInfo(fargs′, argtypes′)
-    # # typeintersect might have narrowed signature, but the accuracy gain doesn't seem worth the cost involved with the lattice comparisons
-    # for i in 1:length(argtypes′)
-    #     t, a = ti.parameters[i], argtypes′[i]
-    #     argtypes′[i] = t ⊑ a ? t : a
-    # end
-    𝕃ₚ = ipo_lattice(interp)
-    ⊑, ⋤, ⊔ = partialorder(𝕃ₚ), strictneqpartialorder(𝕃ₚ), join(𝕃ₚ)
-    f = singleton_type(ft′)
-    invokecall = InvokeCall(types, lookupsig)
-    const_call_result = abstract_call_method_with_const_args(interp,
-        result, f, arginfo, si, match, sv, invokecall)
-    const_result = volatile_inf_result
-    if const_call_result !== nothing
-        if const_call_result.rt ⊑ rt
-            (; rt, effects, const_result, edge) = const_call_result
+    return Future{CallMeta}(mresult, interp, sv) do result, interp, sv
+        (; rt, exct, edge, effects, volatile_inf_result) = result
+        res = nothing
+        sig = match.spec_types
+        argtypes′ = invoke_rewrite(argtypes)
+        fargs = arginfo.fargs
+        fargs′ = fargs === nothing ? nothing : invoke_rewrite(fargs)
+        arginfo = ArgInfo(fargs′, argtypes′)
+        # # typeintersect might have narrowed signature, but the accuracy gain doesn't seem worth the cost involved with the lattice comparisons
+        # for i in 1:length(argtypes′)
+        #     t, a = ti.parameters[i], argtypes′[i]
+        #     argtypes′[i] = t ⊑ a ? t : a
+        # end
+        𝕃ₚ = ipo_lattice(interp)
+        ⊑, ⋤, ⊔ = partialorder(𝕃ₚ), strictneqpartialorder(𝕃ₚ), join(𝕃ₚ)
+        f = singleton_type(ft′)
+        invokecall = InvokeCall(types, lookupsig)
+        const_call_result = abstract_call_method_with_const_args(interp,
+            result, f, arginfo, si, match, sv, invokecall)
+        const_result = volatile_inf_result
+        if const_call_result !== nothing
+            if const_call_result.rt ⊑ rt
+                (; rt, effects, const_result, edge) = const_call_result
+            end
+            if const_call_result.exct ⋤ exct
+                (; exct, const_result, edge) = const_call_result
+            end
         end
-        if const_call_result.exct ⋤ exct
-            (; exct, const_result, edge) = const_call_result
+        rt = from_interprocedural!(interp, rt, sv, arginfo, sig)
+        info = InvokeCallInfo(match, const_result)
+        edge !== nothing && add_invoke_backedge!(sv, lookupsig, edge)
+        if !match.fully_covers
+            effects = Effects(effects; nothrow=false)
+            exct = exct ⊔ TypeError
         end
+        return CallMeta(rt, exct, effects, info)
     end
-    rt = from_interprocedural!(interp, rt, sv, arginfo, sig)
-    info = InvokeCallInfo(match, const_result)
-    edge !== nothing && add_invoke_backedge!(sv, lookupsig, edge)
-    if !match.fully_covers
-        effects = Effects(effects; nothrow=false)
-        exct = exct ⊔ TypeError
-    end
-    return CallMeta(rt, exct, effects, info)
 end
 
 function invoke_rewrite(xs::Vector{Any})
@@ -2207,10 +2277,12 @@ end
 function abstract_finalizer(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::AbsIntState)
     if length(argtypes) == 3
         finalizer_argvec = Any[argtypes[2], argtypes[3]]
-        call = abstract_call(interp, ArgInfo(nothing, finalizer_argvec), StmtInfo(false), sv, #=max_methods=#1)
-        return CallMeta(Nothing, Any, Effects(), FinalizerInfo(call.info, call.effects))
+        call = abstract_call(interp, ArgInfo(nothing, finalizer_argvec), StmtInfo(false), sv, #=max_methods=#1)::Future
+        return Future{CallMeta}(call, interp, sv) do call, interp, sv
+            return CallMeta(Nothing, Any, Effects(), FinalizerInfo(call.info, call.effects))
+        end
     end
-    return CallMeta(Nothing, Any, Effects(), NoCallInfo())
+    return Future(CallMeta(Nothing, Any, Effects(), NoCallInfo()))
 end
 
 function abstract_throw(interp::AbstractInterpreter, argtypes::Vector{Any}, ::AbsIntState)
@@ -2228,7 +2300,7 @@ function abstract_throw(interp::AbstractInterpreter, argtypes::Vector{Any}, ::Ab
     else
         exct = ArgumentError
     end
-    return CallMeta(Union{}, exct, EFFECTS_THROWS, NoCallInfo())
+    return Future(CallMeta(Union{}, exct, EFFECTS_THROWS, NoCallInfo()))
 end
 
 function abstract_throw_methoderror(interp::AbstractInterpreter, argtypes::Vector{Any}, ::AbsIntState)
@@ -2240,7 +2312,7 @@ function abstract_throw_methoderror(interp::AbstractInterpreter, argtypes::Vecto
         ⊔ = join(typeinf_lattice(interp))
         MethodError ⊔ ArgumentError
     end
-    return CallMeta(Union{}, exct, EFFECTS_THROWS, NoCallInfo())
+    return Future(CallMeta(Union{}, exct, EFFECTS_THROWS, NoCallInfo()))
 end
 
 # call where the function is known exactly
@@ -2285,60 +2357,70 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
                 end
             end
         end
-        return CallMeta(rt, exct, effects, NoCallInfo(), refinements)
+        return Future(CallMeta(rt, exct, effects, NoCallInfo(), refinements))
     elseif isa(f, Core.OpaqueClosure)
         # calling an OpaqueClosure about which we have no information returns no information
-        return CallMeta(typeof(f).parameters[2], Any, Effects(), NoCallInfo())
+        return Future(CallMeta(typeof(f).parameters[2], Any, Effects(), NoCallInfo()))
     elseif f === TypeVar && !isvarargtype(argtypes[end])
         # Manually look through the definition of TypeVar to
         # make sure to be able to get `PartialTypeVar`s out.
-        2 ≤ la ≤ 4 || return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())
-        n = argtypes[2]
-        ub_var = Const(Any)
-        lb_var = Const(Union{})
-        if la == 4
-            ub_var = argtypes[4]
-            lb_var = argtypes[3]
-        elseif la == 3
-            ub_var = argtypes[3]
-        end
+        2 ≤ la ≤ 4 || return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()))
         # make sure generic code is prepared for inlining if needed later
-        call = let T = Any[Type{TypeVar}, Any, Any, Any]
+        let T = Any[Type{TypeVar}, Any, Any, Any]
             resize!(T, la)
             atype = Tuple{T...}
             T[1] = Const(TypeVar)
-            abstract_call_gf_by_type(interp, f, ArgInfo(nothing, T), si, atype, sv, max_methods)
-        end
-        pT = typevar_tfunc(𝕃ᵢ, n, lb_var, ub_var)
-        typevar_argtypes = Any[n, lb_var, ub_var]
-        effects = builtin_effects(𝕃ᵢ, Core._typevar, typevar_argtypes, pT)
-        if effects.nothrow
-            exct = Union{}
-        else
-            exct = builtin_exct(𝕃ᵢ, Core._typevar, typevar_argtypes, pT)
+            let call = abstract_call_gf_by_type(interp, f, ArgInfo(nothing, T), si, atype, sv, max_methods)::Future
+                return Future{CallMeta}(call, interp, sv) do call, interp, sv
+                    n = argtypes[2]
+                    ub_var = Const(Any)
+                    lb_var = Const(Union{})
+                    if la == 4
+                        ub_var = argtypes[4]
+                        lb_var = argtypes[3]
+                    elseif la == 3
+                        ub_var = argtypes[3]
+                    end
+                    pT = typevar_tfunc(𝕃ᵢ, n, lb_var, ub_var)
+                    typevar_argtypes = Any[n, lb_var, ub_var]
+                    effects = builtin_effects(𝕃ᵢ, Core._typevar, typevar_argtypes, pT)
+                    if effects.nothrow
+                        exct = Union{}
+                    else
+                        exct = builtin_exct(𝕃ᵢ, Core._typevar, typevar_argtypes, pT)
+                    end
+                    return CallMeta(pT, exct, effects, call.info)
+                end
+            end
         end
-        return CallMeta(pT, exct, effects, call.info)
     elseif f === UnionAll
-        call = abstract_call_gf_by_type(interp, f, ArgInfo(nothing, Any[Const(UnionAll), Any, Any]), si, Tuple{Type{UnionAll}, Any, Any}, sv, max_methods)
-        return abstract_call_unionall(interp, argtypes, call)
+        let call = abstract_call_gf_by_type(interp, f, ArgInfo(nothing, Any[Const(UnionAll), Any, Any]), si, Tuple{Type{UnionAll}, Any, Any}, sv, max_methods)::Future
+            return Future{CallMeta}(call, interp, sv) do call, interp, sv
+                return abstract_call_unionall(interp, argtypes, call)
+            end
+        end
     elseif f === Tuple && la == 2
         aty = argtypes[2]
         ty = isvarargtype(aty) ? unwrapva(aty) : widenconst(aty)
         if !isconcretetype(ty)
-            return CallMeta(Tuple, Any, EFFECTS_UNKNOWN, NoCallInfo())
+            return Future(CallMeta(Tuple, Any, EFFECTS_UNKNOWN, NoCallInfo()))
         end
     elseif is_return_type(f)
         return return_type_tfunc(interp, argtypes, si, sv)
     elseif la == 3 && f === Core.:(!==)
         # mark !== as exactly a negated call to ===
-        call = abstract_call_gf_by_type(interp, f, ArgInfo(fargs, Any[Const(f), Any, Any]), si, Tuple{typeof(f), Any, Any}, sv, max_methods)
-        rty = abstract_call_known(interp, (===), arginfo, si, sv, max_methods).rt
-        if isa(rty, Conditional)
-            return CallMeta(Conditional(rty.slot, rty.elsetype, rty.thentype), Bottom, EFFECTS_TOTAL, NoCallInfo()) # swap if-else
-        elseif isa(rty, Const)
-            return CallMeta(Const(rty.val === false), Bottom, EFFECTS_TOTAL, MethodResultPure())
-        end
-        return call
+        let callfuture = abstract_call_gf_by_type(interp, f, ArgInfo(fargs, Any[Const(f), Any, Any]), si, Tuple{typeof(f), Any, Any}, sv, max_methods)::Future,
+            rtfuture = abstract_call_known(interp, (===), arginfo, si, sv, max_methods)::Future
+            return Future{CallMeta}(isready(callfuture) && isready(rtfuture), interp, sv) do interp, sv
+                local rty = rtfuture[].rt
+                if isa(rty, Conditional)
+                    return CallMeta(Conditional(rty.slot, rty.elsetype, rty.thentype), Bottom, EFFECTS_TOTAL, NoCallInfo()) # swap if-else
+                elseif isa(rty, Const)
+                    return CallMeta(Const(rty.val === false), Bottom, EFFECTS_TOTAL, MethodResultPure())
+                end
+                return callfuture[]
+            end
+        end
     elseif la == 3 && f === Core.:(>:)
         # mark issupertype as a exact alias for issubtype
         # swap T1 and T2 arguments and call <:
@@ -2350,12 +2432,12 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
         argtypes = Any[typeof(<:), argtypes[3], argtypes[2]]
         return abstract_call_known(interp, <:, ArgInfo(fargs, argtypes), si, sv, max_methods)
     elseif la == 2 && f === Core.typename
-        return CallMeta(typename_static(argtypes[2]), Bottom, EFFECTS_TOTAL, MethodResultPure())
+        return Future(CallMeta(typename_static(argtypes[2]), Bottom, EFFECTS_TOTAL, MethodResultPure()))
     elseif f === Core._hasmethod
-        return _hasmethod_tfunc(interp, argtypes, sv)
+        return Future(_hasmethod_tfunc(interp, argtypes, sv))
     end
     atype = argtypes_to_type(argtypes)
-    return abstract_call_gf_by_type(interp, f, arginfo, si, atype, sv, max_methods)
+    return abstract_call_gf_by_type(interp, f, arginfo, si, atype, sv, max_methods)::Future
 end
 
 function abstract_call_opaque_closure(interp::AbstractInterpreter,
@@ -2364,40 +2446,44 @@ function abstract_call_opaque_closure(interp::AbstractInterpreter,
     tt = closure.typ
     ocargsig = rewrap_unionall((unwrap_unionall(tt)::DataType).parameters[1], tt)
     ocargsig′ = unwrap_unionall(ocargsig)
-    ocargsig′ isa DataType || return CallMeta(Any, Any, Effects(), NoCallInfo())
+    ocargsig′ isa DataType || return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
     ocsig = rewrap_unionall(Tuple{Tuple, ocargsig′.parameters...}, ocargsig)
-    hasintersect(sig, ocsig) || return CallMeta(Union{}, Union{MethodError,TypeError}, EFFECTS_THROWS, NoCallInfo())
+    hasintersect(sig, ocsig) || return Future(CallMeta(Union{}, Union{MethodError,TypeError}, EFFECTS_THROWS, NoCallInfo()))
     ocmethod = closure.source::Method
-    result = abstract_call_method(interp, ocmethod, sig, Core.svec(), false, si, sv)
-    (; rt, exct, edge, effects, volatile_inf_result) = result
     match = MethodMatch(sig, Core.svec(), ocmethod, sig <: ocsig)
-    𝕃ₚ = ipo_lattice(interp)
-    ⊑, ⋤, ⊔ = partialorder(𝕃ₚ), strictneqpartialorder(𝕃ₚ), join(𝕃ₚ)
-    const_result = volatile_inf_result
-    if !result.edgecycle
-        const_call_result = abstract_call_method_with_const_args(interp, result,
-            nothing, arginfo, si, match, sv)
-        if const_call_result !== nothing
-            if const_call_result.rt ⊑ rt
-                (; rt, effects, const_result, edge) = const_call_result
-            end
-            if const_call_result.exct ⋤ exct
-                (; exct, const_result, edge) = const_call_result
+    mresult = abstract_call_method(interp, ocmethod, sig, Core.svec(), false, si, sv)
+    ocsig_box = Core.Box(ocsig)
+    return Future{CallMeta}(mresult, interp, sv) do result, interp, sv
+        (; rt, exct, edge, effects, volatile_inf_result, edgecycle) = result
+        𝕃ₚ = ipo_lattice(interp)
+        ⊑, ⋤, ⊔ = partialorder(𝕃ₚ), strictneqpartialorder(𝕃ₚ), join(𝕃ₚ)
+        const_result = volatile_inf_result
+        if !edgecycle
+            const_call_result = abstract_call_method_with_const_args(interp, result,
+                nothing, arginfo, si, match, sv)
+            if const_call_result !== nothing
+                if const_call_result.rt ⊑ rt
+                    (; rt, effects, const_result, edge) = const_call_result
+                end
+                if const_call_result.exct ⋤ exct
+                    (; exct, const_result, edge) = const_call_result
+                end
             end
         end
-    end
-    if check # analyze implicit type asserts on argument and return type
-        rty = (unwrap_unionall(tt)::DataType).parameters[2]
-        rty = rewrap_unionall(rty isa TypeVar ? rty.ub : rty, tt)
-        if !(rt ⊑ rty && sig ⊑ ocsig)
-            effects = Effects(effects; nothrow=false)
-            exct = exct ⊔ TypeError
+        if check # analyze implicit type asserts on argument and return type
+            ftt = closure.typ
+            rty = (unwrap_unionall(ftt)::DataType).parameters[2]
+            rty = rewrap_unionall(rty isa TypeVar ? rty.ub : rty, ftt)
+            if !(rt ⊑ rty && sig ⊑ ocsig_box.contents)
+                effects = Effects(effects; nothrow=false)
+                exct = exct ⊔ TypeError
+            end
         end
+        rt = from_interprocedural!(interp, rt, sv, arginfo, match.spec_types)
+        info = OpaqueClosureCallInfo(match, const_result)
+        edge !== nothing && add_backedge!(sv, edge)
+        return CallMeta(rt, exct, effects, info)
     end
-    rt = from_interprocedural!(interp, rt, sv, arginfo, match.spec_types)
-    info = OpaqueClosureCallInfo(match, const_result)
-    edge !== nothing && add_backedge!(sv, edge)
-    return CallMeta(rt, exct, effects, info)
 end
 
 function most_general_argtypes(closure::PartialOpaque)
@@ -2422,17 +2508,17 @@ function abstract_call_unknown(interp::AbstractInterpreter, @nospecialize(ft),
     wft = widenconst(ft)
     if hasintersect(wft, Builtin)
         add_remark!(interp, sv, "Could not identify method table for call")
-        return CallMeta(Any, Any, Effects(), NoCallInfo())
+        return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
     elseif hasintersect(wft, Core.OpaqueClosure)
         uft = unwrap_unionall(wft)
         if isa(uft, DataType)
-            return CallMeta(rewrap_unionall(uft.parameters[2], wft), Any, Effects(), NoCallInfo())
+            return Future(CallMeta(rewrap_unionall(uft.parameters[2], wft), Any, Effects(), NoCallInfo()))
         end
-        return CallMeta(Any, Any, Effects(), NoCallInfo())
+        return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
     end
     # non-constant function, but the number of arguments is known and the `f` is not a builtin or intrinsic
     atype = argtypes_to_type(arginfo.argtypes)
-    return abstract_call_gf_by_type(interp, nothing, arginfo, si, atype, sv, max_methods)
+    return abstract_call_gf_by_type(interp, nothing, arginfo, si, atype, sv, max_methods)::Future
 end
 
 # call where the function is any lattice element
@@ -2503,7 +2589,7 @@ function abstract_eval_cfunction(interp::AbstractInterpreter, e::Expr, vtypes::U
     # this may be the wrong world for the call,
     # but some of the result is likely to be valid anyways
     # and that may help generate better codegen
-    abstract_call(interp, ArgInfo(nothing, at), StmtInfo(false), sv)
+    abstract_call(interp, ArgInfo(nothing, at), StmtInfo(false), sv)::Future
     rt = e.args[1]
     isa(rt, Type) || (rt = Any)
     return RTEffects(rt, Any, EFFECTS_UNKNOWN)
@@ -2544,6 +2630,7 @@ function abstract_eval_value_expr(interp::AbstractInterpreter, e::Expr, sv::AbsI
         # TODO: We still have non-linearized cglobal
         @assert e.args[1] === Core.tuple || e.args[1] === GlobalRef(Core, :tuple)
     else
+        @assert e.head !== :(=)
         # Some of our tests expect us to handle invalid IR here and error later
         # - permit that for now.
         # @assert false "Unexpected EXPR head in value position"
@@ -2592,8 +2679,13 @@ function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, sv::Infere
         add_curr_ssaflag!(sv, IR_FLAG_UNUSED)
     end
     si = StmtInfo(!unused)
-    call = abstract_call(interp, arginfo, si, sv)
-    sv.stmt_info[sv.currpc] = call.info
+    call = abstract_call(interp, arginfo, si, sv)::Future
+    Future{Nothing}(call, interp, sv) do call, interp, sv
+        # this only is needed for the side-effect, sequenced before any task tries to consume the return value,
+        # which this will do even without returning this Future
+        sv.stmt_info[sv.currpc] = call.info
+        nothing
+    end
     return call
 end
 
@@ -2602,11 +2694,14 @@ function abstract_eval_call(interp::AbstractInterpreter, e::Expr, vtypes::Union{
     ea = e.args
     argtypes = collect_argtypes(interp, ea, vtypes, sv)
     if argtypes === nothing
-        return RTEffects(Bottom, Any, Effects())
+        return Future(RTEffects(Bottom, Any, Effects()))
     end
     arginfo = ArgInfo(ea, argtypes)
-    (; rt, exct, effects, refinements) = abstract_call(interp, arginfo, sv)
-    return RTEffects(rt, exct, effects, refinements)
+    call = abstract_call(interp, arginfo, sv)::Future
+    return Future{RTEffects}(call, interp, sv) do call, interp, sv
+        (; rt, exct, effects, refinements) = call
+        return RTEffects(rt, exct, effects, refinements)
+    end
 end
 
 function abstract_eval_new(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing},
@@ -2736,12 +2831,15 @@ function abstract_eval_new_opaque_closure(interp::AbstractInterpreter, e::Expr,
                 argtypes = most_general_argtypes(rt)
                 pushfirst!(argtypes, rt.env)
                 callinfo = abstract_call_opaque_closure(interp, rt,
-                    ArgInfo(nothing, argtypes), StmtInfo(true), sv, #=check=#false)
-                sv.stmt_info[sv.currpc] = OpaqueClosureCreateInfo(callinfo)
+                    ArgInfo(nothing, argtypes), StmtInfo(true), sv, #=check=#false)::Future
+                Future{Nothing}(callinfo, interp, sv) do callinfo, interp, sv
+                    sv.stmt_info[sv.currpc] = OpaqueClosureCreateInfo(callinfo)
+                    nothing
+                end
             end
         end
     end
-    return RTEffects(rt, Any, effects)
+    return Future(RTEffects(rt, Any, effects))
 end
 
 function abstract_eval_copyast(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing},
@@ -2837,7 +2935,7 @@ function abstract_eval_static_parameter(::AbstractInterpreter, e::Expr, sv::AbsI
 end
 
 function abstract_eval_statement_expr(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing},
-                                      sv::AbsIntState)
+                                      sv::AbsIntState)::Future{RTEffects}
     ehead = e.head
     if ehead === :call
         return abstract_eval_call(interp, e, vtypes, sv)
@@ -2935,43 +3033,7 @@ function stmt_taints_inbounds_consistency(sv::AbsIntState)
     return has_curr_ssaflag(sv, IR_FLAG_INBOUNDS)
 end
 
-function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e), vtypes::VarTable, sv::InferenceState)
-    if !isa(e, Expr)
-        if isa(e, PhiNode)
-            add_curr_ssaflag!(sv, IR_FLAGS_REMOVABLE)
-            # Implement convergence for PhiNodes. In particular, PhiNodes need to tmerge over
-            # the incoming values from all iterations, but `abstract_eval_phi` will only tmerge
-            # over the first and last iterations. By tmerging in the current old_rt, we ensure that
-            # we will not lose an intermediate value.
-            rt = abstract_eval_phi(interp, e, vtypes, sv)
-            old_rt = sv.ssavaluetypes[sv.currpc]
-            rt = old_rt === NOT_FOUND ? rt : tmerge(typeinf_lattice(interp), old_rt, rt)
-            return RTEffects(rt, Union{}, EFFECTS_TOTAL)
-        end
-        (; rt, exct, effects, refinements) = abstract_eval_special_value(interp, e, vtypes, sv)
-    else
-        (; rt, exct, effects, refinements) = abstract_eval_statement_expr(interp, e, vtypes, sv)
-        if effects.noub === NOUB_IF_NOINBOUNDS
-            if has_curr_ssaflag(sv, IR_FLAG_INBOUNDS)
-                effects = Effects(effects; noub=ALWAYS_FALSE)
-            elseif !propagate_inbounds(sv)
-                # The callee read our inbounds flag, but unless we propagate inbounds,
-                # we ourselves don't read our parent's inbounds.
-                effects = Effects(effects; noub=ALWAYS_TRUE)
-            end
-        end
-        e = e::Expr
-        @assert !isa(rt, TypeVar) "unhandled TypeVar"
-        rt = maybe_singleton_const(rt)
-        if !isempty(sv.pclimitations)
-            if rt isa Const || rt === Union{}
-                empty!(sv.pclimitations)
-            else
-                rt = LimitedAccuracy(rt, sv.pclimitations)
-                sv.pclimitations = IdSet{InferenceState}()
-            end
-        end
-    end
+function merge_override_effects!(interp::AbstractInterpreter, effects::Effects, sv::InferenceState)
     # N.B.: This only applies to the effects of the statement itself.
     # It is possible for arguments (GlobalRef/:static_parameter) to throw,
     # but these will be recomputed during SSA construction later.
@@ -2979,8 +3041,11 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e),
     effects = override_effects(effects, override)
     set_curr_ssaflag!(sv, flags_for_effects(effects), IR_FLAGS_EFFECTS)
     merge_effects!(interp, sv, effects)
+    return effects
+end
 
-    return RTEffects(rt, exct, effects, refinements)
+function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e), vtypes::VarTable, sv::InferenceState)
+    @assert !isa(e, Union{Expr, PhiNode, NewvarNode})
 end
 
 function override_effects(effects::Effects, override::EffectsOverride)
@@ -3226,60 +3291,6 @@ function handle_control_backedge!(interp::AbstractInterpreter, frame::InferenceS
     return nothing
 end
 
-struct BasicStmtChange
-    changes::Union{Nothing,StateUpdate}
-    rt::Any # extended lattice element or `nothing` - `nothing` if this statement may not be used as an SSA Value
-    exct::Any
-    # TODO effects::Effects
-    refinements # ::Union{Nothing,SlotRefinement,Vector{Any}}
-    function BasicStmtChange(changes::Union{Nothing,StateUpdate}, rt::Any, exct::Any,
-                             refinements=nothing)
-        @nospecialize rt exct refinements
-        return new(changes, rt, exct, refinements)
-    end
-end
-
-@inline function abstract_eval_basic_statement(interp::AbstractInterpreter,
-    @nospecialize(stmt), pc_vartable::VarTable, frame::InferenceState)
-    if isa(stmt, NewvarNode)
-        changes = StateUpdate(stmt.slot, VarState(Bottom, true))
-        return BasicStmtChange(changes, nothing, Union{})
-    elseif !isa(stmt, Expr)
-        (; rt, exct) = abstract_eval_statement(interp, stmt, pc_vartable, frame)
-        return BasicStmtChange(nothing, rt, exct)
-    end
-    changes = nothing
-    hd = stmt.head
-    if hd === :(=)
-        (; rt, exct, refinements) = abstract_eval_statement(interp, stmt.args[2], pc_vartable, frame)
-        if rt === Bottom
-            return BasicStmtChange(nothing, Bottom, exct, refinements)
-        end
-        lhs = stmt.args[1]
-        if isa(lhs, SlotNumber)
-            changes = StateUpdate(lhs, VarState(rt, false))
-        elseif isa(lhs, GlobalRef)
-            handle_global_assignment!(interp, frame, lhs, rt)
-        elseif !isa(lhs, SSAValue)
-            merge_effects!(interp, frame, EFFECTS_UNKNOWN)
-        end
-        return BasicStmtChange(changes, rt, exct, refinements)
-    elseif hd === :method
-        fname = stmt.args[1]
-        if isa(fname, SlotNumber)
-            changes = StateUpdate(fname, VarState(Any, false))
-        end
-        return BasicStmtChange(changes, nothing, Union{})
-    elseif (hd === :code_coverage_effect || (
-            hd !== :boundscheck && # :boundscheck can be narrowed to Bool
-            is_meta_expr(stmt)))
-        return BasicStmtChange(nothing, Nothing, Bottom)
-    else
-        (; rt, exct, refinements) = abstract_eval_statement(interp, stmt, pc_vartable, frame)
-        return BasicStmtChange(nothing, rt, exct, refinements)
-    end
-end
-
 function update_bbstate!(𝕃ᵢ::AbstractLattice, frame::InferenceState, bb::Int, vartable::VarTable)
     bbtable = frame.bb_vartables[bb]
     if bbtable === nothing
@@ -3379,27 +3390,45 @@ function update_cycle_worklists!(callback, frame::InferenceState)
 end
 
 # make as much progress on `frame` as possible (without handling cycles)
-function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
+struct CurrentState
+    result::Future
+    currstate::VarTable
+    bbstart::Int
+    bbend::Int
+    CurrentState(result::Future, currstate::VarTable, bbstart::Int, bbend::Int) = new(result, currstate, bbstart, bbend)
+    CurrentState() = new()
+end
+function typeinf_local(interp::AbstractInterpreter, frame::InferenceState, nextresult::CurrentState)
     @assert !is_inferred(frame)
     W = frame.ip
     ssavaluetypes = frame.ssavaluetypes
     bbs = frame.cfg.blocks
     nbbs = length(bbs)
     𝕃ᵢ = typeinf_lattice(interp)
-
+    states = frame.bb_vartables
     currbb = frame.currbb
+    currpc = frame.currpc
+
+    if isdefined(nextresult, :result)
+        # for reasons that are fairly unclear, some state is arbitrarily on the stack instead in the InferenceState as normal
+        bbstart = nextresult.bbstart
+        bbend = nextresult.bbend
+        currstate = nextresult.currstate
+        @goto injectresult
+    end
+
     if currbb != 1
         currbb = frame.currbb = _bits_findnext(W.bits, 1)::Int # next basic block
     end
-
-    states = frame.bb_vartables
     currstate = copy(states[currbb]::VarTable)
     while currbb <= nbbs
         delete!(W, currbb)
         bbstart = first(bbs[currbb].stmts)
         bbend = last(bbs[currbb].stmts)
 
-        for currpc in bbstart:bbend
+        currpc = bbstart - 1
+        while currpc < bbend
+            currpc += 1
             frame.currpc = currpc
             empty_backedges!(frame, currpc)
             stmt = frame.src.code[currpc]
@@ -3511,14 +3540,14 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                             return caller.ssavaluetypes[caller_pc] !== Any
                         end
                     end
-                    ssavaluetypes[frame.currpc] = Any
+                    ssavaluetypes[currpc] = Any
                     @goto find_next_bb
                 elseif isa(stmt, EnterNode)
                     ssavaluetypes[currpc] = Any
                     add_curr_ssaflag!(frame, IR_FLAG_NOTHROW)
                     if isdefined(stmt, :scope)
                         scopet = abstract_eval_value(interp, stmt.scope, currstate, frame)
-                        handler = gethandler(frame, frame.currpc+1)::TryCatchFrame
+                        handler = gethandler(frame, currpc + 1)::TryCatchFrame
                         @assert handler.scopet !== nothing
                         if !⊑(𝕃ᵢ, scopet, handler.scopet)
                             handler.scopet = tmerge(𝕃ᵢ, scopet, handler.scopet)
@@ -3537,8 +3566,91 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                 # Fall through terminator - treat as regular stmt
             end
             # Process non control-flow statements
-            (; changes, rt, exct, refinements) = abstract_eval_basic_statement(interp,
-                stmt, currstate, frame)
+            @assert isempty(frame.tasks)
+            rt = nothing
+            exct = Bottom
+            changes = nothing
+            refinements = nothing
+            effects = nothing
+            if isa(stmt, NewvarNode)
+                changes = StateUpdate(stmt.slot, VarState(Bottom, true))
+            elseif isa(stmt, PhiNode)
+                add_curr_ssaflag!(frame, IR_FLAGS_REMOVABLE)
+                # Implement convergence for PhiNodes. In particular, PhiNodes need to tmerge over
+                # the incoming values from all iterations, but `abstract_eval_phi` will only tmerge
+                # over the first and last iterations. By tmerging in the current old_rt, we ensure that
+                # we will not lose an intermediate value.
+                rt = abstract_eval_phi(interp, stmt, currstate, frame)
+                old_rt = frame.ssavaluetypes[currpc]
+                rt = old_rt === NOT_FOUND ? rt : tmerge(typeinf_lattice(interp), old_rt, rt)
+            else
+                lhs = nothing
+                if isexpr(stmt, :(=))
+                    lhs = stmt.args[1]
+                    stmt = stmt.args[2]
+                end
+                if !isa(stmt, Expr)
+                    (; rt, exct, effects, refinements) = abstract_eval_special_value(interp, stmt, currstate, frame)
+                else
+                    hd = stmt.head
+                    if hd === :method
+                        fname = stmt.args[1]
+                        if isa(fname, SlotNumber)
+                            changes = StateUpdate(fname, VarState(Any, false))
+                        end
+                    elseif (hd === :code_coverage_effect || (
+                            hd !== :boundscheck && # :boundscheck can be narrowed to Bool
+                            is_meta_expr(stmt)))
+                        rt = Nothing
+                    else
+                        result = abstract_eval_statement_expr(interp, stmt, currstate, frame)::Future
+                        if !isready(result) || !isempty(frame.tasks)
+                            return CurrentState(result, currstate, bbstart, bbend)
+                            @label injectresult
+                            # reload local variables
+                            stmt = frame.src.code[currpc]
+                            changes = nothing
+                            lhs = nothing
+                            if isexpr(stmt, :(=))
+                                lhs = stmt.args[1]
+                                stmt = stmt.args[2]
+                            end
+                            result = nextresult.result::Future{RTEffects}
+                        end
+                        result = result[]
+                        (; rt, exct, effects, refinements) = result
+                        if effects.noub === NOUB_IF_NOINBOUNDS
+                            if has_curr_ssaflag(frame, IR_FLAG_INBOUNDS)
+                                effects = Effects(effects; noub=ALWAYS_FALSE)
+                            elseif !propagate_inbounds(frame)
+                                # The callee read our inbounds flag, but unless we propagate inbounds,
+                                # we ourselves don't read our parent's inbounds.
+                                effects = Effects(effects; noub=ALWAYS_TRUE)
+                            end
+                        end
+                        @assert !isa(rt, TypeVar) "unhandled TypeVar"
+                        rt = maybe_singleton_const(rt)
+                        if !isempty(frame.pclimitations)
+                            if rt isa Const || rt === Union{}
+                                empty!(frame.pclimitations)
+                            else
+                                rt = LimitedAccuracy(rt, frame.pclimitations)
+                                frame.pclimitations = IdSet{InferenceState}()
+                            end
+                        end
+                    end
+                end
+                effects === nothing || merge_override_effects!(interp, effects, frame)
+                if lhs !== nothing && rt !== Bottom
+                    if isa(lhs, SlotNumber)
+                        changes = StateUpdate(lhs, VarState(rt, false))
+                    elseif isa(lhs, GlobalRef)
+                        handle_global_assignment!(interp, frame, lhs, rt)
+                    elseif !isa(lhs, SSAValue)
+                        merge_effects!(interp, frame, EFFECTS_UNKNOWN)
+                    end
+                end
+            end
             if !has_curr_ssaflag(frame, IR_FLAG_NOTHROW)
                 if exct !== Union{}
                     update_exc_bestguess!(interp, exct, frame)
@@ -3601,7 +3713,7 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
         end
     end # while currbb <= nbbs
 
-    nothing
+    return CurrentState()
 end
 
 function apply_refinement!(𝕃ᵢ::AbstractLattice, slot::SlotNumber, @nospecialize(newtyp),
@@ -3652,31 +3764,81 @@ function condition_object_change(currstate::VarTable, condt::Conditional,
 end
 
 # make as much progress on `frame` as possible (by handling cycles)
-function typeinf_nocycle(interp::AbstractInterpreter, frame::InferenceState)
-    typeinf_local(interp, frame)
-    @assert isempty(frame.ip)
+warnlength::Int = 2500
+function typeinf(interp::AbstractInterpreter, frame::InferenceState)
     callstack = frame.callstack::Vector{AbsIntState}
-    frame.cycleid == length(callstack) && return true
-
-    no_active_ips_in_callers = false
-    while true
-        # If the current frame is not the top part of a cycle, continue to the top of the cycle before resuming work
-        frame.cycleid == frame.frameid || return false
-        # If done, return and finalize this cycle
-        no_active_ips_in_callers && return true
-        # Otherwise, do at least one iteration over the entire current cycle
-        no_active_ips_in_callers = true
-        for i = reverse(frame.cycleid:length(callstack))
-            caller = callstack[i]::InferenceState
-            if !isempty(caller.ip)
-                # Note that `typeinf_local(interp, caller)` can potentially modify the other frames
-                # `frame.cycleid`, which is why making incremental progress requires the
-                # outer while loop.
-                typeinf_local(interp, caller)
-                no_active_ips_in_callers = false
-            end
-            update_valid_age!(caller, frame.valid_worlds)
+    nextstates = CurrentState[]
+    takenext = frame.frameid
+    minwarn = warnlength
+    takeprev = 0
+    while takenext >= frame.frameid
+        callee = takenext == 0 ? frame : callstack[takenext]::InferenceState
+        if !isempty(callstack)
+            if length(callstack) - frame.frameid >= minwarn
+                topmethod = callstack[1].linfo
+                topmethod.def isa Method || (topmethod = callstack[2].linfo)
+                print(Core.stderr, "info: inference of ", topmethod, " exceeding ", length(callstack), " frames (may be slow).\n")
+                minwarn *= 2
+            end
+            topcallee = (callstack[end]::InferenceState)
+            if topcallee.cycleid != callee.cycleid
+                callee = topcallee
+                takenext = length(callstack)
+            end
+        end
+        nextstateid = takenext + 1 - frame.frameid
+        while length(nextstates) < nextstateid
+            push!(nextstates, CurrentState())
+        end
+        if doworkloop(interp, callee)
+            # First drain the workloop. Note that since some scheduled work doesn't
+            # affect the result (e.g. cfunction or abstract_call_method on
+            # get_compileable_sig), but still must be finished up since it may see and
+            # change the local variables of the InferenceState at currpc, we do this
+            # even if the nextresult status is already completed.
+            continue
+        elseif isdefined(nextstates[nextstateid], :result) || !isempty(callee.ip)
+            # Next make progress on this frame
+            prev = length(callee.tasks) + 1
+            nextstates[nextstateid] = typeinf_local(interp, callee, nextstates[nextstateid])
+            reverse!(callee.tasks, prev)
+        elseif callee.cycleid == length(callstack)
+            # With no active ip's and no cycles, frame is done
+            finish_nocycle(interp, callee)
+            callee.frameid == 0 && break
+            takenext = length(callstack)
+            nextstateid = takenext + 1 - frame.frameid
+            #@assert length(nextstates) == nextstateid + 1
+            #@assert all(i -> !isdefined(nextstates[i], :result), nextstateid+1:length(nextstates))
+            resize!(nextstates, nextstateid)
+        elseif callee.cycleid == callee.frameid
+            # If the current frame is the top part of a cycle, check if the whole cycle
+            # is done, and if not, pick the next item to work on.
+            no_active_ips_in_cycle = true
+            for i = callee.cycleid:length(callstack)
+                caller = callstack[i]::InferenceState
+                @assert caller.cycleid == callee.cycleid
+                if !isempty(caller.tasks) || isdefined(nextstates[i+1-frame.frameid], :result) || !isempty(caller.ip)
+                    no_active_ips_in_cycle = false
+                    break
+                end
+            end
+            if no_active_ips_in_cycle
+                finish_cycle(interp, callstack, callee.cycleid)
+            end
+            takenext = length(callstack)
+            nextstateid = takenext + 1 - frame.frameid
+            if no_active_ips_in_cycle
+                #@assert all(i -> !isdefined(nextstates[i], :result), nextstateid+1:length(nextstates))
+                resize!(nextstates, nextstateid)
+            else
+                #@assert length(nextstates) == nextstateid
+            end
+        else
+            # Continue to the next frame in this cycle
+            takenext = takenext - 1
         end
     end
-    return true
+    #@assert all(nextresult -> !isdefined(nextresult, :result), nextstates)
+    return is_inferred(frame)
 end
diff --git a/base/compiler/inferencestate.jl b/base/compiler/inferencestate.jl
index 6953dea5b9bd7..05d95d1d5bdc7 100644
--- a/base/compiler/inferencestate.jl
+++ b/base/compiler/inferencestate.jl
@@ -251,6 +251,7 @@ mutable struct InferenceState
     stmt_info::Vector{CallInfo}
 
     #= intermediate states for interprocedural abstract interpretation =#
+    tasks::Vector{WorkThunk}
     pclimitations::IdSet{InferenceState} # causes of precision restrictions (LimitedAccuracy) on currpc ssavalue
     limitations::IdSet{InferenceState} # causes of precision restrictions (LimitedAccuracy) on return
     cycle_backedges::Vector{Tuple{InferenceState, Int}} # call-graph backedges connecting from callee to caller
@@ -328,6 +329,7 @@ mutable struct InferenceState
         limitations = IdSet{InferenceState}()
         cycle_backedges = Vector{Tuple{InferenceState,Int}}()
         callstack = AbsIntState[]
+        tasks = WorkThunk[]
 
         valid_worlds = WorldRange(1, get_world_counter())
         bestguess = Bottom
@@ -351,7 +353,7 @@ mutable struct InferenceState
         this = new(
             mi, world, mod, sptypes, slottypes, src, cfg, method_info,
             currbb, currpc, ip, handler_info, ssavalue_uses, bb_vartables, ssavaluetypes, stmt_edges, stmt_info,
-            pclimitations, limitations, cycle_backedges, callstack, 0, 0, 0,
+            tasks, pclimitations, limitations, cycle_backedges, callstack, 0, 0, 0,
             result, unreachable, valid_worlds, bestguess, exc_bestguess, ipo_effects,
             restrict_abstract_call_sites, cache_mode, insert_coverage,
             interp)
@@ -800,6 +802,7 @@ mutable struct IRInterpretationState
     const ssa_refined::BitSet
     const lazyreachability::LazyCFGReachability
     valid_worlds::WorldRange
+    const tasks::Vector{WorkThunk}
     const edges::Vector{Any}
     callstack #::Vector{AbsIntState}
     frameid::Int
@@ -825,10 +828,11 @@ mutable struct IRInterpretationState
         ssa_refined = BitSet()
         lazyreachability = LazyCFGReachability(ir)
         valid_worlds = WorldRange(min_world, max_world == typemax(UInt) ? get_world_counter() : max_world)
+        tasks = WorkThunk[]
         edges = Any[]
         callstack = AbsIntState[]
         return new(method_info, ir, mi, world, curridx, argtypes_refined, ir.sptypes, tpdum,
-                ssa_refined, lazyreachability, valid_worlds, edges, callstack, 0, 0)
+                ssa_refined, lazyreachability, valid_worlds, tasks, edges, callstack, 0, 0)
     end
 end
 
@@ -870,6 +874,7 @@ function print_callstack(frame::AbsIntState)
         print(frame_instance(sv))
         is_cached(sv) || print("  [uncached]")
         sv.parentid == idx - 1 || print(" [parent=", sv.parentid, "]")
+        isempty(callers_in_cycle(sv)) || print(" [cycle=", sv.cycleid, "]")
         println()
         @assert sv.frameid == idx
     end
@@ -994,7 +999,10 @@ of the same cycle, only if it is part of a cycle with multiple frames.
 function callers_in_cycle(sv::InferenceState)
     callstack = sv.callstack::Vector{AbsIntState}
     cycletop = cycleid = sv.cycleid
-    while cycletop < length(callstack) && (callstack[cycletop + 1]::InferenceState).cycleid == cycleid
+    while cycletop < length(callstack)
+        frame = callstack[cycletop + 1]
+        frame isa InferenceState || break
+        frame.cycleid == cycleid || break
         cycletop += 1
     end
     return AbsIntCycle(callstack, cycletop == cycleid ? 0 : cycleid, cycletop)
@@ -1054,6 +1062,7 @@ function merge_effects!(::AbstractInterpreter, caller::InferenceState, effects::
         effects = Effects(effects; effect_free=ALWAYS_TRUE)
     end
     caller.ipo_effects = merge_effects(caller.ipo_effects, effects)
+    nothing
 end
 merge_effects!(::AbstractInterpreter, ::IRInterpretationState, ::Effects) = return
 
@@ -1116,3 +1125,90 @@ function get_max_methods_for_module(mod::Module)
     max_methods < 0 && return nothing
     return max_methods
 end
+
+"""
+    Future{T}
+
+Delayed return value for a value of type `T`, similar to RefValue{T}, but
+explicitly represents completed as a `Bool` rather than as `isdefined`.
+Set once with `f[] = v` and accessed with `f[]` afterwards.
+
+Can also be constructed with the `completed` flag value and a closure to
+produce `x`, as well as the additional arguments to avoid always capturing the
+same couple of values.
+"""
+struct Future{T}
+    later::Union{Nothing,RefValue{T}}
+    now::Union{Nothing,T}
+    Future{T}() where {T} = new{T}(RefValue{T}(), nothing)
+    Future{T}(x) where {T} = new{T}(nothing, x)
+    Future(x::T) where {T} = new{T}(nothing, x)
+end
+isready(f::Future) = f.later === nothing
+getindex(f::Future{T}) where {T} = (later = f.later; later === nothing ? f.now::T : later[])
+setindex!(f::Future, v) = something(f.later)[] = v
+convert(::Type{Future{T}}, x) where {T} = Future{T}(x) # support return type conversion
+convert(::Type{Future{T}}, x::Future) where {T} = x::Future{T}
+function Future{T}(f, immediate::Bool, interp::AbstractInterpreter, sv::AbsIntState) where {T}
+    if immediate
+        return Future{T}(f(interp, sv))
+    else
+        @assert applicable(f, interp, sv)
+        result = Future{T}()
+        push!(sv.tasks, function (interp, sv)
+            result[] = f(interp, sv)
+            return true
+        end)
+        return result
+    end
+end
+function Future{T}(f, prev::Future{S}, interp::AbstractInterpreter, sv::AbsIntState) where {T, S}
+    later = prev.later
+    if later === nothing
+        return Future{T}(f(prev[], interp, sv))
+    else
+        @assert Core._hasmethod(Tuple{Core.Typeof(f), S, typeof(interp), typeof(sv)})
+        result = Future{T}()
+        push!(sv.tasks, function (interp, sv)
+            result[] = f(later[], interp, sv) # capture just later, instead of all of prev
+            return true
+        end)
+        return result
+    end
+end
+
+
+"""
+    doworkloop(args...)
+
+Run a tasks inside the abstract interpreter, returning false if there are none.
+Tasks will be run in DFS post-order tree order, such that all child tasks will
+be run in the order scheduled, prior to running any subsequent tasks. This
+allows tasks to generate more child tasks, which will be run before anything else.
+Each task will be run repeatedly when returning `false`, until it returns `true`.
+"""
+function doworkloop(interp::AbstractInterpreter, sv::AbsIntState)
+    tasks = sv.tasks
+    prev = length(tasks)
+    prev == 0 && return false
+    task = pop!(tasks)
+    completed = task(interp, sv)
+    tasks = sv.tasks # allow dropping gc root over the previous call
+    completed isa Bool || throw(TypeError(:return, "", Bool, task)) # print the task on failure as part of the error message, instead of just "@ workloop:line"
+    completed || push!(tasks, task)
+    # efficient post-order visitor: items pushed are executed in reverse post order such
+    # that later items are executed before earlier ones, but are fully executed
+    # (including any dependencies scheduled by them) before going on to the next item
+    reverse!(tasks, #=start=#prev)
+    return true
+end
+
+
+#macro workthunk(name::Symbol, body)
+#    name = esc(name)
+#    body = esc(body)
+#    return replace_linenums!(
+#        :(function $name($(esc(interp)), $(esc(sv)))
+#              $body
+#          end), __source__)
+#end
diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl
index 960da88ddffc8..fdcb4621c5c0f 100644
--- a/base/compiler/ssair/ir.jl
+++ b/base/compiler/ssair/ir.jl
@@ -1432,6 +1432,7 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr
     elseif isa(stmt, OldSSAValue)
         ssa_rename[idx] = ssa_rename[stmt.id]
     elseif isa(stmt, GotoNode) && cfg_transforms_enabled
+        stmt.label < 0 && (println(stmt); println(compact))
         label = bb_rename_succ[stmt.label]
         @assert label > 0
         ssa_rename[idx] = SSAValue(result_idx)
diff --git a/base/compiler/ssair/irinterp.jl b/base/compiler/ssair/irinterp.jl
index 1aeb87accbcd7..ca8ca770df413 100644
--- a/base/compiler/ssair/irinterp.jl
+++ b/base/compiler/ssair/irinterp.jl
@@ -51,8 +51,11 @@ end
 
 function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, irsv::IRInterpretationState)
     si = StmtInfo(true) # TODO better job here?
-    call = abstract_call(interp, arginfo, si, irsv)
-    irsv.ir.stmts[irsv.curridx][:info] = call.info
+    call = abstract_call(interp, arginfo, si, irsv)::Future
+    Future{Nothing}(call, interp, irsv) do call, interp, irsv
+        irsv.ir.stmts[irsv.curridx][:info] = call.info
+        nothing
+    end
     return call
 end
 
@@ -143,7 +146,19 @@ function reprocess_instruction!(interp::AbstractInterpreter, inst::Instruction,
         head = stmt.head
         if (head === :call || head === :foreigncall || head === :new || head === :splatnew ||
             head === :static_parameter || head === :isdefined || head === :boundscheck)
-            (; rt, effects) = abstract_eval_statement_expr(interp, stmt, nothing, irsv)
+            @assert isempty(irsv.tasks) # TODO: this whole function needs to be converted to a stackless design to be a valid AbsIntState, but this should work here for now
+            result = abstract_eval_statement_expr(interp, stmt, nothing, irsv)
+            reverse!(irsv.tasks)
+            while true
+                if length(irsv.callstack) > irsv.frameid
+                    typeinf(interp, irsv.callstack[irsv.frameid + 1])
+                elseif !doworkloop(interp, irsv)
+                    break
+                end
+            end
+            @assert length(irsv.callstack) == irsv.frameid && isempty(irsv.tasks)
+            result isa Future && (result = result[])
+            (; rt, effects) = result
             add_flag!(inst, flags_for_effects(effects))
         elseif head === :invoke
             rt, (nothrow, noub) = abstract_eval_invoke_inst(interp, inst, irsv)
@@ -293,7 +308,7 @@ function is_all_const_call(@nospecialize(stmt), interp::AbstractInterpreter, irs
     return true
 end
 
-function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IRInterpretationState;
+function ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IRInterpretationState;
         externally_refined::Union{Nothing,BitSet} = nothing)
     (; ir, tpdum, ssa_refined) = irsv
 
@@ -449,18 +464,3 @@ function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IR
 
     return Pair{Any,Tuple{Bool,Bool}}(maybe_singleton_const(ultimate_rt), (nothrow, noub))
 end
-
-function ir_abstract_constant_propagation(interp::NativeInterpreter, irsv::IRInterpretationState)
-    if __measure_typeinf__[]
-        inf_frame = Timings.InferenceFrameInfo(irsv.mi, irsv.world, VarState[], Any[], length(irsv.ir.argtypes))
-        Timings.enter_new_timer(inf_frame)
-        ret = _ir_abstract_constant_propagation(interp, irsv)
-        append!(inf_frame.slottypes, irsv.ir.argtypes)
-        Timings.exit_current_timer(inf_frame)
-        return ret
-    else
-        return _ir_abstract_constant_propagation(interp, irsv)
-    end
-end
-ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IRInterpretationState) =
-    _ir_abstract_constant_propagation(interp, irsv)
diff --git a/base/compiler/ssair/verify.jl b/base/compiler/ssair/verify.jl
index a4286177e93a4..268991282c483 100644
--- a/base/compiler/ssair/verify.jl
+++ b/base/compiler/ssair/verify.jl
@@ -1,9 +1,11 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 function maybe_show_ir(ir::IRCode)
-    if isdefined(Core, :Main)
+    if isdefined(Core, :Main) && isdefined(Core.Main, :Base)
         # ensure we use I/O that does not yield, as this gets called during compilation
         invokelatest(Core.Main.Base.show, Core.stdout, "text/plain", ir)
+    else
+        Core.show(ir)
     end
 end
 
@@ -25,6 +27,7 @@ is_toplevel_expr_head(head::Symbol) = head === :global || head === :method || he
 is_value_pos_expr_head(head::Symbol) = head === :static_parameter
 function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, use_idx::Int, printed_use_idx::Int, print::Bool, isforeigncall::Bool, arg_idx::Int, allow_frontend_forms::Bool)
     if isa(op, SSAValue)
+        op.id > 0 || @verify_error "Def ($(op.id)) is invalid in final IR"
         if op.id > length(ir.stmts)
             def_bb = block_for_inst(ir.cfg, ir.new_nodes.info[op.id - length(ir.stmts)].pos)
         else
diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl
index cc8ba227bd088..a6b7e53c6f320 100644
--- a/base/compiler/tfuncs.jl
+++ b/base/compiler/tfuncs.jl
@@ -1383,10 +1383,10 @@ end
 
     nargs = length(argtypes)
     if !isempty(argtypes) && isvarargtype(argtypes[nargs])
-        nargs - 1 <= maxargs || return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())
-        nargs + 1 >= op_argi || return CallMeta(Any, Any, Effects(), NoCallInfo())
+        nargs - 1 <= maxargs || return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()))
+        nargs + 1 >= op_argi || return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
     else
-        minargs <= nargs <= maxargs || return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())
+        minargs <= nargs <= maxargs || return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()))
     end
     𝕃ᵢ = typeinf_lattice(interp)
     if ff === modifyfield!
@@ -1417,15 +1417,22 @@ end
         op = unwrapva(argtypes[op_argi])
         v = unwrapva(argtypes[v_argi])
         callinfo = abstract_call(interp, ArgInfo(nothing, Any[op, TF, v]), StmtInfo(true), sv, #=max_methods=#1)
-        TF2 = tmeet(callinfo.rt, widenconst(TF))
-        if TF2 === Bottom
-            RT = Bottom
-        elseif isconcretetype(RT) && has_nontrivial_extended_info(𝕃ᵢ, TF2) # isconcrete condition required to form a PartialStruct
-            RT = PartialStruct(RT, Any[TF, TF2])
+        TF = Core.Box(TF)
+        RT = Core.Box(RT)
+        return Future{CallMeta}(callinfo, interp, sv) do callinfo, interp, sv
+            TF = TF.contents
+            RT = RT.contents
+            TF2 = tmeet(callinfo.rt, widenconst(TF))
+            if TF2 === Bottom
+                RT = Bottom
+            elseif isconcretetype(RT) && has_nontrivial_extended_info(𝕃ᵢ, TF2) # isconcrete condition required to form a PartialStruct
+                RT = PartialStruct(RT, Any[TF, TF2])
+            end
+            info = ModifyOpInfo(callinfo.info)
+            return CallMeta(RT, Any, Effects(), info)
         end
-        info = ModifyOpInfo(callinfo.info)
     end
-    return CallMeta(RT, Any, Effects(), info)
+    return Future(CallMeta(RT, Any, Effects(), info))
 end
 
 # we could use tuple_tfunc instead of widenconst, but `o` is mutable, so that is unlikely to be beneficial
@@ -2895,17 +2902,17 @@ end
 function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, si::StmtInfo, sv::AbsIntState)
     UNKNOWN = CallMeta(Type, Any, Effects(EFFECTS_THROWS; nortcall=false), NoCallInfo())
     if !(2 <= length(argtypes) <= 3)
-        return UNKNOWN
+        return Future(UNKNOWN)
     end
 
     tt = widenslotwrapper(argtypes[end])
     if !isa(tt, Const) && !(isType(tt) && !has_free_typevars(tt))
-        return UNKNOWN
+        return Future(UNKNOWN)
     end
 
     af_argtype = isa(tt, Const) ? tt.val : (tt::DataType).parameters[1]
     if !isa(af_argtype, DataType) || !(af_argtype <: Tuple)
-        return UNKNOWN
+        return Future(UNKNOWN)
     end
 
     if length(argtypes) == 3
@@ -2918,7 +2925,7 @@ function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, s
     end
     if !(isa(aft, Const) || (isType(aft) && !has_free_typevars(aft)) ||
             (isconcretetype(aft) && !(aft <: Builtin) && !iskindtype(aft)))
-        return UNKNOWN
+        return Future(UNKNOWN)
     end
 
     # effects are not an issue if we know this statement will get removed, but if it does not get removed,
@@ -2926,7 +2933,7 @@ function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, s
     RT_CALL_EFFECTS = Effects(EFFECTS_TOTAL; nortcall=false)
 
     if contains_is(argtypes_vec, Union{})
-        return CallMeta(Const(Union{}), Union{}, RT_CALL_EFFECTS, NoCallInfo())
+        return Future(CallMeta(Const(Union{}), Union{}, RT_CALL_EFFECTS, NoCallInfo()))
     end
 
     # Run the abstract_call without restricting abstract call
@@ -2935,42 +2942,45 @@ function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, s
     if isa(sv, InferenceState)
         old_restrict = sv.restrict_abstract_call_sites
         sv.restrict_abstract_call_sites = false
-        call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, #=max_methods=#-1)
-        sv.restrict_abstract_call_sites = old_restrict
-    else
-        call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, #=max_methods=#-1)
-    end
-    info = verbose_stmt_info(interp) ? MethodResultPure(ReturnTypeCallInfo(call.info)) : MethodResultPure()
-    rt = widenslotwrapper(call.rt)
-    if isa(rt, Const)
-        # output was computed to be constant
-        return CallMeta(Const(typeof(rt.val)), Union{}, RT_CALL_EFFECTS, info)
-    end
-    rt = widenconst(rt)
-    if rt === Bottom || (isconcretetype(rt) && !iskindtype(rt))
-        # output cannot be improved so it is known for certain
-        return CallMeta(Const(rt), Union{}, RT_CALL_EFFECTS, info)
-    elseif isa(sv, InferenceState) && !isempty(sv.pclimitations)
-        # conservatively express uncertainty of this result
-        # in two ways: both as being a subtype of this, and
-        # because of LimitedAccuracy causes
-        return CallMeta(Type{<:rt}, Union{}, RT_CALL_EFFECTS, info)
-    elseif isa(tt, Const) || isconstType(tt)
-        # input arguments were known for certain
-        # XXX: this doesn't imply we know anything about rt
-        return CallMeta(Const(rt), Union{}, RT_CALL_EFFECTS, info)
-    elseif isType(rt)
-        return CallMeta(Type{rt}, Union{}, RT_CALL_EFFECTS, info)
-    else
-        return CallMeta(Type{<:rt}, Union{}, RT_CALL_EFFECTS, info)
+    end
+    call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, #=max_methods=#-1)
+    tt = Core.Box(tt)
+    return Future{CallMeta}(call, interp, sv) do call, interp, sv
+        if isa(sv, InferenceState)
+            sv.restrict_abstract_call_sites = old_restrict
+        end
+        info = verbose_stmt_info(interp) ? MethodResultPure(ReturnTypeCallInfo(call.info)) : MethodResultPure()
+        rt = widenslotwrapper(call.rt)
+        if isa(rt, Const)
+            # output was computed to be constant
+            return CallMeta(Const(typeof(rt.val)), Union{}, RT_CALL_EFFECTS, info)
+        end
+        rt = widenconst(rt)
+        if rt === Bottom || (isconcretetype(rt) && !iskindtype(rt))
+            # output cannot be improved so it is known for certain
+            return CallMeta(Const(rt), Union{}, RT_CALL_EFFECTS, info)
+        elseif isa(sv, InferenceState) && !isempty(sv.pclimitations)
+            # conservatively express uncertainty of this result
+            # in two ways: both as being a subtype of this, and
+            # because of LimitedAccuracy causes
+            return CallMeta(Type{<:rt}, Union{}, RT_CALL_EFFECTS, info)
+        elseif isa(tt.contents, Const) || isconstType(tt.contents)
+            # input arguments were known for certain
+            # XXX: this doesn't imply we know anything about rt
+            return CallMeta(Const(rt), Union{}, RT_CALL_EFFECTS, info)
+        elseif isType(rt)
+            return CallMeta(Type{rt}, Union{}, RT_CALL_EFFECTS, info)
+        else
+            return CallMeta(Type{<:rt}, Union{}, RT_CALL_EFFECTS, info)
+        end
     end
 end
 
 # a simplified model of abstract_call_gf_by_type for applicable
 function abstract_applicable(interp::AbstractInterpreter, argtypes::Vector{Any},
                              sv::AbsIntState, max_methods::Int)
-    length(argtypes) < 2 && return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())
-    isvarargtype(argtypes[2]) && return CallMeta(Bool, Any, EFFECTS_THROWS, NoCallInfo())
+    length(argtypes) < 2 && return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()))
+    isvarargtype(argtypes[2]) && return Future(CallMeta(Bool, Any, EFFECTS_THROWS, NoCallInfo()))
     argtypes = argtypes[2:end]
     atype = argtypes_to_type(argtypes)
     matches = find_method_matches(interp, argtypes, atype; max_methods)
@@ -2997,7 +3007,7 @@ function abstract_applicable(interp::AbstractInterpreter, argtypes::Vector{Any},
         # added that did not intersect with any existing method
         add_uncovered_edges!(sv, matches, atype)
     end
-    return CallMeta(rt, Union{}, EFFECTS_TOTAL, NoCallInfo())
+    return Future(CallMeta(rt, Union{}, EFFECTS_TOTAL, NoCallInfo()))
 end
 add_tfunc(applicable, 1, INT_INF, @nospecs((𝕃::AbstractLattice, f, args...)->Bool), 40)
 
diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl
index 315a068e611fe..77a2e02129ce4 100644
--- a/base/compiler/typeinfer.jl
+++ b/base/compiler/typeinfer.jl
@@ -56,7 +56,7 @@ end
 Timing(mi_info, start_time, cur_start_time, time, children) = Timing(mi_info, start_time, cur_start_time, time, children, nothing)
 Timing(mi_info, start_time) = Timing(mi_info, start_time, start_time, UInt64(0), Timing[])
 
-_time_ns() = ccall(:jl_hrtime, UInt64, ())  # Re-implemented here because Base not yet available.
+_time_ns() = ccall(:jl_hrtime, UInt64, ())
 
 # We keep a stack of the Timings for each of the MethodInstances currently being timed.
 # Since type inference currently operates via a depth-first search (during abstract
@@ -77,114 +77,14 @@ const ROOTmi = Core.Compiler.specialize_method(
 Empty out the previously recorded type inference timings (`Core.Compiler._timings`), and
 start the ROOT() timer again. `ROOT()` measures all time spent _outside_ inference.
 """
-function reset_timings()
-    empty!(_timings)
-    push!(_timings, Timing(
-        # The MethodInstance for ROOT(), and default empty values for other fields.
-        InferenceFrameInfo(ROOTmi, 0x0, Core.Compiler.VarState[], Any[Core.Const(ROOT)], 1),
-        _time_ns()))
-    return nothing
-end
-reset_timings()
-
-# (This is split into a function so that it can be called both in this module, at the top
-# of `enter_new_timer()`, and once at the Very End of the operation, by whoever started
-# the operation and called `reset_timings()`.)
-# NOTE: the @inline annotations here are not to make it faster, but to reduce the gap between
-# timer manipulations and the tasks we're timing.
-@inline function close_current_timer()
-    stop_time = _time_ns()
-    parent_timer = _timings[end]
-    accum_time = stop_time - parent_timer.cur_start_time
-
-    # Add in accum_time ("modify" the immutable struct)
-    @inbounds begin
-        _timings[end] = Timing(
-            parent_timer.mi_info,
-            parent_timer.start_time,
-            parent_timer.cur_start_time,
-            parent_timer.time + accum_time,
-            parent_timer.children,
-            parent_timer.bt,
-        )
-    end
-    return nothing
-end
-
-@inline function enter_new_timer(frame)
-    # Very first thing, stop the active timer: get the current time and add in the
-    # time since it was last started to its aggregate exclusive time.
-    close_current_timer()
-
-    mi_info = _typeinf_identifier(frame)
-
-    # Start the new timer right before returning
-    push!(_timings, Timing(mi_info, UInt64(0)))
-    len = length(_timings)
-    new_timer = @inbounds _timings[len]
-    # Set the current time _after_ appending the node, to try to exclude the
-    # overhead from measurement.
-    start = _time_ns()
-
-    @inbounds begin
-        _timings[len] = Timing(
-            new_timer.mi_info,
-            start,
-            start,
-            new_timer.time,
-            new_timer.children,
-        )
-    end
-
-    return nothing
-end
-
-# _expected_frame_ is not needed within this function; it is used in the `@assert`, to
-# assert that indeed we are always returning to a parent after finishing all of its
-# children (that is, asserting that inference proceeds via depth-first-search).
-@inline function exit_current_timer(_expected_frame_)
-    # Finish the new timer
-    stop_time = _time_ns()
-
-    expected_mi_info = _typeinf_identifier(_expected_frame_)
-
-    # Grab the new timer again because it might have been modified in _timings
-    # (since it's an immutable struct)
-    # And remove it from the current timings stack
-    new_timer = pop!(_timings)
-    Core.Compiler.@assert new_timer.mi_info.mi === expected_mi_info.mi
-
-    # Prepare to unwind one level of the stack and record in the parent
-    parent_timer = _timings[end]
-
-    accum_time = stop_time - new_timer.cur_start_time
-    # Add in accum_time ("modify" the immutable struct)
-    new_timer = Timing(
-        new_timer.mi_info,
-        new_timer.start_time,
-        new_timer.cur_start_time,
-        new_timer.time + accum_time,
-        new_timer.children,
-        parent_timer.mi_info.mi === ROOTmi ? backtrace() : nothing,
-    )
-    # Record the final timing with the original parent timer
-    push!(parent_timer.children, new_timer)
-
-    # And finally restart the parent timer:
-    len = length(_timings)
-    @inbounds begin
-        _timings[len] = Timing(
-            parent_timer.mi_info,
-            parent_timer.start_time,
-            _time_ns(),
-            parent_timer.time,
-            parent_timer.children,
-            parent_timer.bt,
-        )
-    end
-
-    return nothing
-end
+function reset_timings() end
+push!(_timings, Timing(
+    # The MethodInstance for ROOT(), and default empty values for other fields.
+    InferenceFrameInfo(ROOTmi, 0x0, Core.Compiler.VarState[], Any[Core.Const(ROOT)], 1),
+    _time_ns()))
+function close_current_timer() end
+function enter_new_timer(frame) end
+function exit_current_timer(_expected_frame_) end
 
 end  # module Timings
 
@@ -194,19 +94,7 @@ end  # module Timings
 If set to `true`, record per-method-instance timings within type inference in the Compiler.
 """
 __set_measure_typeinf(onoff::Bool) = __measure_typeinf__[] = onoff
-const __measure_typeinf__ = fill(false)
-
-# Wrapper around `_typeinf` that optionally records the exclusive time for each invocation.
-function typeinf(interp::AbstractInterpreter, frame::InferenceState)
-    if __measure_typeinf__[]
-        Timings.enter_new_timer(frame)
-        v = _typeinf(interp, frame)
-        Timings.exit_current_timer(frame)
-        return v
-    else
-        return _typeinf(interp, frame)
-    end
-end
+const __measure_typeinf__ = RefValue{Bool}(false)
 
 function finish!(interp::AbstractInterpreter, caller::InferenceState;
                  can_discard_trees::Bool=may_discard_trees(interp))
@@ -258,19 +146,6 @@ function finish!(interp::AbstractInterpreter, caller::InferenceState;
     return nothing
 end
 
-function _typeinf(interp::AbstractInterpreter, frame::InferenceState)
-    typeinf_nocycle(interp, frame) || return false # frame is now part of a higher cycle
-    # with no active ip's, frame is done
-    frames = frame.callstack::Vector{AbsIntState}
-    if length(frames) == frame.cycleid
-        finish_nocycle(interp, frame)
-    else
-        @assert frame.cycleid != 0
-        finish_cycle(interp, frames, frame.cycleid)
-    end
-    return true
-end
-
 function finish_nocycle(::AbstractInterpreter, frame::InferenceState)
     finishinfer!(frame, frame.interp)
     opt = frame.result.src
@@ -762,16 +637,11 @@ function merge_call_chain!(interp::AbstractInterpreter, parent::InferenceState,
         add_cycle_backedge!(parent, child)
         parent.cycleid === ancestorid && break
         child = parent
-        parent = frame_parent(child)
-        while !isa(parent, InferenceState)
-            # XXX we may miss some edges here?
-            parent = frame_parent(parent::IRInterpretationState)
-        end
+        parent = frame_parent(child)::InferenceState
     end
     # ensure that walking the callstack has the same cycleid (DAG)
     for frame = reverse(ancestorid:length(frames))
-        frame = frames[frame]
-        frame isa InferenceState || continue
+        frame = frames[frame]::InferenceState
         frame.cycleid == ancestorid && break
         @assert frame.cycleid > ancestorid
         frame.cycleid = ancestorid
@@ -796,9 +666,9 @@ end
 # returned instead.
 function resolve_call_cycle!(interp::AbstractInterpreter, mi::MethodInstance, parent::AbsIntState)
     # TODO (#48913) implement a proper recursion handling for irinterp:
-    # This works just because currently the `:terminate` condition guarantees that
-    # irinterp doesn't fail into unresolved cycles, but it's not a good solution.
-    # We should revisit this once we have a better story for handling cycles in irinterp.
+    # This works currently just because the irinterp code doesn't get used much with
+    # `@assume_effects`, so it never sees a cycle normally, but that may not be a sustainable solution.
+    parent isa InferenceState || return false
     frames = parent.callstack::Vector{AbsIntState}
     uncached = false
     for frame = reverse(1:length(frames))
@@ -837,15 +707,43 @@ struct EdgeCallResult
 end
 
 # return cached result of regular inference
-function return_cached_result(::AbstractInterpreter, codeinst::CodeInstance, caller::AbsIntState)
+function return_cached_result(interp::AbstractInterpreter, method::Method, codeinst::CodeInstance, caller::AbsIntState, edgecycle::Bool, edgelimited::Bool)
     rt = cached_return_type(codeinst)
     effects = ipo_effects(codeinst)
     update_valid_age!(caller, WorldRange(min_world(codeinst), max_world(codeinst)))
-    return EdgeCallResult(rt, codeinst.exctype, codeinst.def, effects)
+    return Future(EdgeCall_to_MethodCall_Result(interp, caller, method, EdgeCallResult(rt, codeinst.exctype, codeinst.def, effects), edgecycle, edgelimited))
+end
+
+function EdgeCall_to_MethodCall_Result(interp::AbstractInterpreter, sv::AbsIntState, method::Method, result::EdgeCallResult, edgecycle::Bool, edgelimited::Bool)
+    (; rt, exct, edge, effects, volatile_inf_result) = result
+
+    if edge === nothing
+        edgecycle = edgelimited = true
+    end
+
+    # we look for the termination effect override here as well, since the :terminates effect
+    # may have been tainted due to recursion at this point even if it's overridden
+    if is_effect_overridden(sv, :terminates_globally)
+        # this frame is known to terminate
+        effects = Effects(effects, terminates=true)
+    elseif is_effect_overridden(method, :terminates_globally)
+        # this edge is known to terminate
+        effects = Effects(effects; terminates=true)
+    elseif edgecycle
+        # Some sort of recursion was detected.
+        if edge !== nothing && !edgelimited && !is_edge_recursed(edge, sv)
+            # no `MethodInstance` cycles -- don't taint :terminate
+        else
+            # we cannot guarantee that the call will terminate
+            effects = Effects(effects; terminates=false)
+        end
+    end
+
+    return MethodCallResult(rt, exct, edgecycle, edgelimited, edge, effects, volatile_inf_result)
 end
 
 # compute (and cache) an inferred AST and return the current best estimate of the result type
-function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, caller::AbsIntState)
+function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, caller::AbsIntState, edgecycle::Bool, edgelimited::Bool)
     mi = specialize_method(method, atype, sparams)::MethodInstance
     cache_mode = CACHE_MODE_GLOBAL # cache edge targets globally by default
     force_inline = is_stmt_inline(get_curr_ssaflag(caller))
@@ -859,13 +757,13 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize
                 cache_mode = CACHE_MODE_VOLATILE
             else
                 @assert codeinst.def === mi "MethodInstance for cached edge does not match"
-                return return_cached_result(interp, codeinst, caller)
+                return return_cached_result(interp, method, codeinst, caller, edgecycle, edgelimited)
             end
         end
     end
     if ccall(:jl_get_module_infer, Cint, (Any,), method.module) == 0 && !generating_output(#=incremental=#false)
         add_remark!(interp, caller, "[typeinf_edge] Inference is disabled for the target module")
-        return EdgeCallResult(Any, Any, nothing, Effects())
+        return Future(EdgeCall_to_MethodCall_Result(interp, caller, method, EdgeCallResult(Any, Any, nothing, Effects()), edgecycle, edgelimited))
     end
     if !is_cached(caller) && frame_parent(caller) === nothing
         # this caller exists to return to the user
@@ -886,7 +784,7 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize
                         cache_mode = CACHE_MODE_VOLATILE
                     else
                         @assert codeinst.def === mi "MethodInstance for cached edge does not match"
-                        return return_cached_result(interp, codeinst, caller)
+                        return return_cached_result(interp, method, codeinst, caller, edgecycle, edgelimited)
                     end
                 end
             end
@@ -902,31 +800,40 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize
             if cache_mode == CACHE_MODE_GLOBAL
                 engine_reject(interp, ci)
             end
-            return EdgeCallResult(Any, Any, nothing, Effects())
+            return Future(EdgeCall_to_MethodCall_Result(interp, caller, method, EdgeCallResult(Any, Any, nothing, Effects()), edgecycle, edgelimited))
         end
         assign_parentchild!(frame, caller)
-        typeinf(interp, frame)
-        update_valid_age!(caller, frame.valid_worlds)
-        isinferred = is_inferred(frame)
-        edge = isinferred ? mi : nothing
-        effects = isinferred ? frame.result.ipo_effects : # effects are adjusted already within `finish` for ipo_effects
-            adjust_effects(effects_for_cycle(frame.ipo_effects), method)
-        exc_bestguess = refine_exception_type(frame.exc_bestguess, effects)
-        # propagate newly inferred source to the inliner, allowing efficient inlining w/o deserialization:
-        # note that this result is cached globally exclusively, so we can use this local result destructively
-        volatile_inf_result = isinferred ? VolatileInferenceResult(result) : nothing
-        return EdgeCallResult(frame.bestguess, exc_bestguess, edge, effects, volatile_inf_result)
+        # the actual inference task for this edge is going to be scheduled within `typeinf_local` via the callstack queue
+        # while splitting off the rest of the work for this caller into a separate workq thunk
+        let mresult = Future{MethodCallResult}()
+            push!(caller.tasks, function get_infer_result(interp, caller)
+                update_valid_age!(caller, frame.valid_worlds)
+                local isinferred = is_inferred(frame)
+                local edge = isinferred ? mi : nothing
+                local effects = isinferred ? frame.result.ipo_effects : # effects are adjusted already within `finish` for ipo_effects
+                    adjust_effects(effects_for_cycle(frame.ipo_effects), method)
+                local exc_bestguess = refine_exception_type(frame.exc_bestguess, effects)
+                # propagate newly inferred source to the inliner, allowing efficient inlining w/o deserialization:
+                # note that this result is cached globally exclusively, so we can use this local result destructively
+                local volatile_inf_result = isinferred ? VolatileInferenceResult(result) : nothing
+                local edgeresult = EdgeCallResult(frame.bestguess, exc_bestguess, edge, effects, volatile_inf_result)
+                mresult[] = EdgeCall_to_MethodCall_Result(interp, caller, method, edgeresult, edgecycle, edgelimited)
+                return true
+            end)
+            return mresult
+        end
     elseif frame === true
         # unresolvable cycle
         add_remark!(interp, caller, "[typeinf_edge] Unresolvable cycle")
-        return EdgeCallResult(Any, Any, nothing, Effects())
+        return Future(EdgeCall_to_MethodCall_Result(interp, caller, method, EdgeCallResult(Any, Any, nothing, Effects()), edgecycle, edgelimited))
     end
     # return the current knowledge about this cycle
     frame = frame::InferenceState
     update_valid_age!(caller, frame.valid_worlds)
     effects = adjust_effects(effects_for_cycle(frame.ipo_effects), method)
     exc_bestguess = refine_exception_type(frame.exc_bestguess, effects)
-    return EdgeCallResult(frame.bestguess, exc_bestguess, nothing, effects)
+    edgeresult = EdgeCallResult(frame.bestguess, exc_bestguess, nothing, effects)
+    return Future(EdgeCall_to_MethodCall_Result(interp, caller, method, edgeresult, edgecycle, edgelimited))
 end
 
 # The `:terminates` effect bit must be conservatively tainted unless recursion cycle has
diff --git a/base/compiler/types.jl b/base/compiler/types.jl
index b475e360dac02..c51785f23ea29 100644
--- a/base/compiler/types.jl
+++ b/base/compiler/types.jl
@@ -1,4 +1,12 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
+#
+
+const WorkThunk = Any
+# #@eval struct WorkThunk
+#    thunk::Core.OpaqueClosure{Tuple{Vector{Tasks}}, Bool}
+#    WorkThunk(work) = new($(Expr(:opaque_closure, :(Tuple{Vector{Tasks}}), :Bool, :Bool, :((tasks) -> work(tasks))))) # @opaque Vector{Tasks}->Bool (tasks)->work(tasks)
+# end
+# (p::WorkThunk)() = p.thunk()
 
 """
     AbstractInterpreter
diff --git a/base/reflection.jl b/base/reflection.jl
index fe48b6f9aa6b9..df29b9a5b1a4e 100644
--- a/base/reflection.jl
+++ b/base/reflection.jl
@@ -2447,7 +2447,7 @@ true
 ```
 """
 function hasmethod(@nospecialize(f), @nospecialize(t))
-    return Core._hasmethod(f, t isa Type ? t : to_tuple_type(t))
+    return Core._hasmethod(signature_type(f, t))
 end
 
 function Core.kwcall(kwargs::NamedTuple, ::typeof(hasmethod), @nospecialize(f), @nospecialize(t))
diff --git a/test/compiler/AbstractInterpreter.jl b/test/compiler/AbstractInterpreter.jl
index bab4fe02a5168..009128b289ade 100644
--- a/test/compiler/AbstractInterpreter.jl
+++ b/test/compiler/AbstractInterpreter.jl
@@ -415,10 +415,13 @@ function CC.abstract_call(interp::NoinlineInterpreter,
     arginfo::CC.ArgInfo, si::CC.StmtInfo, sv::CC.InferenceState, max_methods::Int)
     ret = @invoke CC.abstract_call(interp::CC.AbstractInterpreter,
         arginfo::CC.ArgInfo, si::CC.StmtInfo, sv::CC.InferenceState, max_methods::Int)
-    if sv.mod in noinline_modules(interp)
-        return CC.CallMeta(ret.rt, ret.exct, ret.effects, NoinlineCallInfo(ret.info))
+    return CC.Future{CC.CallMeta}(ret, interp, sv) do ret, interp, sv
+        if sv.mod in noinline_modules(interp)
+            (;rt, exct, effects, info) = ret
+            return CC.CallMeta(rt, exct, effects, NoinlineCallInfo(info))
+        end
+        return ret
     end
-    return ret
 end
 function CC.src_inlining_policy(interp::NoinlineInterpreter,
     @nospecialize(src), @nospecialize(info::CallInfo), stmt_flag::UInt32)
diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl
index 46009e0790942..7c7726413004a 100644
--- a/test/compiler/inference.jl
+++ b/test/compiler/inference.jl
@@ -3887,113 +3887,6 @@ f_apply_cglobal(args...) = cglobal(args...)
 f37532(T, x) = (Core.bitcast(Ptr{T}, x); x)
 @test Base.return_types(f37532, Tuple{Any, Int}) == Any[Int]
 
-# PR #37749
-# Helper functions for Core.Compiler.Timings. These are normally accessed via a package -
-# usually (SnoopCompileCore).
-function time_inference(f)
-    Core.Compiler.Timings.reset_timings()
-    Core.Compiler.__set_measure_typeinf(true)
-    f()
-    Core.Compiler.__set_measure_typeinf(false)
-    Core.Compiler.Timings.close_current_timer()
-    return Core.Compiler.Timings._timings[1]
-end
-function depth(t::Core.Compiler.Timings.Timing)
-    maximum(depth.(t.children), init=0) + 1
-end
-function flatten_times(t::Core.Compiler.Timings.Timing)
-    collect(Iterators.flatten([(t.time => t.mi_info,), flatten_times.(t.children)...]))
-end
-# Some very limited testing of timing the type inference (#37749).
-@testset "Core.Compiler.Timings" begin
-    # Functions that call each other
-    @eval module M1
-        i(x) = x+5
-        i2(x) = x+2
-        h(a::Array) = i2(a[1]::Integer) + i(a[1]::Integer) + 2
-        g(y::Integer, x) = h(Any[y]) + Int(x)
-    end
-    timing1 = time_inference() do
-        @eval M1.g(2, 3.0)
-    end
-    @test occursin(r"Core.Compiler.Timings.Timing\(InferenceFrameInfo for Core.Compiler.Timings.ROOT\(\)\) with \d+ children", sprint(show, timing1))
-    # The last two functions to be inferred should be `i` and `i2`, inferred at runtime with
-    # their concrete types.
-    @test sort([mi_info.mi.def.name for (time,mi_info) in flatten_times(timing1)[end-1:end]]) == [:i, :i2]
-    @test all(child->isa(child.bt, Vector), timing1.children)
-    @test all(child->child.bt===nothing, timing1.children[1].children)
-    # Test the stacktrace
-    @test isa(stacktrace(timing1.children[1].bt), Vector{Base.StackTraces.StackFrame})
-    # Test that inference has cached some of the Method Instances
-    timing2 = time_inference() do
-        @eval M1.g(2, 3.0)
-    end
-    @test length(flatten_times(timing2)) < length(flatten_times(timing1))
-    # Printing of InferenceFrameInfo for mi.def isa Module
-    @eval module M2
-        i(x) = x+5
-        i2(x) = x+2
-        h(a::Array) = i2(a[1]::Integer) + i(a[1]::Integer) + 2
-        g(y::Integer, x) = h(Any[y]) + Int(x)
-    end
-    # BEGIN LINE NUMBER SENSITIVITY (adjust the line offset below as needed)
-    timingmod = time_inference() do
-        @eval @testset "Outer" begin
-            @testset "Inner" begin
-                for i = 1:2 M2.g(2, 3.0) end
-            end
-        end
-    end
-    @test occursin("thunk from $(@__MODULE__) starting at $(@__FILE__):$((@__LINE__) - 6)", string(timingmod.children))
-    # END LINE NUMBER SENSITIVITY
-
-    # Recursive function
-    @eval module _Recursive f(n::Integer) = n == 0 ? 0 : f(n-1) + 1 end
-    timing = time_inference() do
-        @eval _Recursive.f(Base.inferencebarrier(5))
-    end
-    @test 2 <= depth(timing) <= 3  # root -> f (-> +)
-    @test 2 <= length(flatten_times(timing)) <= 3  # root, f, +
-
-    # Functions inferred with multiple constants
-    @eval module C
-        i(x) = x === 0 ? 0 : 1 / x
-        a(x) = i(0) * i(x)
-        b() = i(0) * i(1) * i(0)
-        function loopc(n)
-            s = 0
-            for i = 1:n
-                s += i
-            end
-            return s
-        end
-        call_loopc() = loopc(5)
-        myfloor(::Type{T}, x) where T = floor(T, x)
-        d(x) = myfloor(Int16, x)
-    end
-    timing = time_inference() do
-        @eval C.a(2)
-        @eval C.b()
-        @eval C.call_loopc()
-        @eval C.d(3.2)
-    end
-    ft = flatten_times(timing)
-    @test !isempty(ft)
-    str = sprint(show, ft)
-    @test occursin("InferenceFrameInfo for /(1::$Int, ::$Int)", str)  # inference constants
-    @test occursin("InferenceFrameInfo for Core.Compiler.Timings.ROOT()", str) # qualified
-    # loopc has internal slots, check constant printing in this case
-    sel = filter(ti -> ti.second.mi.def.name === :loopc, ft)
-    ifi = sel[end].second
-    @test length(ifi.slottypes) > ifi.nargs
-    str = sprint(show, sel)
-    @test occursin("InferenceFrameInfo for $(@__MODULE__).C.loopc(5::$Int)", str)
-    # check that types aren't double-printed as `T::Type{T}`
-    sel = filter(ti -> ti.second.mi.def.name === :myfloor, ft)
-    str = sprint(show, sel)
-    @test occursin("InferenceFrameInfo for $(@__MODULE__).C.myfloor(::Type{Int16}, ::Float64)", str)
-end
-
 # issue #37638
 @test only(Base.return_types(() -> (nothing, Any[]...)[2])) isa Type