Skip to content

Commit d386165

Browse files
committed
add basic benchmarks for Julia-level compilation pipeline
This commit setups a basic infrastructure for benchmarking Julia-level compilation pipeline. `InferenceBenchmarks` is based on `InferenceBenchmarker <: AbstractInterpreter`, which maintains its own global inference cache, and so it allows us to run the compilation pipeline multiple times while avoiding caches generated by previous compilation to be reused. I set up a top-level benchmark group named `"inference": InferenceBenchmarks`, which is composed of the following subgroups: - `"inference"`: just benchmarks overall Julia-level compilation pipeline - `"abstract interpretation"`: benchmarks only abstract interpretation, i.e. without optimization - `"optimization"`: benchmarks only optimization Here is an example of benchmark result obtained by comparing these two commits of `JuliaLang/julia` [`5c357e9`](JuliaLang/julia@5c357e9) and [`d515f05`](JuliaLang/julia@d515f05): ```julia \# built on 5c357e9 using BenchmarkTools, BaseBenchmarks BaseBenchmarks.load!("inference") results = run(BaseBenchmarks.SUITE; verbose = true) BenchmarkTools.save("5c357e9.json", results) \# built on d515f05 using BenchmarkTools, BaseBenchmarks BaseBenchmarks.load!("inference") results = run(BaseBenchmarks.SUITE; verbose = true) BenchmarkTools.save("d515f05.json", results) \# compare using BenchmarkTools, BaseBenchmarks base = BenchmarkTools.load("5c357e9.json")[1] target = BenchmarkTools.load("d515f05.json")[1] ``` ``` julia> leaves(regressions(judge(minimum(target), minimum(base)))) Any[] julia> leaves(improvements(judge(minimum(target), minimum(base)))) 6-element Vector{Any}: (Any["inference", "inference", "rand(Float64)"], TrialJudgement(-2.85% => invariant)) (Any["inference", "inference", "sin(42)"], TrialJudgement(-2.44% => invariant)) (Any["inference", "inference", "abstract_call_gf_by_type"], TrialJudgement(-1.97% => invariant)) (Any["inference", "inference", "println(::QuoteNode)"], TrialJudgement(-0.96% => invariant)) (Any["inference", "optimization", "sin(42)"], 
TrialJudgement(+1.26% => invariant)) (Any["inference", "optimization", "println(::QuoteNode)"], TrialJudgement(-6.97% => improvement)) ``` This result is very satisfying because the refactor added in `d515f05` certainly improved Julia-level compilation performance by avoiding domtree construction in the SROA pass in many cases.
1 parent 5892382 commit d386165

File tree

3 files changed

+191
-1
lines changed

3 files changed

+191
-1
lines changed

Project.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ uuid = "d7f09723-0a21-57e2-b9ef-316b714b6879"
55
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
66
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
77
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
8+
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
89
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
910
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
1011
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"

src/BaseBenchmarks.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,9 @@ const MODULES = Dict("array" => :ArrayBenchmarks,
2929
"sparse" => :SparseBenchmarks,
3030
"string" => :StringBenchmarks,
3131
"tuple" => :TupleBenchmarks,
32-
"frontend" => :FrontendBenchmarks)
32+
"frontend" => :FrontendBenchmarks,
33+
)
34+
@static VERSION ≥ v"1.8-DEV" && push!(MODULES, "inference" => :InferenceBenchmarks)
3335

3436
load!(id::AbstractString; kwargs...) = load!(SUITE, id; kwargs...)
3537

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
"""
    "inference" => InferenceBenchmarks

Defines a benchmark suite for the Julia-level compilation pipeline.
Note that this benchmark suite is only available for Julia 1.8 and higher.

This benchmark group `"inference"` is composed of the following subgroups:
- `"inference"`: benchmarks the overall Julia-level compilation pipeline per each static call
- `"abstract interpretation"`: benchmarks abstract interpretation per each static call (without optimization)
- `"optimization"`: benchmarks optimization passes applied per a single call frame
"""
module InferenceBenchmarks

using BenchmarkTools, InteractiveUtils

# shorthand for the compiler module; the overloads below extend functions from here
const CC = Core.Compiler

# `AbstractInterpreter` interface entry points overloaded for `InferenceBenchmarker`
# (NOTE: `get`, `getindex`, `haskey` and `setindex!` are extended below with explicit
# `CC.`-qualification, so they don't need to be imported here)
import .CC:
    may_optimize, may_compress, may_discard_trees, InferenceParams, OptimizationParams,
    get_world_counter, get_inference_cache, code_cache

# utilities used to implement the benchmark drivers
# (`svec` added: it is used by `method_sparams` but was missing from this list)
import Core:
    MethodInstance, CodeInstance, MethodMatch, SimpleVector, Typeof, svec
import .CC:
    AbstractInterpreter, NativeInterpreter, WorldRange, WorldView, InferenceResult,
    InferenceState, OptimizationState,
    _methods_by_ftype, specialize_method, unwrap_unionall, rewrap_unionall, widenconst,
    typeinf, optimize
29+
30+
# A private code cache for `InferenceBenchmarker`, mapping each `MethodInstance` to
# its inferred `CodeInstance` independently of the native global cache, so that
# repeated benchmark runs don't pick up results cached by earlier runs.
struct InferenceBenchmarkerCache
    dict::IdDict{MethodInstance,CodeInstance}
end
33+
"""
    InferenceBenchmarker(world::UInt = get_world_counter(); kwargs...)

An `AbstractInterpreter` for benchmarking: it delegates to a wrapped
`NativeInterpreter` but owns a fresh, interpreter-local code cache, allowing the
compilation pipeline to be exercised repeatedly without cache reuse.
"""
struct InferenceBenchmarker <: AbstractInterpreter
    native::NativeInterpreter        # delegate for params, world age, inference cache
    optimize::Bool                   # value returned from `may_optimize`
    compress::Bool                   # value returned from `may_compress`
    discard_trees::Bool              # value returned from `may_discard_trees`
    cache::InferenceBenchmarkerCache # interpreter-local code cache
    function InferenceBenchmarker(
        world::UInt = get_world_counter();
        inf_params::InferenceParams = InferenceParams(),
        opt_params::OptimizationParams = OptimizationParams(),
        optimize::Bool = true,
        compress::Bool = true,
        discard_trees::Bool = true,
        cache::InferenceBenchmarkerCache = InferenceBenchmarkerCache(IdDict{MethodInstance,CodeInstance}()),
    )
        delegate = NativeInterpreter(world; inf_params, opt_params)
        return new(delegate, optimize, compress, discard_trees, cache)
    end
end
52+
53+
# `AbstractInterpreter` interface: forward the configuration flags stored in
# `InferenceBenchmarker`, and delegate params/world/inference-cache queries to the
# wrapped native interpreter.
CC.may_optimize(interp::InferenceBenchmarker) = interp.optimize
CC.may_compress(interp::InferenceBenchmarker) = interp.compress
CC.may_discard_trees(interp::InferenceBenchmarker) = interp.discard_trees
CC.InferenceParams(interp::InferenceBenchmarker) = InferenceParams(interp.native)
CC.OptimizationParams(interp::InferenceBenchmarker) = OptimizationParams(interp.native)
CC.get_world_counter(interp::InferenceBenchmarker) = get_world_counter(interp.native)
CC.get_inference_cache(interp::InferenceBenchmarker) = get_inference_cache(interp.native)
# `code_cache` returns the interpreter-local cache (not the global one), which is
# what lets each benchmark run start from a cache-free state.
CC.code_cache(interp::InferenceBenchmarker) = WorldView(interp.cache, WorldRange(get_world_counter(interp)))
# dictionary interface on the `WorldView` of the local cache, as required by `code_cache` consumers
CC.get(wvc::WorldView{<:InferenceBenchmarkerCache}, mi::MethodInstance, default) = get(wvc.cache.dict, mi, default)
CC.getindex(wvc::WorldView{<:InferenceBenchmarkerCache}, mi::MethodInstance) = getindex(wvc.cache.dict, mi)
CC.haskey(wvc::WorldView{<:InferenceBenchmarkerCache}, mi::MethodInstance) = haskey(wvc.cache.dict, mi)
CC.setindex!(wvc::WorldView{<:InferenceBenchmarkerCache}, ci::CodeInstance, mi::MethodInstance) = setindex!(wvc.cache.dict, ci, mi)
65+
66+
# Infer the single method that matches the given signature type `tt`
# (e.g. `Tuple{typeof(sin),Int}`), forwarding `kwargs...` to the inference driver.
function inf_gf_by_type!(interp::InferenceBenchmarker, @nospecialize(tt::Type{<:Tuple}); kwargs...)
    match = get_single_method_match(tt, InferenceParams(interp).MAX_METHODS, get_world_counter(interp))
    return inf_method_signature!(interp, match.method, match.spec_types, match.sparams; kwargs...)
end
70+
71+
# Look up the unique method whose specialization signature is exactly `tt`,
# erroring out when the lookup fails or when zero/multiple candidates remain.
function get_single_method_match(@nospecialize(tt), lim, world)
    candidates = _methods_by_ftype(tt, lim, world)
    isa(candidates, Bool) && error("unable to find matching method for $(tt)")
    # keep only the exact-signature match (discard more general applicable methods)
    filter!(candidate::MethodMatch->candidate.spec_types===tt, candidates)
    length(candidates) == 1 || error("unable to find single target method for $(tt)")
    return first(candidates)::MethodMatch
end
78+
79+
# Infer a method `m` with its generic signature and its own static parameters.
function inf_method!(interp::InferenceBenchmarker, m::Method; kwargs...)
    return inf_method_signature!(interp, m, m.sig, method_sparams(m); kwargs...)
end
81+
"""
    method_sparams(m::Method) -> sparams::SimpleVector

Collect the static type parameters (`TypeVar`s) of `m`'s signature, outermost
first, as a `SimpleVector` suitable for passing to `specialize_method`.
"""
function method_sparams(m::Method)
    sparams = TypeVar[]
    sig = m.sig
    # peel off the `UnionAll` wrappers to gather each signature-level type variable
    while isa(sig, UnionAll)
        push!(sparams, sig.var)
        sig = sig.body
    end
    # qualified as `Core.svec`: `svec` is not in this module's visible import list
    return Core.svec(sparams...)
end
90+
# Infer method `m` specialized for signature `atype` with static parameters `sparams`.
function inf_method_signature!(interp::InferenceBenchmarker, m::Method, @nospecialize(atype), sparams::SimpleVector; kwargs...)
    return inf_method_instance!(interp, specialize_method(m, atype, sparams)::MethodInstance; kwargs...)
end
92+
93+
# Run type inference on the given `MethodInstance` and return the inference frame.
# When `run_optimizer` is set the result is cached (`:global` cache mode) so that
# the optimizer will subsequently run on it; otherwise caching is disabled (`:no`).
function inf_method_instance!(interp::InferenceBenchmarker, mi::MethodInstance;
                              run_optimizer::Bool = true)
    inf_result = InferenceResult(mi)
    state = InferenceState(inf_result, #=cache=# run_optimizer ? :global : :no, interp)::InferenceState
    typeinf(interp, state)
    return state
end
100+
101+
"""
    @inf_call f(args...)

Applies `inf_call` to the given call expression, extracting argument types
via `InteractiveUtils`.
"""
macro inf_call(ex0...)
    return InteractiveUtils.gen_call_with_extracted_types_and_kwargs(__module__, :inf_call, ex0)
end

"""
    inf_call(f, types = Tuple{}; interp = InferenceBenchmarker(), run_optimizer = true)

Benchmark driver: run the Julia-level compilation pipeline for the call signature
`f(::types...)` using the given (freshly-cached) `interp`.
"""
function inf_call(@nospecialize(f), @nospecialize(types = Tuple{});
                  interp = InferenceBenchmarker(),
                  run_optimizer = true)
    ft = Typeof(f)
    if types isa Type
        # re-wrap any type variables of `types` around the full signature tuple
        unwrapped = unwrap_unionall(types)
        tt = rewrap_unionall(Tuple{ft, unwrapped.parameters...}, types)
    else
        tt = Tuple{ft, types...}
    end
    return inf_gf_by_type!(interp, tt; run_optimizer)
end
116+
117+
"""
    @abs_call f(args...)

Applies `abs_call` to the given call expression, extracting argument types
via `InteractiveUtils`.
"""
macro abs_call(ex0...)
    return InteractiveUtils.gen_call_with_extracted_types_and_kwargs(__module__, :abs_call, ex0)
end

# Like `inf_call`, but with optimization disabled on the interpreter:
# benchmarks abstract interpretation only.
function abs_call(@nospecialize(f), @nospecialize(types = Tuple{});
                  interp = InferenceBenchmarker(; optimize = false))
    return inf_call(f, types; interp)
end
124+
125+
"""
    @opt_call f(args...)

Applies `opt_call` to the given call expression, extracting argument types
via `InteractiveUtils`.
"""
macro opt_call(ex0...)
    return InteractiveUtils.gen_call_with_extracted_types_and_kwargs(__module__, :opt_call, ex0)
end

# Infer the call without optimizing it, and return a thunk that performs only the
# optimization passes on the inferred frame. The thunk mutates that frame, so each
# constructed thunk should be invoked once (hence `evals = 1` in the benchmarks).
function opt_call(@nospecialize(f), @nospecialize(types = Tuple{});
                  interp = InferenceBenchmarker())
    frame = inf_call(f, types; interp, run_optimizer = false)
    return function ()
        opt_params = OptimizationParams(interp)
        opt_state = OptimizationState(frame, opt_params, interp)
        rettype = widenconst(frame.result.result)
        optimize(interp, opt_state, opt_params, rettype)
    end
end
138+
139+
# Adjust benchmark parameters for the compilation benchmarks: they are long-running
# (so a generous time budget and GC sampling), and their `setup` phases produce
# single-use state, so every sample must evaluate exactly once.
function tune_benchmarks!(
    g::BenchmarkGroup;
    seconds=30,
    gcsample=true,
)
    for benchmark in values(g)
        benchmark.params.seconds = seconds
        benchmark.params.gcsample = gcsample
        benchmark.params.evals = 1 # `setup` must be functional
    end
end
150+
151+
const SUITE = BenchmarkGroup()

# TODO add TTFP?

# benchmarks abstract interpretation only (optimization is disabled by `abs_call`)
let group = addgroup!(SUITE, "abstract interpretation")
    group["sin(42)"] = @benchmarkable (@abs_call sin(42))
    group["rand(Float64)"] = @benchmarkable (@abs_call rand(Float64))
    group["println(::QuoteNode)"] = @benchmarkable (abs_call(println, (QuoteNode,)))
    group["abstract_call_gf_by_type"] = @benchmarkable abs_call(
        CC.abstract_call_gf_by_type, (NativeInterpreter,Any,CC.ArgInfo,Any,InferenceState,Int))
    group["construct_ssa!"] = @benchmarkable abs_call(CC.construct_ssa!, (Core.CodeInfo,CC.IRCode,CC.DomTree,Vector{CC.SlotInfo},Vector{Any}))
    group["domsort_ssa!"] = @benchmarkable abs_call(CC.domsort_ssa!, (CC.IRCode,CC.DomTree))
    tune_benchmarks!(group)
end

# benchmarks the optimization passes only: each sample optimizes a frame that was
# inferred in `setup` (single-use thunk, see `opt_call`)
let group = addgroup!(SUITE, "optimization")
    group["sin(42)"] = @benchmarkable f() (setup = (f = @opt_call sin(42)))
    group["rand(Float64)"] = @benchmarkable f() (setup = (f = @opt_call rand(Float64)))
    group["println(::QuoteNode)"] = @benchmarkable f() (setup = (f = opt_call(println, (QuoteNode,))))
    group["abstract_call_gf_by_type"] = @benchmarkable f() (setup = (f = opt_call(CC.abstract_call_gf_by_type, (NativeInterpreter,Any,CC.ArgInfo,Any,InferenceState,Int))))
    group["construct_ssa!"] = @benchmarkable f() (setup = (f = opt_call(CC.construct_ssa!, (Core.CodeInfo,CC.IRCode,CC.DomTree,Vector{CC.SlotInfo},Vector{Any}))))
    group["domsort_ssa!"] = @benchmarkable f() (setup = (f = opt_call(CC.domsort_ssa!, (CC.IRCode,CC.DomTree))))
    tune_benchmarks!(group)
end

# benchmarks the overall compilation pipeline (abstract interpretation + optimization)
let group = addgroup!(SUITE, "inference")
    group["sin(42)"] = @benchmarkable (@inf_call sin(42))
    group["rand(Float64)"] = @benchmarkable (@inf_call rand(Float64))
    group["println(::QuoteNode)"] = @benchmarkable (inf_call(println, (QuoteNode,)))
    group["abstract_call_gf_by_type"] = @benchmarkable inf_call(
        CC.abstract_call_gf_by_type, (NativeInterpreter,Any,CC.ArgInfo,Any,InferenceState,Int))
    group["construct_ssa!"] = @benchmarkable inf_call(CC.construct_ssa!, (Core.CodeInfo,CC.IRCode,CC.DomTree,Vector{CC.SlotInfo},Vector{Any}))
    group["domsort_ssa!"] = @benchmarkable inf_call(CC.domsort_ssa!, (CC.IRCode,CC.DomTree))
    tune_benchmarks!(group)
end
186+
187+
end # module InferenceBenchmarks

0 commit comments

Comments
 (0)