From d20ed1698b15e796e9106eed0acbd91b6355d68c Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Sat, 20 Jun 2020 14:28:09 +0900 Subject: [PATCH 01/25] Add pstats macro --- Project.toml | 2 +- src/LinuxPerf.jl | 266 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 266 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index 5342695..c4843f5 100644 --- a/Project.toml +++ b/Project.toml @@ -8,6 +8,6 @@ PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" [compat] -julia = "1" Formatting = "0.4" PrettyTables = "0.9" +julia = "1" diff --git a/src/LinuxPerf.jl b/src/LinuxPerf.jl index 53598d7..91e9d90 100644 --- a/src/LinuxPerf.jl +++ b/src/LinuxPerf.jl @@ -77,11 +77,17 @@ const EVENT_TYPES = (:scaled_cycles, 9) # PERF_COUNT_HW_REF_CPU_CYCLES ]), (:sw, 1, # PERF_TYPE_SOFTWARE - [(:page_faults, 2), # PERF_COUNT_SW_PAGE_FAULTS + [(:cpu_clock, 0), # PERF_COUNT_SW_CPU_CLOCK + (:task_clock, 1), # PEF_COUNT_SW_TASK_CLOCK + (:page_faults, 2), # PERF_COUNT_SW_PAGE_FAULTS (:ctx_switches, 3), # PERF_COUNT_SW_CONTEXT_SWITCHES (:cpu_migrations, 4), # PERF_COUNT_SW_CPU_MIGRATIONS (:minor_page_faults, 5), # PERF_COUNT_SW_PAGE_FAULTS_MIN (:major_page_faults, 6), # PERF_COUNT_SW_PAGE_FAULTS_MAJ + (:alignment_faults, 7), # PERF_COUNT_SW_ALIGNMENT_FAULTS + (:emulation_faults, 8), # PERF_COUNT_SW_EMULATION_FAULTS + (:dummy, 9), # PERF_COUNT_SW_DUMMY + (:bpf_output, 10), # PERF_COUNT_SW_BPF_OUTPUT ]) ] @@ -372,4 +378,262 @@ end make_bench() = make_bench(reasonable_defaults) + +# Event names are taken from the perf command. +const NAME_TO_EVENT = Dict( + # hardware events + "branch-instructions" => EventType(:hw, :branches), + "branch-misses" => EventType(:hw, :branch_mispredicts), + "cache-misses" => EventType(:hw, :cache_misses), + "cache-references" => EventType(:hw, :cache_access), + "cpu-cycles" => EventType(:hw, :cycles), + "instructions" => EventType(:hw, :instructions), + "stalled-cycles-backend" => EventType(:hw, :stalled_cycles_backend), + "stalled-cycles-frontend" => EventType(:hw, :stalled_cycles_frontend), + + # software events + "alignment-faults" => EventType(:sw, :alignment_faults), + "bpf-output" => EventType(:sw, :bpf_output), + "context-switches" => EventType(:sw, :ctx_switches), + "cpu-clock" => EventType(:sw, :cpu_clock), + "cpu-migrations" => EventType(:sw, :cpu_migrations), + "dummy" => EventType(:sw, :dummy), + "emulation-faults" => EventType(:sw, :emulation_faults), + "major-faults" => EventType(:sw, :major_page_faults), + "minor-faults" => EventType(:sw, :minor_page_faults), + "page-faults" => EventType(:sw, :page_faults), + "task-clock" => EventType(:sw, :task_clock), + + # hardware cache events + "L1-dcache-load-misses" => EventType(:cache, :L1_data, :read, :miss), + "L1-dcache-loads" => EventType(:cache, :L1_data, :read, :access), + "L1-icache-load-misses" => EventType(:cache, :L1_insn, :read, :miss), + "L1-icache-loads" => EventType(:cache, :L1_insn, :read, :access), + "dTLB-load-misses" => EventType(:cache, :TLB_data, :read, :miss), + "dTLB-loads" => EventType(:cache, :TLB_data, :read, :access), + "iTLB-load-misses" => EventType(:cache, :TLB_insn, :read, :miss), + "iTLB-loads" => EventType(:cache, :TLB_insn, :read, :access), +) + +const EVENT_TO_NAME = Dict(event => name for (name, event) in NAME_TO_EVENT) + +function parse_pstats_options(opts) + # default events + events = parse_groups(" + (cpu-cycles, stalled-cycles-frontend, stalled-cycles-backend), + (instructions, branch-instructions, branch-misses), + (task-clock, context-switches, cpu-migrations, page-faults) + ") + for opt in opts + if opt isa AbstractString + events = parse_groups(opt) + elseif opt isa Expr && opt.head == :(=) + key, val = opt.args + error("unknown key: $(key)") + else + error("unknown option: $(opt)") + end + end + return (events = events,) +end + +# syntax: groups = (group ',')* group +function parse_groups(str) + groups = Vector{EventType}[] + i = firstindex(str) + next = iterate(str, i) + while next !== nothing + i = skipws(str, i) + group, i = parse_group(str, i) + push!(groups, group) + i = skipws(str, i) + next = iterate(str, i) + if next === nothing + continue + end + c, i = next + if c == ',' + # ok + else + error("unknown character: $(repr(c))") + end + end + return groups +end + +# syntax: group = event | '(' (event ',')* event ')' +function parse_group(str, i) + group = EventType[] + next = iterate(str, i) + if next === nothing + error("no events") + elseif next[1] == '(' + # group + i = next[2] + while true + i = skipws(str, i) + event, i = parse_event(str, i) + push!(group, event) + i = skipws(str, i) + next = iterate(str, i) + if next === nothing + error("unpaired '('") + end + c, i = next + if c == ',' + # ok + elseif c == ')' + break + else + error("unknown character: $(repr(c))") + end + end + else + # singleton group + i = skipws(str, i) + event, i = parse_event(str, i) + push!(group, event) + end + return group, i +end + +# syntax: event = [A-Za-z0-9-]+ +function parse_event(str, i) + isok(c) = 'A' ≤ c ≤ 'Z' || 'a' ≤ c ≤ 'z' || '0' ≤ c ≤ '9' || c == '-' + start = i + next = iterate(str, start) + while next !== nothing && isok(next[1]) + i = next[2] + next = iterate(str, i) + end + stop = prevind(str, i) + if start > stop + error("empty event name") + end + name = str[start:stop] + if !haskey(NAME_TO_EVENT, name) + error("unknown event name: $(name)") + end + return NAME_TO_EVENT[name], i end + +# skip whitespace if any +function skipws(str, i) + @label head + next = iterate(str, i) + if next !== nothing && isspace(next[1]) + i = next[2] + @goto head + end + return i +end + +struct Stats + groups::Vector{Vector{Counter}} +end + +function Stats(b::PerfBench) + groups = Vector{Counter}[] + for g in b.groups + values = Vector{UInt64}(undef, length(g)+1+2) + read!(g.leader_io, values) + #?Ref@assert(length(g) == values[1]) + enabled, running = values[2], values[3] + push!(groups, [Counter(g.event_types[i], values[3+i], enabled, running) for i in 1:length(g)]) + end + return Stats(groups) +end + +function Base.haskey(stats::Stats, name::AbstractString) + event = NAME_TO_EVENT[name] + return any(counter.event == event for group in stats.groups for counter in group) +end + +function Base.getindex(stats::Stats, name::AbstractString) + event = NAME_TO_EVENT[name] + for group in stats.groups, counter in group + if counter.event == event + return counter + end + end + throw(KeyError(name)) +end + +function Base.show(io::IO, stats::Stats) + w = 2 + 23 + 18 + println(io, '━'^w) + for group in stats.groups + for i in 1:length(group) + # grouping character + if length(group) == 1 + c = '╶' + elseif i == 1 + c = '┌' + elseif i == length(group) + c = '└' + else + c = '│' + end + counter = group[i] + event = counter.event + name = EVENT_TO_NAME[event] + @printf io "%-2s%-23s" c name + if !isenabled(counter) + @printf(io, "%18s", "not enabled") + elseif !isrun(counter) + @printf(io, "%10s%7.1f%%", "NA", 0.0) + else + @printf(io, "%10.2e%7.1f%%", scaledcount(counter), fillrate(counter) * 100) + end + if isrun(counter) + # show a comment + if name == "cpu-cycles" + @printf(io, " # %4.1f cycles per ns", counter.value / counter.running) + elseif (name == "stalled-cycles-frontend" || name == "stalled-cycles-backend") && haskey(stats, "cpu-cycles") + @printf(io, " # %4.1f%% of cycles", scaledcount(counter) / scaledcount(stats["cpu-cycles"]) * 100) + elseif name == "instructions" && haskey(stats, "cpu-cycles") + @printf(io, " # %4.1f insns per cycle", scaledcount(counter) / scaledcount(stats["cpu-cycles"])) + elseif name == "branch-instructions" && haskey(stats, "instructions") + @printf(io, " # %4.1f%% of instructions", scaledcount(counter) / scaledcount(stats["instructions"]) * 100) + elseif name == "branch-misses" && haskey(stats, "branch-instructions") + @printf(io, " # %4.1f%% of branch instructions", scaledcount(counter)/ scaledcount(stats["branch-instructions"]) * 100) + elseif name == "cache-misses" && haskey(stats, "cache-references") + @printf(io, " # %4.1f%% of cache references", scaledcount(counter) / scaledcount(stats["cache-references"]) * 100) + elseif name == "L1-dcache-load-misses" && haskey(stats, "L1-dcache-loads") + @printf(io, " # %4.1f%% of loads", scaledcount(counter) / scaledcount(stats["L1-dcache-loads"]) * 100) + end + end + println(io) + end + end + print(io, '━'^w) +end + +isenabled(counter::Counter) = counter.enabled > 0 +isrun(counter::Counter) = counter.running > 0 +fillrate(counter::Counter) = counter.running / counter.enabled +scaledcount(counter::Counter) = counter.value * (counter.enabled / counter.running) + +""" + @pstats [options] expr + +Run `expr` and gather its performance statistics. +""" +macro pstats(args...) + if isempty(args) + error("@pstats requires at least one argument") + end + opts, expr = parse_pstats_options(args[1:end-1]), args[end] + quote + (function () + bench = make_bench($(opts.events)) + enable!(bench) + val = $(esc(expr)) + disable!(bench) + # trick the compiler not to eliminate the code + (rand() < 0 ? val : Stats(bench))::Stats + end)() + end +end + +end \ No newline at end of file From fb4e206f53c1e193400116a7e7ce3eaccd7b027a Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Sat, 20 Jun 2020 14:35:33 +0900 Subject: [PATCH 02/25] Export pstats macro --- src/LinuxPerf.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/LinuxPerf.jl b/src/LinuxPerf.jl index 91e9d90..a97a705 100644 --- a/src/LinuxPerf.jl +++ b/src/LinuxPerf.jl @@ -4,7 +4,7 @@ using Printf using PrettyTables using Formatting -export @measure, @measured +export @measure, @measured, @pstats export make_bench, enable!, disable!, reset!, reasonable_defaults, counters import Base: show, length, close @@ -636,4 +636,4 @@ macro pstats(args...) end end -end \ No newline at end of file +end From 6fc53d433138d3fb1e86ed5491546e616676f0c6 Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Sat, 20 Jun 2020 19:29:17 +0900 Subject: [PATCH 03/25] Add list function to list events --- src/LinuxPerf.jl | 42 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/src/LinuxPerf.jl b/src/LinuxPerf.jl index a97a705..1b9f4f2 100644 --- a/src/LinuxPerf.jl +++ b/src/LinuxPerf.jl @@ -76,7 +76,7 @@ const EVENT_TYPES = (:stalled_cycles_backend, 8), # PERF_COUNT_HW_STALLED_CYCLES_BACKEND (:scaled_cycles, 9) # PERF_COUNT_HW_REF_CPU_CYCLES ]), - (:sw, 1, # PERF_TYPE_SOFTWARE + (:sw, PERF_TYPE_SOFTWARE, [(:cpu_clock, 0), # PERF_COUNT_SW_CPU_CLOCK (:task_clock, 1), # PEF_COUNT_SW_TASK_CLOCK (:page_faults, 2), # PERF_COUNT_SW_PAGE_FAULTS @@ -414,9 +414,47 @@ const NAME_TO_EVENT = Dict( "iTLB-load-misses" => EventType(:cache, :TLB_insn, :read, :miss), "iTLB-loads" => EventType(:cache, :TLB_insn, :read, :access), ) - const EVENT_TO_NAME = Dict(event => name for (name, event) in NAME_TO_EVENT) +function is_supported(event::EventType) + attr = perf_event_attr() + attr.typ = event.category + attr.size = sizeof(perf_event_attr) + attr.config = event.event + fd = perf_event_open(attr, 0, -1, -1, 0) + if fd ≥ 0 + ret = ccall(:close, Cint, (Cint,), fd) + if ret != 0 + @warn "failed to close file descriptor for some reason" + end + return true + end + return false +end + +is_supported(name::AbstractString) = haskey(NAME_TO_EVENT, name) && is_supported(NAME_TO_EVENT[name]) + +function list() + for t in [PERF_TYPE_HARDWARE, PERF_TYPE_SOFTWARE, PERF_TYPE_HW_CACHE] + events = collect(filter(x -> x[2].category == t, NAME_TO_EVENT)) + sort!(events, by = x -> x[1]) # sort events by name + if t == PERF_TYPE_HARDWARE + println("hardware:") + elseif t == PERF_TYPE_SOFTWARE + println("software:") + elseif t == PERF_TYPE_HW_CACHE + println("cache:") + else + @assert false + end + for (name, event) in events + @printf " %-25s%s" name (is_supported(event) ? "supported" : "not supported") + println() + end + t != PERF_TYPE_HW_CACHE && println() + end +end + function parse_pstats_options(opts) # default events events = parse_groups(" From 78bdb4e1ec0f4671e780e66e27c1830eafd502b9 Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Sun, 21 Jun 2020 02:26:54 +0900 Subject: [PATCH 04/25] Write docs --- src/LinuxPerf.jl | 56 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/src/LinuxPerf.jl b/src/LinuxPerf.jl index 1b9f4f2..2883ab6 100644 --- a/src/LinuxPerf.jl +++ b/src/LinuxPerf.jl @@ -656,6 +656,62 @@ scaledcount(counter::Counter) = counter.value * (counter.enabled / counter.runni @pstats [options] expr Run `expr` and gather its performance statistics. + +This macro basically measures the number of occurrences of events such as CPU +cycles, branch prediction misses, page faults, and so on. The list of +supported events can be shown by calling the `LinuxPerf.list` function. + +Due to the resource limitation of performance measuring units (PMUs) +installed in a CPU core, all events may not be measured simultaneously, +resulting in multiplexing several groups of events in a single measurement. +If the running time is extremely short, some event groups may not be measured +at all. + +The result is shown in a table. Each row consists of four columns: an event +group indicator, an event name, a scaled count and a running rate. A comment +may follow these columns after a hash (#) character. +1. The event group indicated by a bracket is a set of events that are + measured simultaneously so that their count statistics can be meaningfully + compared. +2. The event name is a conventional name of the measured event. +3. The scaled count is the number of occurrences of the event, scaled by the + reciprocal of the running rate. +4. The running rate is the ratio of the time of running and enabled. + +The macro can take some options. If a string object is passed, it is a +comma-separated list of event names to measure. An event group can be +indicated by a pair of parentheses. + +# Examples + +``` +julia> xs = randn(1_000_000); + +julia> sort(xs[1:9]); # compile + +julia> @pstats sort(xs) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +┌ cpu-cycles 2.57e+08 48.6% # 3.8 cycles per ns +│ stalled-cycles-frontend 1.10e+07 48.6% # 4.3% of cycles +└ stalled-cycles-backend 2.48e+06 48.6% # 1.0% of cycles +┌ instructions 1.84e+08 51.4% # 0.7 insns per cycle +│ branch-instructions 3.73e+07 51.4% # 20.2% of instructions +└ branch-misses 7.92e+06 51.4% # 21.2% of branch instructions +┌ task-clock 6.75e+07 100.0% +│ context-switches 0.00e+00 100.0% +│ cpu-migrations 0.00e+00 100.0% +└ page-faults 1.95e+03 100.0% +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +julia> @pstats "(cpu-cycles,instructions,branch-instructions,branch-misses),page-faults" sort(xs) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +┌ cpu-cycles 2.61e+08 100.0% # 3.9 cycles per ns +│ instructions 1.80e+08 100.0% # 0.7 insns per cycle +│ branch-instructions 3.64e+07 100.0% # 20.2% of instructions +└ branch-misses 8.32e+06 100.0% # 22.8% of branch instructions +╶ page-faults 0.00e+00 100.0% +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +``` """ macro pstats(args...) if isempty(args) From b6a85c58b9e69d3a4680a97be011dbbafc136bf2 Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Sun, 21 Jun 2020 16:02:22 +0900 Subject: [PATCH 05/25] Close PerfBench --- src/LinuxPerf.jl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/LinuxPerf.jl b/src/LinuxPerf.jl index 2883ab6..dce6460 100644 --- a/src/LinuxPerf.jl +++ b/src/LinuxPerf.jl @@ -7,7 +7,7 @@ using Formatting export @measure, @measured, @pstats export make_bench, enable!, disable!, reset!, reasonable_defaults, counters -import Base: show, length, close +import Base: show, length macro measure(expr, args...) esc(quote @@ -332,6 +332,8 @@ enable!(b::PerfBench) = foreach(enable!, b.groups) disable!(b::PerfBench) = foreach(disable!, b.groups) reset!(b::PerfBench) = foreach(reset!, b.groups) +Base.close(b::PerfBench) = foreach(close, b.groups) + function counters(b::PerfBench) c = Counter[] for g in b.groups @@ -725,7 +727,9 @@ macro pstats(args...) val = $(esc(expr)) disable!(bench) # trick the compiler not to eliminate the code - (rand() < 0 ? val : Stats(bench))::Stats + stats = rand() < 0 ? val : Stats(bench) + close(bench) + return stats::Stats end)() end end From c4077eda1f86b750349fe0fd5d353e8cda7db9a9 Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Sun, 21 Jun 2020 16:31:08 +0900 Subject: [PATCH 06/25] Add exclude_kernel option --- src/LinuxPerf.jl | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/LinuxPerf.jl b/src/LinuxPerf.jl index dce6460..c68692c 100644 --- a/src/LinuxPerf.jl +++ b/src/LinuxPerf.jl @@ -366,13 +366,13 @@ const reasonable_defaults = [EventType(:cache, :L1_data, :write, :access), EventType(:cache, :L1_data, :write, :miss)]=#] -function make_bench(x) +function make_bench(x; kwargs...) groups = EventGroup[] for y in x if isa(y, EventType) - push!(groups, EventGroup([y])) + push!(groups, EventGroup([y]; kwargs...)) else - push!(groups, EventGroup(y)) + push!(groups, EventGroup(y; kwargs...)) end end PerfBench(groups) @@ -464,17 +464,22 @@ function parse_pstats_options(opts) (instructions, branch-instructions, branch-misses), (task-clock, context-switches, cpu-migrations, page-faults) ") + exclude_kernel = false for opt in opts if opt isa AbstractString events = parse_groups(opt) elseif opt isa Expr && opt.head == :(=) key, val = opt.args - error("unknown key: $(key)") + if key === :exclude_kernel + exclude_kernel = esc(val) + else + error("unknown key: $(key)") + end else error("unknown option: $(opt)") end end - return (events = events,) + return (events = events, exclude_kernel = exclude_kernel,) end # syntax: groups = (group ',')* group @@ -682,7 +687,8 @@ may follow these columns after a hash (#) character. The macro can take some options. If a string object is passed, it is a comma-separated list of event names to measure. An event group can be -indicated by a pair of parentheses. +indicated by a pair of parentheses. If `exclude_kernel = true` is passed, the +count excludes events that happen in kernel space (`false` by default). # Examples @@ -722,7 +728,7 @@ macro pstats(args...) opts, expr = parse_pstats_options(args[1:end-1]), args[end] quote (function () - bench = make_bench($(opts.events)) + bench = make_bench($(opts.events), userspace_only = $(opts.exclude_kernel)) enable!(bench) val = $(esc(expr)) disable!(bench) From 033e29e572837b66164b9840ea4ed77b720a103c Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Sun, 21 Jun 2020 16:35:01 +0900 Subject: [PATCH 07/25] Update examples --- src/LinuxPerf.jl | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/LinuxPerf.jl b/src/LinuxPerf.jl index c68692c..1bad8ee 100644 --- a/src/LinuxPerf.jl +++ b/src/LinuxPerf.jl @@ -699,25 +699,25 @@ julia> sort(xs[1:9]); # compile julia> @pstats sort(xs) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -┌ cpu-cycles 2.57e+08 48.6% # 3.8 cycles per ns -│ stalled-cycles-frontend 1.10e+07 48.6% # 4.3% of cycles -└ stalled-cycles-backend 2.48e+06 48.6% # 1.0% of cycles -┌ instructions 1.84e+08 51.4% # 0.7 insns per cycle -│ branch-instructions 3.73e+07 51.4% # 20.2% of instructions -└ branch-misses 7.92e+06 51.4% # 21.2% of branch instructions -┌ task-clock 6.75e+07 100.0% -│ context-switches 0.00e+00 100.0% +┌ cpu-cycles 2.68e+08 51.0% # 3.9 cycles per ns +│ stalled-cycles-frontend 9.75e+06 51.0% # 3.6% of cycles +└ stalled-cycles-backend 2.00e+07 51.0% # 7.5% of cycles +┌ instructions 1.87e+08 49.0% # 0.7 insns per cycle +│ branch-instructions 3.82e+07 49.0% # 20.4% of instructions +└ branch-misses 8.51e+06 49.0% # 22.3% of branch instructions +┌ task-clock 6.90e+07 100.0% +│ context-switches 6.00e+00 100.0% │ cpu-migrations 0.00e+00 100.0% └ page-faults 1.95e+03 100.0% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ julia> @pstats "(cpu-cycles,instructions,branch-instructions,branch-misses),page-faults" sort(xs) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -┌ cpu-cycles 2.61e+08 100.0% # 3.9 cycles per ns -│ instructions 1.80e+08 100.0% # 0.7 insns per cycle -│ branch-instructions 3.64e+07 100.0% # 20.2% of instructions -└ branch-misses 8.32e+06 100.0% # 22.8% of branch instructions -╶ page-faults 0.00e+00 100.0% +┌ cpu-cycles 2.68e+08 100.0% # 3.9 cycles per ns +│ instructions 1.89e+08 100.0% # 0.7 insns per cycle +│ branch-instructions 3.80e+07 100.0% # 20.1% of instructions +└ branch-misses 8.23e+06 100.0% # 21.7% of branch instructions +╶ page-faults 1.95e+03 100.0% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ ``` """ From da8de6745e73bc06ec32dcb8cf91ec7ef2b7207f Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Sun, 21 Jun 2020 17:18:39 +0900 Subject: [PATCH 08/25] Refactor show --- src/LinuxPerf.jl | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/src/LinuxPerf.jl b/src/LinuxPerf.jl index 1bad8ee..5e583c6 100644 --- a/src/LinuxPerf.jl +++ b/src/LinuxPerf.jl @@ -634,18 +634,25 @@ function Base.show(io::IO, stats::Stats) # show a comment if name == "cpu-cycles" @printf(io, " # %4.1f cycles per ns", counter.value / counter.running) - elseif (name == "stalled-cycles-frontend" || name == "stalled-cycles-backend") && haskey(stats, "cpu-cycles") - @printf(io, " # %4.1f%% of cycles", scaledcount(counter) / scaledcount(stats["cpu-cycles"]) * 100) elseif name == "instructions" && haskey(stats, "cpu-cycles") @printf(io, " # %4.1f insns per cycle", scaledcount(counter) / scaledcount(stats["cpu-cycles"])) - elseif name == "branch-instructions" && haskey(stats, "instructions") - @printf(io, " # %4.1f%% of instructions", scaledcount(counter) / scaledcount(stats["instructions"]) * 100) - elseif name == "branch-misses" && haskey(stats, "branch-instructions") - @printf(io, " # %4.1f%% of branch instructions", scaledcount(counter)/ scaledcount(stats["branch-instructions"]) * 100) - elseif name == "cache-misses" && haskey(stats, "cache-references") - @printf(io, " # %4.1f%% of cache references", scaledcount(counter) / scaledcount(stats["cache-references"]) * 100) - elseif name == "L1-dcache-load-misses" && haskey(stats, "L1-dcache-loads") - @printf(io, " # %4.1f%% of loads", scaledcount(counter) / scaledcount(stats["L1-dcache-loads"]) * 100) + else + for (num, den, label) in [ + ("stalled-cycles-frontend", "cpu-cycles", "cycles"), + ("stalled-cycles-backend", "cpu-cycles", "cycles"), + ("branch-instructions", "instructions", "instructions"), + ("branch-misses", "branch-instructions", "branch instructions"), + ("cache-misses", "cache-references", "cache references"), + ("L1-dcache-load-misses", "L1-dcache-loads", "dcache loads"), + ("L1-icache-load-misses", "L1-icache-loads", "icache loads"), + ("dTLB-load-misses", "dTLB-loads", "dTLB loads"), + ("iTLB-load-misses", "iTLB-loads", "iTLB loads"), + ] + if name == num && haskey(stats, den) + @printf(io, " # %4.1f%% of %s", scaledcount(counter) / scaledcount(stats[den]) * 100, label) + break + end + end end end println(io) From cc755eb77363d6aeccd0d12f9306df764339bd71 Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Sun, 21 Jun 2020 17:37:05 +0900 Subject: [PATCH 09/25] Show warning if events are not measured --- src/LinuxPerf.jl | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/LinuxPerf.jl b/src/LinuxPerf.jl index 5e583c6..5532845 100644 --- a/src/LinuxPerf.jl +++ b/src/LinuxPerf.jl @@ -666,6 +666,15 @@ isrun(counter::Counter) = counter.running > 0 fillrate(counter::Counter) = counter.running / counter.enabled scaledcount(counter::Counter) = counter.value * (counter.enabled / counter.running) +function checkstats(stats::Stats) + for group in stats.groups, counter in group + if !isrun(counter) + @warn "Some events are not measured" + return + end + end +end + """ @pstats [options] expr @@ -742,6 +751,7 @@ macro pstats(args...) # trick the compiler not to eliminate the code stats = rand() < 0 ? val : Stats(bench) close(bench) + checkstats(stats) return stats::Stats end)() end From 8602a74302ef2f566f09853358d431caf607057d Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Sun, 21 Jun 2020 22:20:48 +0900 Subject: [PATCH 10/25] Show human-readable time --- src/LinuxPerf.jl | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/LinuxPerf.jl b/src/LinuxPerf.jl index 5532845..880d2b1 100644 --- a/src/LinuxPerf.jl +++ b/src/LinuxPerf.jl @@ -636,6 +636,21 @@ function Base.show(io::IO, stats::Stats) @printf(io, " # %4.1f cycles per ns", counter.value / counter.running) elseif name == "instructions" && haskey(stats, "cpu-cycles") @printf(io, " # %4.1f insns per cycle", scaledcount(counter) / scaledcount(stats["cpu-cycles"])) + elseif name == "cpu-clock" || name == "task-clock" + clk = float(scaledcount(counter)) + if clk ≥ 1e9 + clk /= 1e9 + unit = "s" + elseif clk ≥ 1e6 + clk /= 1e6 + unit = "ms" + elseif clk ≥ 1e3 + clk /= 1e3 + unit = "μs" + else + unit = "ns" + end + @printf(io, " # %4.1f %s", clk, unit) else for (num, den, label) in [ ("stalled-cycles-frontend", "cpu-cycles", "cycles"), From dc261109893486261c61792c69c76c8e9b95b637 Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Sun, 28 Jun 2020 14:12:48 +0900 Subject: [PATCH 11/25] Fix PERF_TYPE_BREAKPOINT --- src/LinuxPerf.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/LinuxPerf.jl b/src/LinuxPerf.jl index 880d2b1..9bb5565 100644 --- a/src/LinuxPerf.jl +++ b/src/LinuxPerf.jl @@ -60,7 +60,7 @@ const PERF_TYPE_SOFTWARE = 1 const PERF_TYPE_TRACEPOINT = 2 const PERF_TYPE_HW_CACHE = 3 const PERF_TYPE_RAW = 4 -const PERF_TYPE_BREAKPOINT = 3 +const PERF_TYPE_BREAKPOINT = 5 const EVENT_TYPES = [ From 7473c03e3481a6dc02aaa474d4ca1f061ccdf78a Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Sun, 28 Jun 2020 15:46:59 +0900 Subject: [PATCH 12/25] Add event modifiers --- src/LinuxPerf.jl | 79 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 68 insertions(+), 11 deletions(-) diff --git a/src/LinuxPerf.jl b/src/LinuxPerf.jl index 9bb5565..0e164fd 100644 --- a/src/LinuxPerf.jl +++ b/src/LinuxPerf.jl @@ -200,13 +200,23 @@ function EventType(cat::Symbol, cache::Symbol, op::Symbol, evt::Symbol) cache_id | (op_id << 8) | (evt_id << 16)) end +const EXCLUDE_NONE = UInt(0) +const EXCLUDE_USER = UInt(1) << 0 +const EXCLUDE_KERNEL = UInt(1) << 1 +const EXCLUDE_HYPERVISOR = UInt(1) << 2 + +struct EventTypeExt + event::EventType + exclude::UInt # bit flags +end + mutable struct EventGroup leader_fd::Cint fds::Vector{Cint} event_types::Vector{EventType} leader_io::IOStream - function EventGroup(types::Vector{EventType}; + function EventGroup(types::Vector{<:Union{EventType,EventTypeExt}}; warn_unsupported = true, userspace_only = true, pinned = false, @@ -217,9 +227,14 @@ mutable struct EventGroup for (i, evt_type) in enumerate(types) attr = perf_event_attr() - attr.typ = evt_type.category attr.size = sizeof(perf_event_attr) - attr.config = evt_type.event + if evt_type isa EventTypeExt + attr.typ = evt_type.event.category + attr.config = evt_type.event.event + else + attr.typ = evt_type.category + attr.config = evt_type.event + end attr.sample_period_or_freq = 0 attr.flags = 0 # first attribute becomes group leader @@ -239,6 +254,18 @@ mutable struct EventGroup # (1 << 6) exclude hypervisor # (1 << 7) exclude idle + if evt_type isa EventTypeExt + if evt_type.exclude & EXCLUDE_USER != 0 + attr.flags |= (1 << 4) + end + if evt_type.exclude & EXCLUDE_KERNEL != 0 + attr.flags |= (1 << 5) + end + if evt_type.exclude & EXCLUDE_HYPERVISOR != 0 + attr.flags |= (1 << 6) + end + end + attr.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_TOTAL_TIME_ENABLED | @@ -259,7 +286,11 @@ mutable struct EventGroup error("perf_event_open error : $(Libc.strerror(errno))") end end - push!(group.event_types, evt_type) + if evt_type isa EventTypeExt + push!(group.event_types, evt_type.event) + else + push!(group.event_types, evt_type) + end push!(group.fds, fd) if group.leader_fd == -1 group.leader_fd = fd @@ -380,7 +411,6 @@ end make_bench() = make_bench(reasonable_defaults) - # Event names are taken from the perf command. const NAME_TO_EVENT = Dict( # hardware events @@ -484,7 +514,7 @@ end # syntax: groups = (group ',')* group function parse_groups(str) - groups = Vector{EventType}[] + groups = Vector{EventTypeExt}[] i = firstindex(str) next = iterate(str, i) while next !== nothing @@ -508,7 +538,7 @@ end # syntax: group = event | '(' (event ',')* event ')' function parse_group(str, i) - group = EventType[] + group = EventTypeExt[] next = iterate(str, i) if next === nothing error("no events") @@ -542,12 +572,13 @@ function parse_group(str, i) return group, i end -# syntax: event = [A-Za-z0-9-]+ +# syntax: event = [A-Za-z0-9-]+ (:[ukh]*)? function parse_event(str, i) - isok(c) = 'A' ≤ c ≤ 'Z' || 'a' ≤ c ≤ 'z' || '0' ≤ c ≤ '9' || c == '-' + # parse event name + isevchar(c) = 'A' ≤ c ≤ 'Z' || 'a' ≤ c ≤ 'z' || '0' ≤ c ≤ '9' || c == '-' start = i next = iterate(str, start) - while next !== nothing && isok(next[1]) + while next !== nothing && isevchar(next[1]) i = next[2] next = iterate(str, i) end @@ -559,7 +590,33 @@ function parse_event(str, i) if !haskey(NAME_TO_EVENT, name) error("unknown event name: $(name)") end - return NAME_TO_EVENT[name], i + event = NAME_TO_EVENT[name] + + # parse modifiers (if any) + ismodchar(c) = 'A' ≤ c ≤ 'Z' || 'a' ≤ c ≤ 'z' + u = k = h = true # u: user, k: kernel, h: hypervisor + next = iterate(str, i) + if next !== nothing && next[1] == ':' + u = k = h = false # exclude all + i = next[2] + next = iterate(str, i) + while next !== nothing && ismodchar(next[1]) + c, i = next + if c ∉ ('u', 'k', 'h') + error("unsupported modifier: $(repr(c))") + end + c == 'u' && (u = true) + c == 'k' && (k = true) + c == 'h' && (h = true) + next = iterate(str, i) + end + end + exclude = EXCLUDE_NONE + u || (exclude |= EXCLUDE_USER) + k || (exclude |= EXCLUDE_KERNEL) + h || (exclude |= EXCLUDE_HYPERVISOR) + + return EventTypeExt(event, exclude), i end # skip whitespace if any From dfa787c27e7b0501167d78bce8b8f9f2cbcd9a07 Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Sun, 28 Jun 2020 18:18:06 +0900 Subject: [PATCH 13/25] Add user, kernel, and hypervisor flags --- src/LinuxPerf.jl | 58 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/src/LinuxPerf.jl b/src/LinuxPerf.jl index 0e164fd..1c06b6b 100644 --- a/src/LinuxPerf.jl +++ b/src/LinuxPerf.jl @@ -207,6 +207,7 @@ const EXCLUDE_HYPERVISOR = UInt(1) << 2 struct EventTypeExt event::EventType + modified::Bool exclude::UInt # bit flags end @@ -489,19 +490,24 @@ end function parse_pstats_options(opts) # default events - events = parse_groups(" + events = :(parse_groups(" (cpu-cycles, stalled-cycles-frontend, stalled-cycles-backend), (instructions, branch-instructions, branch-misses), (task-clock, context-switches, cpu-migrations, page-faults) - ") - exclude_kernel = false - for opt in opts - if opt isa AbstractString - events = parse_groups(opt) + ")) + # default spaces + user = kernel = hypervisor = true + for (i, opt) in enumerate(opts) + if i == 1 && !(opt isa Expr && opt.head == :(=)) + events = :(parse_groups($(esc(opt)))) elseif opt isa Expr && opt.head == :(=) key, val = opt.args - if key === :exclude_kernel - exclude_kernel = esc(val) + if key == :user + user = esc(val) + elseif key == :kernel + kernel = esc(val) + elseif key == :hypervisor + hypervisor = esc(val) else error("unknown key: $(key)") end @@ -509,7 +515,7 @@ function parse_pstats_options(opts) error("unknown option: $(opt)") end end - return (events = events, exclude_kernel = exclude_kernel,) + return (events = events, spaces = :($(user), $(kernel), $(hypervisor)), ) end # syntax: groups = (group ',')* group @@ -594,9 +600,11 @@ function parse_event(str, i) # parse modifiers (if any) ismodchar(c) = 'A' ≤ c ≤ 'Z' || 'a' ≤ c ≤ 'z' + modified = false u = k = h = true # u: user, k: kernel, h: hypervisor next = iterate(str, i) if next !== nothing && next[1] == ':' + modified = true u = k = h = false # exclude all i = next[2] next = iterate(str, i) @@ -616,7 +624,7 @@ function parse_event(str, i) k || (exclude |= EXCLUDE_KERNEL) h || (exclude |= EXCLUDE_HYPERVISOR) - return EventTypeExt(event, exclude), i + return EventTypeExt(event, modified, exclude), i end # skip whitespace if any @@ -747,6 +755,21 @@ function checkstats(stats::Stats) end end +function set_default_spaces(groups, (u, k, h)) + map(groups) do group + map(group) do event + if event.modified + return event + end + exclude = EXCLUDE_NONE + u || (exclude |= EXCLUDE_USER) + k || (exclude |= EXCLUDE_KERNEL) + h || (exclude |= EXCLUDE_HYPERVISOR) + return EventTypeExt(event.event, event.modified, exclude) + end + end +end + """ @pstats [options] expr @@ -774,9 +797,15 @@ may follow these columns after a hash (#) character. 4. The running rate is the ratio of the time of running and enabled. The macro can take some options. If a string object is passed, it is a -comma-separated list of event names to measure. An event group can be -indicated by a pair of parentheses. If `exclude_kernel = true` is passed, the -count excludes events that happen in kernel space (`false` by default). +comma-separated list of event names to measure. Modifiers can be added to +confine measured events to specific space. Currently, three space modifiers +are supported: user (`u`), kernel (`k`), and hypervisor (`h`) space. The +modifiers follows an event name separated by a comma. For example, +`cpu-cycles:u` ignores all CPU cycles except in user space. An event group +can be indicated by a pair of parentheses. It is also possible to pass +`user`, `kernel`, and `hypervisor` parameters (`true` by default) to the +macro, which affect events without modifiers. For example, `kernel=false` +excludes events happend in kernel space. # Examples @@ -816,7 +845,8 @@ macro pstats(args...) opts, expr = parse_pstats_options(args[1:end-1]), args[end] quote (function () - bench = make_bench($(opts.events), userspace_only = $(opts.exclude_kernel)) + groups = set_default_spaces($(opts.events), $(opts.spaces)) + bench = make_bench(groups, userspace_only = false) enable!(bench) val = $(esc(expr)) disable!(bench) From 768deb155bfdb3909c2a86c4e033e85a50b80851 Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Sun, 28 Jun 2020 19:55:01 +0900 Subject: [PATCH 14/25] Add dump_groups for debug --- src/LinuxPerf.jl | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/LinuxPerf.jl b/src/LinuxPerf.jl index 1c06b6b..8fec7b7 100644 --- a/src/LinuxPerf.jl +++ b/src/LinuxPerf.jl @@ -770,6 +770,31 @@ function set_default_spaces(groups, (u, k, h)) end end +# for debug +function dump_groups(groups) + buf = IOBuffer() + println(buf, "Event groups") + for (i, group) in enumerate(groups) + println(buf, "group #$(i)") + for (j, event) in enumerate(group) + if j < length(group) + print(buf, " ├─ ") + else + print(buf, " └─ ") + end + print(buf, event.event) + event.modified && print(buf, " → modified") + exclude = event.exclude + exclude != 0 && print(buf, ", exclude ") + exclude & EXCLUDE_USER != 0 && print(buf, 'u') + exclude & EXCLUDE_KERNEL != 0 && print(buf, 'k') + exclude & EXCLUDE_HYPERVISOR != 0 && print(buf, 'h') + println(buf) + end + end + String(take!(buf)) +end + """ @pstats [options] expr @@ -846,6 +871,7 @@ macro pstats(args...) quote (function () groups = set_default_spaces($(opts.events), $(opts.spaces)) + @debug dump_groups(groups) bench = make_bench(groups, userspace_only = false) enable!(bench) val = $(esc(expr)) From 36ec2fc79f7e5ff68e334bf6a647826ef4227398 Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Sun, 28 Jun 2020 20:00:49 +0900 Subject: [PATCH 15/25] Fix docs --- src/LinuxPerf.jl | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/LinuxPerf.jl b/src/LinuxPerf.jl index 8fec7b7..dd9f148 100644 --- a/src/LinuxPerf.jl +++ b/src/LinuxPerf.jl @@ -822,15 +822,15 @@ may follow these columns after a hash (#) character. 4. The running rate is the ratio of the time of running and enabled. The macro can take some options. If a string object is passed, it is a -comma-separated list of event names to measure. Modifiers can be added to -confine measured events to specific space. Currently, three space modifiers -are supported: user (`u`), kernel (`k`), and hypervisor (`h`) space. The -modifiers follows an event name separated by a comma. For example, -`cpu-cycles:u` ignores all CPU cycles except in user space. An event group -can be indicated by a pair of parentheses. It is also possible to pass -`user`, `kernel`, and `hypervisor` parameters (`true` by default) to the -macro, which affect events without modifiers. For example, `kernel=false` -excludes events happend in kernel space. +comma-separated list of event names to measure. A group of events is +surrounded by a pair of parentheses. Modifiers can be added to confine +measured events to specific space. Currently, three space modifiers are +supported: user (`u`), kernel (`k`), and hypervisor (`h`) space. The +modifiers follow an event name separated by a colon. For example, +`cpu-cycles:u` ignores all CPU cycles except in user space. It is also +possible to pass `user`, `kernel`, and `hypervisor` parameters (`true` by +default) to the macro, which affect events without modifiers. For example, +`kernel=false` excludes events happend in kernel space. # Examples From 34e0d2b81d52ab17ae1b6dfb7d1983698e0c28d9 Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Sun, 28 Jun 2020 20:34:08 +0900 Subject: [PATCH 16/25] Add group-level modifiers --- src/LinuxPerf.jl | 74 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 52 insertions(+), 22 deletions(-) diff --git a/src/LinuxPerf.jl b/src/LinuxPerf.jl index dd9f148..f343960 100644 --- a/src/LinuxPerf.jl +++ b/src/LinuxPerf.jl @@ -205,6 +205,14 @@ const EXCLUDE_USER = UInt(1) << 0 const EXCLUDE_KERNEL = UInt(1) << 1 const EXCLUDE_HYPERVISOR = UInt(1) << 2 +function exclude_flags(u, k, h) + exclude = EXCLUDE_NONE + u || (exclude |= EXCLUDE_USER) + k || (exclude |= EXCLUDE_KERNEL) + h || (exclude |= EXCLUDE_HYPERVISOR) + return exclude +end + struct EventTypeExt event::EventType modified::Bool @@ -542,7 +550,7 @@ function parse_groups(str) return groups end -# syntax: group = event | '(' (event ',')* event ')' +# syntax: group = event | '(' (event ',')* event ')' modifiers? function parse_group(str, i) group = EventTypeExt[] next = iterate(str, i) @@ -569,6 +577,18 @@ function parse_group(str, i) error("unknown character: $(repr(c))") end end + i = skipws(str, i) + + # parse group-level modifiers (if any) + next = iterate(str, i) + if next !== nothing && next[1] == ':' + (u, k, h), i = parse_modifiers(str, i) + group = map(group) do event + event.modified && return event + exclude = exclude_flags(u, k, h) + return EventTypeExt(event.event, true, exclude) + end + end else # singleton group i = skipws(str, i) @@ -578,7 +598,7 @@ function parse_group(str, i) return group, i end -# syntax: event = [A-Za-z0-9-]+ (:[ukh]*)? +# syntax: event = [A-Za-z0-9-]+ modifiers? function parse_event(str, i) # parse event name isevchar(c) = 'A' ≤ c ≤ 'Z' || 'a' ≤ c ≤ 'z' || '0' ≤ c ≤ '9' || c == '-' @@ -598,35 +618,45 @@ function parse_event(str, i) end event = NAME_TO_EVENT[name] - # parse modifiers (if any) - ismodchar(c) = 'A' ≤ c ≤ 'Z' || 'a' ≤ c ≤ 'z' + # parse event-level modifiers (if any) modified = false - u = k = h = true # u: user, k: kernel, h: hypervisor + exclude = EXCLUDE_NONE + i = skipws(str, i) next = iterate(str, i) if next !== nothing && next[1] == ':' + (u, k, h), i = parse_modifiers(str, i) modified = true - u = k = h = false # exclude all - i = next[2] - next = iterate(str, i) - while next !== nothing && ismodchar(next[1]) - c, i = next - if c ∉ ('u', 'k', 'h') - error("unsupported modifier: $(repr(c))") - end - c == 'u' && (u = true) - c == 'k' && (k = true) - c == 'h' && (h = true) - next = iterate(str, i) - end + exclude = exclude_flags(u, k, h) end - exclude = EXCLUDE_NONE - u || (exclude |= EXCLUDE_USER) - k || (exclude |= EXCLUDE_KERNEL) - h || (exclude |= EXCLUDE_HYPERVISOR) return EventTypeExt(event, modified, exclude), i end +# syntax: modifiers = ':' [ukh]* +function parse_modifiers(str, i) + next = iterate(str, i) + @assert next[1] == ':' + ismodchar(c) = 'A' ≤ c ≤ 'Z' || 'a' ≤ c ≤ 'z' + # u: user, k: kernel, h: hypervisor + u = k = h = false # exclude all + i = skipws(str, next[2]) + next = iterate(str, i) + while next !== nothing && ismodchar(next[1]) + c, i = next + if c == 'u' + u = true + elseif c == 'k' + k = true + elseif c == 'h' + h = true + else + error("unsupported modifier: $(repr(c))") + end + next = iterate(next, i) + end + return (u, k, h), i +end + # skip whitespace if any function skipws(str, i) @label head From 7f73836878da93fe552c0bb5029d9b13255a3e54 Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Sun, 28 Jun 2020 21:07:47 +0900 Subject: [PATCH 17/25] Fix bug --- src/LinuxPerf.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/LinuxPerf.jl b/src/LinuxPerf.jl index f343960..6e2c7ea 100644 --- a/src/LinuxPerf.jl +++ b/src/LinuxPerf.jl @@ -652,7 +652,7 @@ function parse_modifiers(str, i) else error("unsupported modifier: $(repr(c))") end - next = iterate(next, i) + next = iterate(str, i) end return (u, k, h), i end From b913820583b79d7ad876a882c3e784a211424d38 Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Sun, 28 Jun 2020 21:08:13 +0900 Subject: [PATCH 18/25] Add tests on events parser --- test/runtests.jl | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index e231917..b7cb32c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,7 +1,7 @@ using LinuxPerf using Test -import LinuxPerf: make_bench, enable!, disable!, reset!, reasonable_defaults, counters +using LinuxPerf: make_bench, enable!, disable!, reset!, reasonable_defaults, counters, EventType, EventTypeExt, parse_groups @testset "LinuxPerf" begin @@ -36,4 +36,45 @@ end @test typeof(c1) == typeof(counters(b1)) end +@testset "Parser" begin + cycles = EventType(:hw, :cycles) + insns = EventType(:hw, :instructions) + + @test parse_groups("") == [] + @test parse_groups("cpu-cycles") == [[EventTypeExt(cycles, false, 0)]] + @test parse_groups("(cpu-cycles)") == parse_groups("cpu-cycles") + @test parse_groups("cpu-cycles,instructions") == [[EventTypeExt(cycles, false, 0)], [EventTypeExt(insns, false, 0)]] + @test parse_groups("(cpu-cycles,instructions)") == [[EventTypeExt(cycles, false, 0), EventTypeExt(insns, false, 0)]] + @test parse_groups(" cpu-cycles, instructions ") == parse_groups("cpu-cycles,instructions") + @test parse_groups(" ( cpu-cycles, instructions ) ") == parse_groups("(cpu-cycles,instructions)") + + # exclude flags + u = LinuxPerf.exclude_flags(true, false, false) + k = LinuxPerf.exclude_flags(false, true, false) + h = LinuxPerf.exclude_flags(false, false, true) + uk = LinuxPerf.exclude_flags(true, true, false) + ukh = LinuxPerf.exclude_flags(true, true, true) + + # event-level modifiers + @test parse_groups("cpu-cycles:u") == [[EventTypeExt(cycles, true, u)]] + @test parse_groups("cpu-cycles:k") == [[EventTypeExt(cycles, true, k)]] + @test parse_groups("cpu-cycles:h") == [[EventTypeExt(cycles, true, h)]] + @test parse_groups("cpu-cycles:uk") == [[EventTypeExt(cycles, true, uk)]] + @test parse_groups("cpu-cycles:ukh") == [[EventTypeExt(cycles, true, ukh)]] + @test parse_groups("cpu-cycles:ku") == parse_groups("cpu-cycles:uk") + @test parse_groups("cpu-cycles:uu") == parse_groups("cpu-cycles:u") + @test parse_groups("cpu-cycles : u ") == parse_groups("cpu-cycles:u") + + # group-level modifiers + @test parse_groups("(cpu-cycles,instructions):u") == parse_groups("(cpu-cycles:u,instructions:u)") + @test parse_groups("(cpu-cycles,instructions):k") == parse_groups("(cpu-cycles:k,instructions:k)") + @test parse_groups("(cpu-cycles,instructions):h") == parse_groups("(cpu-cycles:h,instructions:h)") + @test parse_groups("(cpu-cycles,instructions):uk") == parse_groups("(cpu-cycles:uk,instructions:uk)") + @test parse_groups("(cpu-cycles,instructions):ukh") == parse_groups("(cpu-cycles:ukh,instructions:ukh)") + @test parse_groups("(cpu-cycles,instructions):ku") == parse_groups("(cpu-cycles,instructions):uk") + @test parse_groups("(cpu-cycles,instructions):uu") == parse_groups("(cpu-cycles,instructions):u") + @test parse_groups("(cpu-cycles:k,instructions):u") == parse_groups("(cpu-cycles:k,instructions:u)") + @test parse_groups("(cpu-cycles,instructions) : u ") == parse_groups("(cpu-cycles,instructions):u") +end + end \ No newline at end of file From fbf77fa40e54b543f1c8c499146daf4f096893fa Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Sun, 28 Jun 2020 21:45:21 +0900 Subject: [PATCH 19/25] Update examples --- src/LinuxPerf.jl | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/LinuxPerf.jl b/src/LinuxPerf.jl index 6e2c7ea..57af0f6 100644 --- a/src/LinuxPerf.jl +++ b/src/LinuxPerf.jl @@ -871,24 +871,24 @@ julia> sort(xs[1:9]); # compile julia> @pstats sort(xs) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -┌ cpu-cycles 2.68e+08 51.0% # 3.9 cycles per ns -│ stalled-cycles-frontend 9.75e+06 51.0% # 3.6% of cycles -└ stalled-cycles-backend 2.00e+07 51.0% # 7.5% of cycles -┌ instructions 1.87e+08 49.0% # 0.7 insns per cycle -│ branch-instructions 3.82e+07 49.0% # 20.4% of instructions -└ branch-misses 8.51e+06 49.0% # 22.3% of branch instructions -┌ task-clock 6.90e+07 100.0% -│ context-switches 6.00e+00 100.0% +┌ cpu-cycles 2.60e+08 49.7% # 3.4 cycles per ns +│ stalled-cycles-frontend 1.09e+07 49.7% # 4.2% of cycles +└ stalled-cycles-backend 7.07e+06 49.7% # 2.7% of cycles +┌ instructions 1.96e+08 50.3% # 0.8 insns per cycle +│ branch-instructions 4.02e+07 50.3% # 20.5% of instructions +└ branch-misses 8.15e+06 50.3% # 20.3% of branch instructions +┌ task-clock 7.61e+07 100.0% # 76.1 ms +│ context-switches 7.00e+00 100.0% │ cpu-migrations 0.00e+00 100.0% └ page-faults 1.95e+03 100.0% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ julia> @pstats "(cpu-cycles,instructions,branch-instructions,branch-misses),page-faults" sort(xs) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -┌ cpu-cycles 2.68e+08 100.0% # 3.9 cycles per ns -│ instructions 1.89e+08 100.0% # 0.7 insns per cycle -│ branch-instructions 3.80e+07 100.0% # 20.1% of instructions -└ branch-misses 8.23e+06 100.0% # 21.7% of branch instructions +┌ cpu-cycles 2.64e+08 100.0% # 3.5 cycles per ns +│ instructions 1.86e+08 100.0% # 0.7 insns per cycle +│ branch-instructions 3.74e+07 100.0% # 20.1% of instructions +└ branch-misses 8.21e+06 100.0% # 21.9% of branch instructions ╶ page-faults 1.95e+03 100.0% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ ``` From 882d700d3a35e89c7b7d8aeaf9c1536f813fd200 Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Sun, 28 Jun 2020 22:42:24 +0900 Subject: [PATCH 20/25] Make sure to close PerfBench --- src/LinuxPerf.jl | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/LinuxPerf.jl b/src/LinuxPerf.jl index 57af0f6..f6674ce 100644 --- a/src/LinuxPerf.jl +++ b/src/LinuxPerf.jl @@ -903,14 +903,19 @@ macro pstats(args...) groups = set_default_spaces($(opts.events), $(opts.spaces)) @debug dump_groups(groups) bench = make_bench(groups, userspace_only = false) - enable!(bench) - val = $(esc(expr)) - disable!(bench) - # trick the compiler not to eliminate the code - stats = rand() < 0 ? val : Stats(bench) - close(bench) - checkstats(stats) - return stats::Stats + try + enable!(bench) + val = $(esc(expr)) + disable!(bench) + # trick the compiler not to eliminate the code + stats = rand() < 0 ? val : Stats(bench) + checkstats(stats) + return stats::Stats + catch + rethrow() + finally + close(bench) + end end)() end end From affbb6e7b8cce623b32cd9071469f6982f4f587a Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Sun, 28 Jun 2020 23:35:12 +0900 Subject: [PATCH 21/25] Measure user space only by default --- src/LinuxPerf.jl | 49 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/src/LinuxPerf.jl b/src/LinuxPerf.jl index f6674ce..ae485fa 100644 --- a/src/LinuxPerf.jl +++ b/src/LinuxPerf.jl @@ -457,11 +457,23 @@ const NAME_TO_EVENT = Dict( ) const EVENT_TO_NAME = Dict(event => name for (name, event) in NAME_TO_EVENT) -function is_supported(event::EventType) +function is_supported(event::EventType; space::Symbol) attr = perf_event_attr() attr.typ = event.category attr.size = sizeof(perf_event_attr) attr.config = event.event + if space == :user + attr.flags |= (1 << 5) + attr.flags |= (1 << 6) + elseif space == :kernel + attr.flags |= (1 << 4) + attr.flags |= (1 << 6) + elseif space == :hypervisor + attr.flags |= (1 << 4) + attr.flags |= (1 << 5) + else + throw(ArgumentError("unknown space name: $(space)")) + end fd = perf_event_open(attr, 0, -1, -1, 0) if fd ≥ 0 ret = ccall(:close, Cint, (Cint,), fd) @@ -473,7 +485,7 @@ function is_supported(event::EventType) return false end -is_supported(name::AbstractString) = haskey(NAME_TO_EVENT, name) && is_supported(NAME_TO_EVENT[name]) +is_supported(name::AbstractString; kwargs...) = haskey(NAME_TO_EVENT, name) && is_supported(NAME_TO_EVENT[name]; kwargs...) function list() for t in [PERF_TYPE_HARDWARE, PERF_TYPE_SOFTWARE, PERF_TYPE_HW_CACHE] @@ -489,7 +501,22 @@ function list() @assert false end for (name, event) in events - @printf " %-25s%s" name (is_supported(event) ? "supported" : "not supported") + spaces = String[] + if is_supported(event, space = :user) + push!(spaces, "user") + end + if is_supported(event, space = :kernel) + push!(spaces, "kernel") + end + if is_supported(event, space = :hypervisor) + push!(spaces, "hypervisor") + end + if isempty(spaces) + msg = "not supported" + else + msg = join(spaces, ", ") + end + @printf " %-25s%s" name msg println() end t != PERF_TYPE_HW_CACHE && println() @@ -504,7 +531,8 @@ function parse_pstats_options(opts) (task-clock, context-switches, cpu-migrations, page-faults) ")) # default spaces - user = kernel = hypervisor = true + user = true + kernel = hypervisor = false for (i, opt) in enumerate(opts) if i == 1 && !(opt isa Expr && opt.head == :(=)) events = :(parse_groups($(esc(opt)))) @@ -857,10 +885,15 @@ surrounded by a pair of parentheses. Modifiers can be added to confine measured events to specific space. Currently, three space modifiers are supported: user (`u`), kernel (`k`), and hypervisor (`h`) space. The modifiers follow an event name separated by a colon. For example, -`cpu-cycles:u` ignores all CPU cycles except in user space. It is also -possible to pass `user`, `kernel`, and `hypervisor` parameters (`true` by -default) to the macro, which affect events without modifiers. For example, -`kernel=false` excludes events happend in kernel space. +`cpu-cycles:u` ignores all CPU cycles except in user space (which is the +default). It is also possible to pass `user`, `kernel`, and `hypervisor` +parameters to the macro, which affect events without modifiers. Only user +space is activated by default (i.e., `user` is `true` but `kernel` and +`hypervisor` are `false`). To measure kernel events, for example, add the `k` +modifier to events you are interested in or pass `kernel=true` to the macro, +which globally activates events in kernel space. + +For more details, see perf_event_open(2)'s manual page. # Examples From bd1dfc6ad4ebbad2e51693bedb1a303c518213c1 Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Mon, 29 Jun 2020 14:25:13 +0900 Subject: [PATCH 22/25] Support multithreads --- src/LinuxPerf.jl | 180 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 141 insertions(+), 39 deletions(-) diff --git a/src/LinuxPerf.jl b/src/LinuxPerf.jl index ae485fa..157c58b 100644 --- a/src/LinuxPerf.jl +++ b/src/LinuxPerf.jl @@ -230,6 +230,7 @@ mutable struct EventGroup userspace_only = true, pinned = false, exclusive = false, + pid = Cint(0), ) my_types = EventType[] group = new(-1, Cint[], EventType[]) @@ -280,7 +281,7 @@ mutable struct EventGroup PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING - fd = perf_event_open(attr, 0, -1, group.leader_fd, 0) + fd = perf_event_open(attr, pid, -1, group.leader_fd, 0) if fd < 0 errno = Libc.errno() if errno in (Libc.EINVAL,Libc.ENOENT) @@ -344,6 +345,7 @@ function Base.close(g::EventGroup) end mutable struct PerfBench + pid::Cint groups::Vector{EventGroup} end @@ -406,20 +408,40 @@ const reasonable_defaults = [EventType(:cache, :L1_data, :write, :access), EventType(:cache, :L1_data, :write, :miss)]=#] -function make_bench(x; kwargs...) +function make_bench(x) groups = EventGroup[] for y in x if isa(y, EventType) - push!(groups, EventGroup([y]; kwargs...)) + push!(groups, EventGroup([y])) else - push!(groups, EventGroup(y; kwargs...)) + push!(groups, EventGroup(y)) end end - PerfBench(groups) + PerfBench(0, groups) end make_bench() = make_bench(reasonable_defaults) +struct PerfBenchThreaded + data::Vector{PerfBench} +end + +enable!(b::PerfBenchThreaded) = foreach(enable!, b.data) +disable!(b::PerfBenchThreaded) = foreach(disable!, b.data) +reset!(b::PerfBenchThreaded) = foreach(reset!, b.data) + +Base.close(b::PerfBenchThreaded) = foreach(close, b.data) + +function make_bench_threaded(groups; threads = true) + data = PerfBench[] + for tid in (threads ? alltids() : zero(getpid())) + push!(data, PerfBench(tid, [EventGroup(g, pid = tid, userspace_only = false) for g in groups])) + end + return PerfBenchThreaded(data) +end + +alltids(pid = getpid()) = parse.(typeof(pid), readdir("/proc/$(pid)/task")) + # Event names are taken from the perf command. const NAME_TO_EVENT = Dict( # hardware events @@ -533,17 +555,22 @@ function parse_pstats_options(opts) # default spaces user = true kernel = hypervisor = false + # default threads + threads = true for (i, opt) in enumerate(opts) if i == 1 && !(opt isa Expr && opt.head == :(=)) events = :(parse_groups($(esc(opt)))) elseif opt isa Expr && opt.head == :(=) key, val = opt.args + val = esc(val) if key == :user - user = esc(val) + user = val elseif key == :kernel - kernel = esc(val) + kernel = val elseif key == :hypervisor - hypervisor = esc(val) + hypervisor = val + elseif key == :threads + threads = val else error("unknown key: $(key)") end @@ -551,7 +578,7 @@ function parse_pstats_options(opts) error("unknown option: $(opt)") end end - return (events = events, spaces = :($(user), $(kernel), $(hypervisor)), ) + return (events = events, spaces = :($(user), $(kernel), $(hypervisor)), threads = threads,) end # syntax: groups = (group ',')* group @@ -696,11 +723,12 @@ function skipws(str, i) return i end -struct Stats +struct ThreadStats + pid::Cint groups::Vector{Vector{Counter}} end -function Stats(b::PerfBench) +function ThreadStats(b::PerfBench) groups = Vector{Counter}[] for g in b.groups values = Vector{UInt64}(undef, length(g)+1+2) @@ -709,15 +737,15 @@ function Stats(b::PerfBench) enabled, running = values[2], values[3] push!(groups, [Counter(g.event_types[i], values[3+i], enabled, running) for i in 1:length(g)]) end - return Stats(groups) + return ThreadStats(b.pid, groups) end -function Base.haskey(stats::Stats, name::AbstractString) +function Base.haskey(stats::ThreadStats, name::AbstractString) event = NAME_TO_EVENT[name] return any(counter.event == event for group in stats.groups for counter in group) end -function Base.getindex(stats::Stats, name::AbstractString) +function Base.getindex(stats::ThreadStats, name::AbstractString) event = NAME_TO_EVENT[name] for group in stats.groups, counter in group if counter.event == event @@ -727,10 +755,96 @@ function Base.getindex(stats::Stats, name::AbstractString) throw(KeyError(name)) end -function Base.show(io::IO, stats::Stats) - w = 2 + 23 + 18 - println(io, '━'^w) - for group in stats.groups +function Base.show(io::IO, stats::ThreadStats) + println(io, stats.pid) + printcounts(io, stats.groups) +end + +isenabled(counter::Counter) = counter.enabled > 0 +isrun(counter::Counter) = counter.running > 0 +fillrate(counter::Counter) = counter.running / counter.enabled +scaledcount(counter::Counter) = counter.value * (counter.enabled / counter.running) + +struct Stats + threads::Vector{ThreadStats} +end + +Stats(b::PerfBenchThreaded) = Stats(map(ThreadStats, b.data)) + +Base.show(io::IO, stats::Stats) = printsummary(io, stats) + +printsummary(stats::Stats; kwargs...) = printsummary(stdout, stats; kwargs...) + +function printsummary(io::IO, stats::Stats; expand::Bool = false, skipdisabled::Bool = true) + printsep(io, '━') + println(io) + if isempty(stats.threads) + print(io, "no threads") + return + end + + # aggregate all counts + n_aggregated = 0 + counts = deepcopy(stats.threads[1].groups) + for i in 2:length(stats.threads) + t = stats.threads[i] + if skipdisabled && !any(isenabled, c for g in t.groups for c in g) + continue + end + if expand + println(io, "TID = ", t.pid) + printcounts(io, t.groups) + printsep(io, '┄') + #printsep(io, '─') + println(io) + end + for (j, g) in enumerate(t.groups) + for (k, c) in enumerate(g) + c′ = counts[j][k] + @assert c′.event == c.event + counts[j][k] = Counter( + c.event, + c.value + c′.value, + c.enabled + c′.enabled, + c.running + c′.running + ) + end + end + n_aggregated += 1 + end + + for g in counts, c in g + if !isrun(c) + @warn "Some events are not measured" + return + end + end + + if expand + println(io, "Aggregated") + end + printcounts(io, counts) + if n_aggregated > 1 + println(io, lpad("(aggregated from $(n_aggregated) threads)", TABLE_WIDTH)) + end + printsep(io, '━') +end + +const TABLE_WIDTH = 2 + 23 + 18 +printsep(io::IO, c::Char) = print(io, c^TABLE_WIDTH) + +function printcounts(io::IO, groups::Vector{Vector{Counter}}) + for group in groups + function findcount(name) + event = NAME_TO_EVENT[name] + for c in group + c.event == event && return c + end + for g in groups, c in g + c.event == event && return c + end + return nothing + end for i in 1:length(group) # grouping character if length(group) == 1 @@ -757,8 +871,8 @@ function Base.show(io::IO, stats::Stats) # show a comment if name == "cpu-cycles" @printf(io, " # %4.1f cycles per ns", counter.value / counter.running) - elseif name == "instructions" && haskey(stats, "cpu-cycles") - @printf(io, " # %4.1f insns per cycle", scaledcount(counter) / scaledcount(stats["cpu-cycles"])) + elseif name == "instructions" && (cycles = findcount("cpu-cycles")) !== nothing + @printf(io, " # %4.1f insns per cycle", scaledcount(counter) / scaledcount(cycles)) elseif name == "cpu-clock" || name == "task-clock" clk = float(scaledcount(counter)) if clk ≥ 1e9 @@ -786,8 +900,8 @@ function Base.show(io::IO, stats::Stats) ("dTLB-load-misses", "dTLB-loads", "dTLB loads"), ("iTLB-load-misses", "iTLB-loads", "iTLB loads"), ] - if name == num && haskey(stats, den) - @printf(io, " # %4.1f%% of %s", scaledcount(counter) / scaledcount(stats[den]) * 100, label) + if name == num && (d = findcount(den)) !== nothing + @printf(io, " # %4.1f%% of %s", scaledcount(counter) / scaledcount(d) * 100, label) break end end @@ -796,21 +910,6 @@ function Base.show(io::IO, stats::Stats) println(io) end end - print(io, '━'^w) -end - -isenabled(counter::Counter) = counter.enabled > 0 -isrun(counter::Counter) = counter.running > 0 -fillrate(counter::Counter) = counter.running / counter.enabled -scaledcount(counter::Counter) = counter.value * (counter.enabled / counter.running) - -function checkstats(stats::Stats) - for group in stats.groups, counter in group - if !isrun(counter) - @warn "Some events are not measured" - return - end - end end function set_default_spaces(groups, (u, k, h)) @@ -893,6 +992,10 @@ space is activated by default (i.e., `user` is `true` but `kernel` and modifier to events you are interested in or pass `kernel=true` to the macro, which globally activates events in kernel space. +All threads are measured and event counts are aggregated by default. Passing +`threads=false` to the macro disables this feature and only measures events +that occurred in the current thread invoking the macro. + For more details, see perf_event_open(2)'s manual page. # Examples @@ -935,14 +1038,13 @@ macro pstats(args...) (function () groups = set_default_spaces($(opts.events), $(opts.spaces)) @debug dump_groups(groups) - bench = make_bench(groups, userspace_only = false) + bench = make_bench_threaded(groups, threads = $(opts.threads)) try enable!(bench) val = $(esc(expr)) disable!(bench) # trick the compiler not to eliminate the code stats = rand() < 0 ? val : Stats(bench) - checkstats(stats) return stats::Stats catch rethrow() From e838806c88b397fdc0fce6fc594d30dac2a7198f Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Mon, 29 Jun 2020 14:28:35 +0900 Subject: [PATCH 23/25] Fix --- src/LinuxPerf.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/LinuxPerf.jl b/src/LinuxPerf.jl index 157c58b..4d301da 100644 --- a/src/LinuxPerf.jl +++ b/src/LinuxPerf.jl @@ -816,7 +816,7 @@ function printsummary(io::IO, stats::Stats; expand::Bool = false, skipdisabled:: for g in counts, c in g if !isrun(c) @warn "Some events are not measured" - return + break end end From 444edea5648714d972e63b2f390ab8b704e7c01f Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Tue, 30 Jun 2020 11:34:15 +0900 Subject: [PATCH 24/25] Fix printsummary --- src/LinuxPerf.jl | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/src/LinuxPerf.jl b/src/LinuxPerf.jl index 4d301da..7a21d92 100644 --- a/src/LinuxPerf.jl +++ b/src/LinuxPerf.jl @@ -757,7 +757,7 @@ end function Base.show(io::IO, stats::ThreadStats) println(io, stats.pid) - printcounts(io, stats.groups) + printcounters(io, stats.groups) end isenabled(counter::Counter) = counter.enabled > 0 @@ -785,24 +785,23 @@ function printsummary(io::IO, stats::Stats; expand::Bool = false, skipdisabled:: # aggregate all counts n_aggregated = 0 - counts = deepcopy(stats.threads[1].groups) - for i in 2:length(stats.threads) + counters = [[Counter(c.event, 0, 0, 0) for c in g] for g in stats.threads[1].groups] + for i in 1:length(stats.threads) t = stats.threads[i] if skipdisabled && !any(isenabled, c for g in t.groups for c in g) continue end if expand - println(io, "TID = ", t.pid) - printcounts(io, t.groups) + println(io, "TID = ", t.pid) # label + printcounters(io, t.groups) printsep(io, '┄') - #printsep(io, '─') println(io) end for (j, g) in enumerate(t.groups) for (k, c) in enumerate(g) - c′ = counts[j][k] + c′ = counters[j][k] @assert c′.event == c.event - counts[j][k] = Counter( + counters[j][k] = Counter( c.event, c.value + c′.value, c.enabled + c′.enabled, @@ -813,17 +812,15 @@ function printsummary(io::IO, stats::Stats; expand::Bool = false, skipdisabled:: n_aggregated += 1 end - for g in counts, c in g + for g in counters, c in g if !isrun(c) @warn "Some events are not measured" break end end - if expand - println(io, "Aggregated") - end - printcounts(io, counts) + expand && n_aggregated > 1 && println(io, "Aggregated") # label + printcounters(io, counters) if n_aggregated > 1 println(io, lpad("(aggregated from $(n_aggregated) threads)", TABLE_WIDTH)) end @@ -833,7 +830,7 @@ end const TABLE_WIDTH = 2 + 23 + 18 printsep(io::IO, c::Char) = print(io, c^TABLE_WIDTH) -function printcounts(io::IO, groups::Vector{Vector{Counter}}) +function printcounters(io::IO, groups::Vector{Vector{Counter}}) for group in groups function findcount(name) event = NAME_TO_EVENT[name] From 1ea871b2581451fe6c26027b7c23494f18c4d96b Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Tue, 30 Jun 2020 12:13:20 +0900 Subject: [PATCH 25/25] Tweaks --- src/LinuxPerf.jl | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/src/LinuxPerf.jl b/src/LinuxPerf.jl index 7a21d92..3cad214 100644 --- a/src/LinuxPerf.jl +++ b/src/LinuxPerf.jl @@ -775,7 +775,16 @@ Base.show(io::IO, stats::Stats) = printsummary(io, stats) printsummary(stats::Stats; kwargs...) = printsummary(stdout, stats; kwargs...) -function printsummary(io::IO, stats::Stats; expand::Bool = false, skipdisabled::Bool = true) +""" + printsummary([io,] stats::Stats; expandthreads = false, skipinactive = true) + +Print summary of event statistics. + +If `expandthreads` is `true`, the statistics of each thread are printed with +its thread ID (TID). If `skipinactive` is `true`, the statistics from +unmeasured (inactive) threads are ignored. +""" +function printsummary(io::IO, stats::Stats; expandthreads::Bool = false, skipinactive::Bool = true) printsep(io, '━') println(io) if isempty(stats.threads) @@ -786,13 +795,13 @@ function printsummary(io::IO, stats::Stats; expand::Bool = false, skipdisabled:: # aggregate all counts n_aggregated = 0 counters = [[Counter(c.event, 0, 0, 0) for c in g] for g in stats.threads[1].groups] - for i in 1:length(stats.threads) - t = stats.threads[i] - if skipdisabled && !any(isenabled, c for g in t.groups for c in g) + for t in stats.threads + if skipinactive && !any(isenabled, c for g in t.groups for c in g) continue end - if expand - println(io, "TID = ", t.pid) # label + n_aggregated += 1 + if expandthreads + println(io, "Thread #$(n_aggregated) (TID = $(t.pid))") # label printcounters(io, t.groups) printsep(io, '┄') println(io) @@ -809,7 +818,6 @@ function printsummary(io::IO, stats::Stats; expand::Bool = false, skipdisabled:: ) end end - n_aggregated += 1 end for g in counters, c in g @@ -819,10 +827,10 @@ function printsummary(io::IO, stats::Stats; expand::Bool = false, skipdisabled:: end end - expand && n_aggregated > 1 && println(io, "Aggregated") # label + expandthreads && n_aggregated > 1 && println(io, "Aggregated") # label printcounters(io, counters) if n_aggregated > 1 - println(io, lpad("(aggregated from $(n_aggregated) threads)", TABLE_WIDTH)) + println(io, lpad("aggregated from $(n_aggregated) threads", TABLE_WIDTH)) end printsep(io, '━') end @@ -834,26 +842,21 @@ function printcounters(io::IO, groups::Vector{Vector{Counter}}) for group in groups function findcount(name) event = NAME_TO_EVENT[name] + # try to find within the same group for c in group c.event == event && return c end + # fall back to other groups for g in groups, c in g c.event == event && return c end return nothing end - for i in 1:length(group) + for (i, counter) in enumerate(group) # grouping character - if length(group) == 1 - c = '╶' - elseif i == 1 - c = '┌' - elseif i == length(group) - c = '└' - else - c = '│' - end - counter = group[i] + c = length(group) == 1 ? '╶' : + i == 1 ? '┌' : + i == length(group) ? '└' : '│' event = counter.event name = EVENT_TO_NAME[event] @printf io "%-2s%-23s" c name