Commit 330c6b2

avik-pal and giordano authored
ci: update the continuous benchmarking ci to use the new machines (#1632)

* ci: update the continuous benchmarking ci to use the new machines
* ci: test limited options for now
* ci: aggregate
* ci: dont run on draft PRs
* perf: enable all benchmarks
* perf: move Neural Operators benchmarks to benchmark/
* fix: disable fno for now
* ci: restrict runners to PR opt-in and nightly on main
* test: does label work
* ci: test on smaller batch sizes
* ci: cancel in-progress builds
* feat: re-enable fno bench
* perf: only bench small model
* perf: use reactant_device till rng case is sorted
* Update .github/workflows/benchmark.yml (Co-authored-by: Mosè Giordano <[email protected]>)
* fix: compat bounds
* ci: add timeout
* ci: enable cuda
* fix: temporarily disable transpose_reshape
* perf: rework to use chairmarks for faster benchmarking
* perf: print compile times
* perf: print versioninfo
* ci: run benchmarks under pprof
* perf: reduce benchmark set for now
* perf: more selective about cpu benchmarks
* ci: upload benchmarks for debugging
* perf: ablate on transpose opts
* Update .github/workflows/benchmark.yml (Co-authored-by: Mosè Giordano <[email protected]>)
* Update src/Compiler.jl
* Update Reactant_jll version to 0.0.240

---------

Co-authored-by: Mosè Giordano <[email protected]>
1 parent abf0d1b commit 330c6b2

17 files changed: +496 −821 lines

.buildkite/pipeline.yml

Lines changed: 0 additions & 71 deletions
@@ -48,77 +48,6 @@ steps:
       if: build.message !~ /\[skip tests\]/
       timeout_in_minutes: 120

-  # - group: ":racehorse: Benchmarks"
-  #   steps:
-  #     - label: "CPU: Run Benchmarks"
-  #       plugins:
-  #         - JuliaCI/julia#v1:
-  #             version: "1"
-  #       command: |
-  #         julia --project=benchmark -e 'println("--- :julia: Instantiating project")
-  #         using Pkg
-  #         Pkg.develop([PackageSpec(path=pwd()), PackageSpec(path="lib/ReactantCore")])'
-
-  #         julia --project=benchmark -e 'println("--- :julia: Run Benchmarks")
-  #         include("benchmark/runbenchmarks.jl")'
-  #       artifact_paths:
-  #         - "benchmark/results/*"
-  #       agents:
-  #         # Models are quite large so we need a decent sized machine. Don't tell Chris we
-  #         # are stealing SciMLBenchmarks machine :P
-  #         queue: "juliaecosystem"
-  #         sandbox_capable: true
-  #         exclusive: true
-  #         arch: "x86_64"
-  #       env:
-  #         BENCHMARK_GROUP: CPU
-  #         JULIA_NUM_THREADS: "auto"
-  #       timeout_in_minutes: 120
-
-  #     - label: "CUDA: Run Benchmarks"
-  #       plugins:
-  #         - JuliaCI/julia#v1:
-  #             version: "1"
-  #       command: |
-  #         julia --project=benchmark -e 'println("--- :julia: Instantiating project")
-  #         using Pkg
-  #         Pkg.develop([PackageSpec(path=pwd()), PackageSpec(path="lib/ReactantCore")])'
-
-  #         julia --project=benchmark -e 'println("--- :julia: Run Benchmarks")
-  #         include("benchmark/runbenchmarks.jl")'
-  #       artifact_paths:
-  #         - "benchmark/results/*"
-  #       agents:
-  #         queue: "benchmark"
-  #         gpu: "rtx4070"
-  #         cuda: "*"
-  #       env:
-  #         BENCHMARK_GROUP: CUDA
-  #         JULIA_NUM_THREADS: "auto"
-  #       timeout_in_minutes: 120
-
-  #     - wait: ~
-  #       continue_on_failure: true
-
-  #     - label: "Combine benchmarks"
-  #       plugins:
-  #         - JuliaCI/julia#v1:
-  #             version: "1"
-  #       command: |
-  #         buildkite-agent artifact download "benchmark/results/*" .
-
-  #         julia -e 'println("--- :julia: Instantiating project")
-  #         using Pkg
-  #         Pkg.add("BenchmarkTools")
-
-  #         println("--- :julia: Combining Benchmarks")
-  #         include("benchmark/aggregate.jl")'
-  #       artifact_paths:
-  #         - "benchmark/results/combinedbenchmarks.json"
-  #       agents:
-  #         queue: "juliagpu"
-  #       timeout_in_minutes: 10
-
   # - label: "AMDGPU Julia v{{matrix.version}}"
   #   matrix:
   #     setup:

.github/workflows/benchmark.yml

Lines changed: 117 additions & 0 deletions
@@ -0,0 +1,117 @@
+name: Benchmarks
+
+permissions:
+  contents: write # contents permission to update benchmark contents in gh-pages branch
+  statuses: read
+  deployments: write # deployments permission to deploy GitHub pages website
+  pull-requests: write
+
+on:
+  schedule:
+    - cron: '0 3 * * *' # Nightly at 3am UTC
+  workflow_dispatch:
+    # Manual trigger
+  pull_request:
+    types: [labeled, unlabeled, synchronize, opened, reopened]
+    paths:
+      - ".github/workflows/benchmark.yml"
+      - "ext/**"
+      - "lib/**"
+      - "src/**"
+      - "Project.toml"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  benchmark:
+    timeout-minutes: 90
+    if: ${{ (github.ref == 'refs/heads/main' && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')) || (github.event_name == 'pull_request' && contains(join(github.event.pull_request.labels.*.name, ','), 'run benchmarks')) }}
+    runs-on: ${{ matrix.os }}
+    container:
+      image: ${{ contains(matrix.os, 'linux') && 'ghcr.io/enzymead/reactant-docker-images@sha256:7004a6ebbdd77bd047900b2bffc542e8576864056dc27a9c94d30666d6f7ea01' || '' }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os:
+          - linux-x86-n2-32
+          - linux-x86-ct6e-180-4tpu
+          - linux-x86-a2-48-a100-4gpu
+    steps:
+      - uses: actions/checkout@v4
+      - uses: julia-actions/setup-julia@v2
+        with:
+          version: "1"
+      - uses: julia-actions/cache@v2
+      - name: "Instantiate benchmarks environment"
+        shell: julia --color=yes --project=benchmark {0}
+        run: |
+          using Pkg
+          Pkg.instantiate()
+      - name: "Run Benchmarks"
+        run: |
+          julia --color=yes --project=benchmark benchmark/runbenchmarks.jl
+      - name: Upload PProf Results
+        uses: actions/upload-artifact@v4
+        timeout-minutes: 10
+        with:
+          name: pprof-results-${{ matrix.os }}
+          path: "**/*pb.gz"
+          retention-days: 90
+          overwrite: false
+      - name: Upload Benchmark Results
+        uses: actions/upload-artifact@v4
+        timeout-minutes: 10
+        with:
+          name: benchmark-results-${{ matrix.os }}
+          path: "benchmark/results/*"
+          retention-days: 90
+          overwrite: false
+
+  benchmark-aggregate:
+    if: ${{ (github.ref == 'refs/heads/main' && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')) || (github.event_name == 'pull_request' && contains(join(github.event.pull_request.labels.*.name, ','), 'run benchmarks')) }}
+    needs: benchmark
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v5
+      - uses: julia-actions/setup-julia@v2
+        with:
+          version: "1"
+      - uses: julia-actions/cache@v2
+      - uses: actions/download-artifact@v5
+        with:
+          pattern: benchmark-results-*
+          path: benchmark/results
+          merge-multiple: true
+      - name: Combine benchmarks
+        id: locate
+        run: |
+          julia --color=yes -e '@info "Instantiating project"
+          using Pkg;
+          Pkg.add("JSON3");
+          @info "Combining Benchmarks"
+          include("benchmark/aggregate.jl")'
+
+          echo "path=$(find benchmark -type f -name combinedbenchmarks.json 2>/dev/null)" >> $GITHUB_OUTPUT
+      - name: Upload benchmark results as artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-results
+          path: ${{ steps.locate.outputs.path }}
+          retention-days: 90
+          overwrite: false
+      - name: Upload Benchmark Results
+        uses: benchmark-action/github-action-benchmark@v1
+        with:
+          name: Reactant.jl Benchmarks
+          tool: "customSmallerIsBetter"
+          output-file-path: ${{ steps.locate.outputs.path }}
+          benchmark-data-dir-path: "benchmarks"
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          comment-always: true
+          summary-always: true
+          alert-threshold: "150%"
+          fail-on-alert: false
+          auto-push: ${{ github.event_name != 'pull_request' }}
+          max-items-in-chart: 50
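Note: the "Upload PProf Results" step above picks up any *.pb.gz files the benchmark run leaves on disk. As a minimal sketch (hypothetical; the actual profiling hooks live in benchmark/runbenchmarks.jl, which is not part of this diff), a profile in that format can be produced with Julia's Profile stdlib plus PProf.jl:

    using Profile, PProf

    # Profile a representative workload and export it in pprof's protobuf format.
    # The file name is illustrative; the workflow uploads every **/*pb.gz it finds.
    Profile.clear()
    @profile for _ in 1:100
        A = rand(Float32, 512, 512)
        A * A
    end
    pprof(; out="matmul_profile.pb.gz", web=false)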

.github/workflows/benchmark_aggregate.yml

Lines changed: 0 additions & 58 deletions
This file was deleted.

Project.toml

Lines changed: 1 addition & 1 deletion
@@ -100,7 +100,7 @@ PythonCall = "0.9.25"
 Random = "1.10"
 Random123 = "1.7"
 ReactantCore = "0.1.15"
-Reactant_jll = "0.0.239"
+Reactant_jll = "0.0.240"
 ScopedValues = "1.3.0"
 Scratch = "1.2"
 Sockets = "1.10"

benchmark/.gitignore

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+results

benchmark/Project.toml

Lines changed: 18 additions & 24 deletions
@@ -1,33 +1,27 @@
 [deps]
-AppleAccelerate = "13e28ba4-7ad8-5781-acae-3021b1ed3924"
-BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 Boltz = "4544d5e4-abc5-4dea-817f-29e4c205d9c8"
-CpuId = "adafc99b-e345-5852-983c-f28acb93d879"
+Chairmarks = "0ca39b1e-fe0b-4e98-acfc-b1656634c4de"
 Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
-InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
+JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
-LuxCUDA = "d0bbae9a-e099-4d5b-a835-1c6931763bda"
-MKL = "33e6dc65-8f57-5167-99aa-e5a354878fb2"
-MLDataDevices = "7e8f7934-dd98-4c1a-8fe8-92b47a384d40"
-Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+NeuralOperators = "ea5c82af-86e5-48da-8ee1-382d6ad7af4b"
+PProf = "e4faabce-9ead-11e9-39d9-4379958e3056"
+Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 Reactant = "3c362404-f566-11ee-1572-e11a4b42c853"
-Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
-Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
+
+[sources]
+Reactant = {path = ".."}

 [compat]
-BenchmarkTools = "1.5"
-Boltz = "1"
-Enzyme = "0.13"
-Lux = "1.1"
-Random = "1.10"
+Boltz = "1.7"
+Chairmarks = "1"
+Enzyme = "0.13.73"
+JSON3 = "1"
+LinearAlgebra = "1.10"
+Lux = "1.21"
+NeuralOperators = "0.6.2"
+PProf = "3"
+Printf = "1.10"
+Reactant = "0.2.161"
 julia = "1.10"
-
-[extras]
-CUDA_Driver_jll = "4ee394cb-3365-5eb0-8335-949819d2adfc"
-
-[preferences.CUDA_Driver_jll]
-compat = false
-
-[sources]
-Reactant = { path = ".." }
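This change swaps BenchmarkTools for Chairmarks in the benchmark environment ("perf: rework to use chairmarks for faster benchmarking" above). A minimal, hypothetical sketch of a Chairmarks timing, assuming the kind of measurement the suite needs — the real benchmark definitions live in benchmark/runbenchmarks.jl and are not shown in this diff:

    using Chairmarks

    # Chairmarks collects many samples with far less tuning overhead than
    # BenchmarkTools, which is why it is used here for faster benchmarking.
    A = rand(Float32, 256, 256)
    B = rand(Float32, 256, 256)

    bench = @be A * B    # repeatedly evaluate the expression and collect samples
    println("fastest sample: ", minimum(bench).time * 1e3, " ms")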

benchmark/aggregate.jl

Lines changed: 11 additions & 28 deletions
@@ -1,36 +1,19 @@
-using BenchmarkTools
+using JSON3

-const BACKENDS = ["CPU", "CUDA"]
+const BACKENDS = ["CPU", "CUDA", "TPU"]

-const CPU_Results = joinpath(dirname(@__FILE__), "results", "CPUbenchmarks.json")
-@assert(ispath(CPU_Results))
-
-const RESULTS = BenchmarkTools.load(CPU_Results)[1]
-@assert RESULTS isa BenchmarkTools.BenchmarkGroup
-
-for backend in BACKENDS[2:end]
-    @info "Aggregating results for $(backend)"
+all_results = []
+for backend in BACKENDS
     filename = string(backend, "benchmarks.json")
     filepath = joinpath(dirname(@__FILE__), "results", filename)
-    if !ispath(filepath)
-        @warn "No file found at path: $(filepath)"
+    if ispath(filepath)
+        results = JSON3.read(read(filepath, String))
+        append!(all_results, results)
     else
-        backend_results = BenchmarkTools.load(filepath)[1]
-        if backend_results isa BenchmarkTools.BenchmarkGroup
-            # <benchmark name>/<forward or reverse>/<backend>/<reactant or package>
-            for benchmark in keys(RESULTS)
-                for pass in keys(RESULTS[benchmark])
-                    for pkg in keys(backend_results[benchmark][pass][backend])
-                        RESULTS[benchmark][pass][backend][pkg] = backend_results[benchmark][pass][backend][pkg]
-                    end
-                end
-            end
-        else
-            @warn "Unexpected file format for file at path: $(filepath)"
-        end
+        @warn "No file found at path: $(filepath)"
     end
 end

-BenchmarkTools.save(
-    joinpath(dirname(@__FILE__), "results", "combinedbenchmarks.json"), RESULTS
-)
+open(joinpath(dirname(@__FILE__), "results", "combinedbenchmarks.json"), "w") do io
+    JSON3.pretty(io, JSON3.write(all_results))
+end
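Downstream, combinedbenchmarks.json is handed to benchmark-action/github-action-benchmark with tool: "customSmallerIsBetter", so each per-backend <backend>benchmarks.json is assumed to already be an array of name/unit/value entries that this script can simply concatenate. A hypothetical sketch of writing one such file from Julia (the benchmark names and numbers below are made up):

    using JSON3

    # Hypothetical contents of benchmark/results/CPUbenchmarks.json; the
    # "customSmallerIsBetter" tool only requires the name, unit, and value keys.
    entries = [
        Dict("name" => "ViT base [forward] / CPU / Reactant", "unit" => "s", "value" => 0.42),
        Dict("name" => "ViT base [forward] / CPU / Lux", "unit" => "s", "value" => 0.57),
    ]

    open(joinpath(@__DIR__, "results", "CPUbenchmarks.json"), "w") do io
        JSON3.pretty(io, JSON3.write(entries))
    end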
