Commit e485ba9 (1 parent: cdbf33b)

Add example scripts to run pyperformance on a generic host (#436)

5 files changed: +317 −0 lines
Lines changed: 23 additions & 0 deletions (README)

# Benchmarking Scripts Toolkit

Companion assets for running `pyperformance` benchmarks on hosts that provide isolated CPUs and for backfilling historical CPython revisions to [speed.python.org](https://speed.python.org/).

## Contents

- `run-pyperformance.sh` – shell wrapper that reserves an isolated CPU (175–191) via lockfiles, renders `benchmark.conf` from `benchmark.conf.in` with `m4`, sets up a virtual environment, and runs `pyperformance` with upload enabled.
- `benchmark.conf.in` – template consumed by the wrapper; the placeholders `TMPDIR` and `CPUID` are filled in so each run has its own working tree, build directory, and CPU affinity.
- `backfill.py` – Python helper that reads revisions from `backfill_shas.txt` and launches multiple `run-pyperformance.sh` jobs in parallel, capturing stdout/stderr per revision under `output/`.
- `backfill_shas.txt` – example list of `sha=branch` pairs targeted by the backfill script.

## Typical Workflow

1. Ensure kernel CPU isolation (`isolcpus=175-191`) and the `lockfile` utility are available so the wrapper can pin workloads without contention.
2. Invoke `./run-pyperformance.sh -- compile benchmark.conf <sha> <branch>` for an ad-hoc run; the script installs `pyperformance==1.13.0`, clones CPython, and uploads results using the environment label configured in `benchmark.conf.in`.
3. Populate `backfill_shas.txt` with the revisions you want to replay and run `python backfill.py` to batch-process them; individual logs land in `output/<branch>-<sha>.out|.err`.

Adjust `benchmark.conf.in` if you need to change build parameters (PGO/LTO, job count, upload target, etc.).

## Scheduled Runs

If you want a daily unattended run, drop an entry like this into `crontab -e` on the host:

```
0 0 * * * cd /home/user/pyperformance/examples/benchmarking-scripts && ./run-pyperformance.sh -- compile_all benchmark.conf > /home/pyperf/pyperformance/cron.log 2>&1
```
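The `m4` templating described above amounts to plain token substitution of `TMPDIR` and `CPUID`. A Python stand-in illustrating the effect (the real wrapper shells out to `m4`; the template excerpt below is hypothetical):

```python
def render_conf(template: str, tmpdir: str, cpuid: int) -> str:
    # Substitutes the TMPDIR/CPUID placeholders the same way the wrapper's
    # `m4 -DTMPDIR=... -DCPUID=...` invocation does for this template.
    return template.replace("TMPDIR", tmpdir).replace("CPUID", str(cpuid))


# Hypothetical two-line excerpt standing in for benchmark.conf.in
template = "json_dir = TMPDIR/json\naffinity = CPUID\n"
print(render_conf(template, "/tmp/pyperformance.abc123", 175))
```

Each concurrent run gets its own rendered copy, so working trees and CPU affinity never collide.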
Lines changed: 55 additions & 0 deletions (backfill.py)

```python
"""
Parallel backfilling helper for pyperformance runs on isolated CPUs.

Reads `sha=branch` pairs from backfill_shas.txt, invokes run-pyperformance.sh
for each revision, and lets that wrapper pin the workload to an isolated CPU,
materialize benchmark.conf, build CPython, and upload results to
speed.python.org. Stdout/stderr for each revision are captured under
output/<branch>-<sha>.(out|err).
"""

import signal
import subprocess
from multiprocessing import Pool
from pathlib import Path


def get_revisions() -> list[tuple[str, str]]:
    revisions = []
    with open("backfill_shas.txt", "r") as f:
        for line in f:
            sha, branch = line.split("=")
            revisions.append((sha, branch.rstrip()))
    return revisions


def run_pyperformance(revision):
    sha, branch = revision
    print(f"Running run-pyperformance.sh with sha: {sha}, branch: {branch}")
    output_dir = Path("output")
    output_dir.mkdir(parents=True, exist_ok=True)
    out_file = output_dir / f"{branch}-{sha}.out"
    err_file = output_dir / f"{branch}-{sha}.err"
    with open(out_file, "w") as output, open(err_file, "w") as error:
        subprocess.run(
            [
                "./run-pyperformance.sh",
                "-x",
                "--",
                "compile",
                "benchmark.conf",
                sha,
                branch,
            ],
            stdout=output,
            stderr=error,
        )


if __name__ == "__main__":
    # Ignore SIGINT while the pool forks so the worker processes inherit the
    # "ignore" disposition, then restore the original handler in the parent.
    original_sigint_handler = signal.signal(signal.SIGINT, signal.SIG_IGN)
    with Pool(8) as pool:
        signal.signal(signal.SIGINT, original_sigint_handler)
        res = pool.map_async(run_pyperformance, get_revisions())
        # Without the timeout this blocking call ignores all signals.
        res.get(86400)
```
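The `map_async`/`get(timeout)` pattern the script relies on can be exercised in isolation with a toy work function (pool size and timeout here are arbitrary, not taken from the script):

```python
from multiprocessing import Pool


def square(x: int) -> int:
    return x * x


if __name__ == "__main__":
    with Pool(4) as pool:
        res = pool.map_async(square, range(5))
        # A timeout on get() keeps the parent process interruptible;
        # a bare get() would block in a way that swallows SIGINT.
        print(res.get(60))  # [0, 1, 4, 9, 16]
```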
Lines changed: 3 additions & 0 deletions (backfill_shas.txt)

```
5d2edf72d25c2616f0e13d10646460a8e69344fa=main
bd2c7e8c8b10f4d31eab971781de13844bcd07fe=main
29b38b7aae884c14085a918282ea7f0798ed7a2a=main
```
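`backfill.py` splits each of these lines on `=`. A variant that also tolerates the bare `sha1=` form with a default branch of `main` (the convention `benchmark.conf.in` documents for `[compile_all_revisions]`) might look like this sketch, which is not part of the repo:

```python
def parse_revisions(lines: list[str]) -> list[tuple[str, str]]:
    revisions = []
    for line in lines:
        line = line.strip()
        if not line:
            continue  # skip blank lines
        # 'sha=branch'; an empty branch falls back to 'main'
        sha, _, branch = line.partition("=")
        revisions.append((sha, branch or "main"))
    return revisions


print(parse_revisions([
    "5d2edf72d25c2616f0e13d10646460a8e69344fa=main",
    "bd2c7e8c8b10f4d31eab971781de13844bcd07fe=",
]))
```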
Lines changed: 102 additions & 0 deletions (benchmark.conf.in)

```ini
[config]
# Directory where JSON files are written.
# - uploaded files are moved to json_dir/uploaded/
# - results of patched Python are written into json_dir/patch/
json_dir = TMPDIR/json

# If True, compile CPython in debug mode (LTO and PGO disabled),
# run benchmarks with --debug-single-sample, and disable upload.
#
# Use this option to quickly test a configuration.
debug = False


[scm]
# Directory of CPython source code (Git repository)
repo_dir = TMPDIR/cpython

# Update the Git repository (git fetch)?
update = True

# Name of the Git remote, used to create the revision of
# the Git branch. For example, use revision 'remotes/origin/3.6'
# for the branch '3.6'.
git_remote = remotes/origin


[compile]
# Create files in bench_dir:
# - bench_dir/bench-xxx.log
# - bench_dir/prefix/: where Python is installed
# - bench_dir/venv/: virtual environment used by pyperformance
bench_dir = TMPDIR/bench_tmpdir

# Link-Time Optimization (LTO)?
lto = True

# Profile-Guided Optimization (PGO)?
pgo = True

# The space-separated list of libraries that are package-only,
# i.e., locally installed but not on header and library paths.
# For each such library, determine the install path and add an
# appropriate subpath to CFLAGS and LDFLAGS declarations passed
# to configure. As an exception, the prefix for openssl, if that
# library is present here, is passed via the --with-openssl
# option. Currently, this only works with Homebrew on macOS.
# If running on macOS with Homebrew, you probably want to use:
# pkg_only = openssl readline sqlite3 xz zlib
# The version of zlib shipping with macOS probably works as well,
# as long as Apple's SDK headers are installed.
pkg_only =

# Install Python? If false, run Python from the build directory.
#
# WARNING: Running Python from the build directory introduces subtle changes
# compared to running an installed Python. Moreover, creating a virtual
# environment using a Python run from the build directory fails in many cases,
# especially on Python older than 3.4. Only disable installation if you
# really understand what you are doing!
install = True

# Specify the '-j' parameter of the 'make' command
jobs = 24

[run_benchmark]
# Run "sudo python3 -m pyperf system tune" before running benchmarks?
system_tune = False

# --manifest option for 'pyperformance run'
manifest =

# --benchmarks option for 'pyperformance run'
benchmarks =

# --affinity option for 'pyperf system tune' and 'pyperformance run'
affinity = CPUID

# Upload the generated JSON file?
#
# Upload is disabled on patched Python, in debug mode, or if install is
# disabled.
upload = True

# Configuration to upload results to a Codespeed website
[upload]
url = https://speed.python.org/
# environment-name should be created on speed.python.org
environment = environment-name
executable = lto-pgo
project = CPython

[compile_all]
# List of CPython Git branches
branches = main


# List of revisions to benchmark by compile_all
[compile_all_revisions]
# list of 'sha1=' (default branch: 'main') or 'sha1=branch'
# used by the "pyperformance compile_all" command
# e.g.:
# 11159d2c9d6616497ef4cc62953a5c3cc8454afb =
```
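A rendered `benchmark.conf` is a standard INI file, so a rendered copy can be sanity-checked with Python's `configparser` (a hypothetical snippet, not something the toolkit ships; the excerpt below stands in for a full rendered config):

```python
import configparser

# Minimal excerpt of a rendered benchmark.conf, placeholders already filled
rendered = """\
[run_benchmark]
affinity = 175
upload = True

[compile]
lto = True
jobs = 24
"""

conf = configparser.ConfigParser()
conf.read_string(rendered)
print(conf.get("run_benchmark", "affinity"))
print(conf.getboolean("compile", "lto"))
```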
Lines changed: 134 additions & 0 deletions (run-pyperformance.sh)

```bash
#!/bin/bash

# Wrapper around pyperformance for hosts with isolated CPUs. Reserves a CPU
# (175-191) via lockfiles, renders benchmark.conf with m4, bootstraps a venv,
# and runs pyperformance pinned to that CPU. Requires kernel isolcpus=175-191
# and the lockfile utility so concurrent runs do not collide, which is
# especially helpful when backfilling multiple revisions.

set -e
set -u
set -o pipefail

lock_file=
tmpdir=
cleanup()
{
    if [[ -n "${lock_file:-}" ]]; then
        echo "Removing $lock_file"
        rm -f "$lock_file"
    fi
    if [[ -n "${tmpdir:-}" ]]; then
        echo "Removing $tmpdir"
        rm -fr "$tmpdir"
    fi
    exit
}

trap cleanup EXIT

usage()
{
    cat <<EOF

usage: run-pyperformance.sh [OPTION]...

-h, --help
    print some basic usage information and exit
-x
    enable tracing in this shell script

Note: if you want to pass arguments to pyperformance, append "--" followed by the arguments.
EOF
}

args=$(getopt -o+hx -l help -n "$(basename "$0")" -- "$@")
eval set -- "$args"
while [ $# -gt 0 ]; do
    if [ -n "${opt_prev:-}" ]; then
        eval "$opt_prev=\$1"
        opt_prev=
        shift 1
        continue
    elif [ -n "${opt_append:-}" ]; then
        if [ -n "$1" ]; then
            eval "$opt_append=\"\${$opt_append:-} \$1\""
        fi
        opt_append=
        shift 1
        continue
    fi
    case $1 in
    -h | --help)
        usage
        exit 0
        ;;

    -x)
        set -x
        ;;

    --)
        shift
        break
        ;;
    esac
    shift 1
done

# The last 17 CPUs (IDs 175-191) are reserved for running pyperformance, and
# we want to make sure that pyperformance runs with affinity on one of them.
# A locking mechanism is used to "reserve" a CPU that is in use.
# Lock files are /var/lock/pyperformance-CPUID (where CPUID is 175, 176... 191).
# Linux is booted with
#
# GRUB_CMDLINE_LINUX="isolcpus=175-191 mitigations=off"
#
# in the /etc/default/grub file.
lock_prefix_path="/var/lock/pyperformance-"

for i in $(seq 175 191); do
    lock_file="$lock_prefix_path$i"
    # lockfile is provided by the procmail package
    if lockfile -r0 "$lock_file"; then
        # Save the CPUID to set the affinity later
        cpuid=$i
        break
    fi
done

if [ -z "${cpuid+x}" ]; then
    echo "Cannot find an available CPU to run pyperformance on. Exiting..."
    # Disable the trap as we don't need to clean up anything
    trap - EXIT
    exit 1
fi

# Create a temporary directory
tmpdir=$(mktemp -d -t pyperformance.XXXXXXXXX)

echo "Pyperformance will be run on CPU $cpuid"
echo "Working directory is $tmpdir"

# Snapshot the benchmark.conf file
m4 \
    -DTMPDIR="$tmpdir" \
    -DCPUID="$cpuid" \
    benchmark.conf.in > "$tmpdir/benchmark.conf"

# This is our working directory from now on
cd "$tmpdir"

# Install pyperformance in a virtual env.
python3 -m venv venv
venv/bin/pip install pyperformance==1.13.0

# Clone cpython
git clone https://github.com/python/cpython.git

# Run pyperformance
venv/bin/pyperformance "$@"
```
