diff --git a/.gitignore b/.gitignore index dc3f00389..8a932b213 100644 --- a/.gitignore +++ b/.gitignore @@ -94,3 +94,7 @@ docs/_build/ microbench/pixi.lock microbench/.venv_* microbench/tmp_* +microbench/profile/log* +microbench/profile/*.prof* +microbench/profile/*.log +microbench/profile/callgrind.out.* diff --git a/microbench/Makefile b/microbench/Makefile index 3044a8617..2cc149d15 100644 --- a/microbench/Makefile +++ b/microbench/Makefile @@ -12,12 +12,12 @@ uninstall: $(PYTHON) -m pip uninstall hpy.microbench --yes test: - $(PYTHON) -m pytest -v | tee tmp_results_$(shell $(PYTHON) -c "import sys; print(sys.implementation.name)").txt + $(PYTHON) -m pytest -v --purepy | tee $(shell $(PYTHON) -c 'import util as u; u.print_file_name_results()') bench: test bench_hpy: - $(PYTHON) -m pytest -v -m hpy | tee tmp_results_$(shell $(PYTHON) -c "import sys; print(sys.implementation.name)").txt + $(PYTHON) -m pytest -v -m hpy | tee $(shell $(PYTHON) get_file_name_results.py) clean: rm -f src/*.so src/hpy_simple.py @@ -39,19 +39,16 @@ create_venv_graalpy: $(shell uv python find graalpy) -m venv .venv_graalpy print_cpy: - @echo =================================== CPython ==================================== - @tail tmp_results_cpython.txt -n 29 + @$(PYTHON) print_tmp_results.py cpy print_pypy: - @echo ==================================== PyPy ====================================== - @tail tmp_results_pypy.txt -n 29 + @$(PYTHON) print_tmp_results.py pypy print_graalpy: - @echo =================================== GraalPy ==================================== - @tail tmp_results_graalpy.txt -n 29 + @$(PYTHON) print_tmp_results.py graalpy print_pypy_vs_cpy: - @$(PYTHON) print_other_vs_cpy.py PyPy + @$(PYTHON) print_other_vs_cpy.py pypy print_graalpy_vs_cpy: - @$(PYTHON) print_other_vs_cpy.py GraalPy + @$(PYTHON) print_other_vs_cpy.py graalpy diff --git a/microbench/conftest.py b/microbench/conftest.py index 8e553ffe7..4a43844c1 100644 --- a/microbench/conftest.py +++ b/microbench/conftest.py @@ -1,9 +1,12 @@ import re import time +import sys from collections import defaultdict import pytest import _valgrind +from util import get_short_prefix + class Timer: def __init__(self, nodeid): @@ -72,16 +75,31 @@ def format_ratio(self, reference, value): def display_summary(self, tr): w = tr.write_line w('') + tr.write_sep('=', "INFO", cyan=True) + w(f"implementation: {sys.implementation.name}") + w(f"short prefix: {get_short_prefix()}") tr.write_sep('=', 'BENCHMARKS', cyan=True) - w(' '*40 + ' cpy hpy') - w(' '*40 + '---------------- -------------------') + line = ' '*40 + ' cpy hpy' + if 'purepy' in self.apis: + line += ' purepy' + w(line) + line = ' '*40 + '---------------- -------------------' + if 'purepy' in self.apis: + line += ' -------------------' + w(line) for shortid, timings in self.table.items(): cpy = timings.get('cpy') hpy = timings.get('hpy') + purepy = timings.get('purepy') hpy_ratio = self.format_ratio(cpy, hpy) + purepy_ratio = self.format_ratio(cpy, purepy) cpy = cpy or '' hpy = hpy or '' - w(f'{shortid:<40} {cpy!s:>15} {hpy!s:>15} {hpy_ratio}') + purepy = purepy or '' + w( + f'{shortid:<40} {cpy!s:>15} {hpy!s:>15} {hpy_ratio} ' + f'{purepy!s:>15} {purepy_ratio}' + ) w('') @@ -95,6 +113,7 @@ def pytest_configure(config): config._timersession = TimerSession() config.addinivalue_line("markers", "hpy: mark modules using the HPy API") config.addinivalue_line("markers", "cpy: mark modules using the old Python/C API") + config.addinivalue_line("markers", "purepy: mark modules using pure Python") def pytest_addoption(parser): parser.addoption( @@ -103,7 +122,9 @@ def pytest_addoption(parser): parser.addoption( "--slow", action="store_true", default=False, help="run microbench slower" ) - + parser.addoption( + "--purepy", action="store_true", default=False, help="run pure Python microbenchmarks" + ) VERBOSE_TEST_NAME_LENGTH = 90 diff --git a/microbench/pixi.toml b/microbench/pixi.toml index 6c4267458..a27876608 100644 --- a/microbench/pixi.toml +++ b/microbench/pixi.toml @@ -12,3 +12,4 @@ gcc = ">=15.1.0,<15.2" valgrind = ">=3.25.0,<4" libffi = ">=3.4.6,<4" libxcrypt = ">=4.4.36,<5" +qcachegrind = ">=0.7.4,<0.8" diff --git a/microbench/print_other_vs_cpy.py b/microbench/print_other_vs_cpy.py old mode 100644 new mode 100755 index a303d6f4a..d1bd72115 --- a/microbench/print_other_vs_cpy.py +++ b/microbench/print_other_vs_cpy.py @@ -1,14 +1,21 @@ +#!/usr/bin/env python3 import sys from pathlib import Path +from util import capitalize_implementation, info_from_path, print_sep + try: - other = sys.argv[1] + arg = sys.argv[1] except IndexError: - other = "PyPy" + arg = "pypy" + +path_result_cpy = Path("tmp_results_cpy.txt") + +path_result_other = Path(arg) -path_result_cpy = Path("tmp_results_cpython.txt") -path_result_other = Path(f"tmp_results_{other.lower()}.txt") +if not path_result_other.exists(): + path_result_other = Path(f"tmp_results_{arg}.txt") assert path_result_cpy.exists() assert path_result_other.exists() @@ -21,39 +28,64 @@ def data_from_path(path): ) lines = txt.splitlines()[3:-2] - if "cpy" in path.name: - index_time = 1 - else: - parts = lines[0].split() - if len(parts) == 3: - index_time = 1 + index_time_cpy = 1 + index_time_hpy = 3 + + parts = lines[0].split() + if len(parts) == 1: + if "cpy" in path.name: + index_time_hpy = None else: - index_time = 3 + index_time_cpy = None + index_time_hpy = 1 names = [] - times = [] + times_cpy = [] + times_hpy = [] for line in lines: parts = line.split() names.append(parts[0]) - times.append(float(parts[index_time])) + if index_time_cpy is not None: + times_cpy.append(float(parts[index_time_cpy])) + if index_time_hpy is not None: + times_hpy.append(float(parts[index_time_hpy])) + + return names, times_cpy, times_hpy - return names, times +info = info_from_path(path_result_other) -names, times_cpy = data_from_path(path_result_cpy) -names, times_other = data_from_path(path_result_other) +implementation = info["implementation"] + +names_cpy, times_cpy_cpy, times_cpy_hpy = data_from_path(path_result_cpy) +names, times_other_cpyext, times_other_hpy = data_from_path(path_result_other) + +assert names_cpy == names max_length_name = 45 fmt_name = f"{{:{max_length_name}s}}" -out = f" {other} HPy univ / CPy native (time ratio, smaller is better) " -num_chars = 81 -num_equals = (num_chars - len(out)) // 2 +print_sep( + f"{capitalize_implementation(implementation)} / CPy native (time ratio, smaller is better)" +) + +if info["short prefix"] != implementation: + print(f"short prefix: {info['short prefix']}") -print("\n" + num_equals * "=" + out + num_equals * "=") +if times_other_cpyext: + print(max_length_name * " " + "cpyext HPy univ") +else: + print(max_length_name * " " + "HPy univ") -for index, t_other in enumerate(times_other): - ratio = t_other / times_cpy[index] +for index, t_other_hpy in enumerate(times_other_hpy): + norm = times_cpy_cpy[index] name = fmt_name.format(names[index]) - print(f"{name} {ratio:.2f}") + + ratio = t_other_hpy / norm + + if times_other_cpyext: + ratio_cpyext = times_other_cpyext[index] / norm + print(f"{name} {ratio_cpyext:5.2f} {ratio:5.2f}") + else: + print(f"{name} {ratio:.2f}") diff --git a/microbench/print_tmp_results.py b/microbench/print_tmp_results.py new file mode 100755 index 000000000..806a9f28f --- /dev/null +++ b/microbench/print_tmp_results.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 +import sys + +from pathlib import Path + +import util + +try: + arg = sys.argv[1] +except IndexError: + arg = "pypy" + +path_result = Path(arg) + +if not path_result.exists(): + path_result = Path(f"tmp_results_{arg}.txt") + +assert path_result.exists() + +info = util.info_from_path(path_result) +implementation = info["implementation"] + +util.print_sep(util.capitalize_implementation(implementation)) +if info["short prefix"] != implementation and info[ + "short prefix" +] != util.short_implementation(implementation): + print(f"short prefix: {info['short prefix']}") + +txt = path_result.read_text() +_, txt = txt.split( + "================================== BENCHMARKS ==================================\n" +) + +print(txt.rstrip()) diff --git a/microbench/profile/Makefile b/microbench/profile/Makefile new file mode 100644 index 000000000..0cff2a01b --- /dev/null +++ b/microbench/profile/Makefile @@ -0,0 +1,3 @@ + +clean: + rm -f *.prof* logfile* callgrind.out.* tmp* diff --git a/microbench/profile/README.md b/microbench/profile/README.md new file mode 100644 index 000000000..02aabcf1a --- /dev/null +++ b/microbench/profile/README.md @@ -0,0 +1,40 @@ +# Profile + +## Disable the JIT + +```sh +python --jit off profile_alloc.py +``` + +## With vmprof + +```sh +pip install vmprof +sudo apt install libunwind-dev +python -m vmprof -o tmp_output.log profile_alloc.py +vmprofshow tmp_output.log tree +``` + +## with vmprof-firefox-converter + +```sh +pip install vmprof-firefox-converter +vmprofconvert -run profile_alloc.py cpy +# or +vmprofconvert -run profile_alloc.py +``` + +## With `PYPYLOG` + +```sh +PYPYLOG=jit-log-opt,jit-backend:logfile python profile_alloc_and_die.py +``` + +## With Valgrind / Callgrind + +```sh +valgrind --tool=callgrind --dump-instr=yes --simulate-cache=yes --collect-jumps=yes python profile_alloc_and_die.py --short +``` + +The Valgrind output (a file starting with "callgrind.out.") can for example be +opened with `qcachegrind`. diff --git a/microbench/profile/profile_alloc.py b/microbench/profile/profile_alloc.py new file mode 100644 index 000000000..23183994d --- /dev/null +++ b/microbench/profile/profile_alloc.py @@ -0,0 +1,19 @@ +import gc + +from time import perf_counter as time + +from util import cls, N + + +def main(): + objs = [None] * N + for i in range(N): + objs[i] = cls() + return objs + + +gc.collect() + +t_start = time() +main() +print(f"time per allocation: {(time() - t_start)/N:.1e} s") diff --git a/microbench/profile/profile_alloc_and_die.py b/microbench/profile/profile_alloc_and_die.py new file mode 100644 index 000000000..1d55dbcbe --- /dev/null +++ b/microbench/profile/profile_alloc_and_die.py @@ -0,0 +1,18 @@ +import gc + +from time import perf_counter as time + +from util import cls, N + + +def main(): + for _ in range(N): + obj = cls() + obj.onearg(None) + + +gc.collect() + +t_start = time() +main() +print(f"time per allocation: {(time() - t_start)/N:.1e} s") diff --git a/microbench/profile/profile_allocate_and_collect.py b/microbench/profile/profile_allocate_and_collect.py new file mode 100644 index 000000000..32c8c9b53 --- /dev/null +++ b/microbench/profile/profile_allocate_and_collect.py @@ -0,0 +1,21 @@ +import gc + +from time import perf_counter as time + +from util import cls, N + + +def main(): + objs = [None] * N + for i in range(N): + objs[i] = cls() + return objs + + +gc.collect() + +t_start = time() +_objs = main() +del _objs +gc.collect() +print(f"time per object: {(time() - t_start)/N:.1e} s") diff --git a/microbench/profile/profile_allocate_tuple.py b/microbench/profile/profile_allocate_tuple.py new file mode 100644 index 000000000..07fd1b292 --- /dev/null +++ b/microbench/profile/profile_allocate_tuple.py @@ -0,0 +1,16 @@ +import gc + +from time import perf_counter as time + +from util import simple, N + + +def main(): + for _ in range(N): + simple.allocate_tuple() + +gc.collect() + +t_start = time() +main() +print(f"time per allocation: {(time() - t_start)/N:.1e} s") diff --git a/microbench/profile/profile_method_lookup.py b/microbench/profile/profile_method_lookup.py new file mode 100644 index 000000000..4b3150971 --- /dev/null +++ b/microbench/profile/profile_method_lookup.py @@ -0,0 +1,21 @@ +import gc + +from time import perf_counter as time + +from util import cls, N + +N *= 40 + +def main(): + obj = cls() + for _ in range(N): + # note: here we are NOT calling it, we want to measure just + # the lookup + obj.noargs # pylint: disable=W0104 + + +gc.collect() + +t_start = time() +main() +print(f"time per lookup: {(time() - t_start)/N:.1e} s") diff --git a/microbench/profile/util.py b/microbench/profile/util.py new file mode 100644 index 000000000..45ce00027 --- /dev/null +++ b/microbench/profile/util.py @@ -0,0 +1,24 @@ +import sys + +if "purepy" in sys.argv: + import purepy_simple as simple +elif "cpy" in sys.argv: + import cpy_simple as simple +else: + if sys.implementation.name == "cpython": + import cpy_simple as simple + else: + import hpy_simple as simple + +print(simple) + +cls = simple.HTFoo + +N = 10000000 +if sys.implementation.name == "cpython": + N *= 10 +elif sys.implementation.name == "pypy" and "cpy" in sys.argv: + N *= 4 + +if "--short" in sys.argv: + N //= 100 diff --git a/microbench/src/purepy_simple.py b/microbench/src/purepy_simple.py new file mode 100644 index 000000000..7a7e091ad --- /dev/null +++ b/microbench/src/purepy_simple.py @@ -0,0 +1,72 @@ +class Foo: + """""" + + def __len__(self): + return 42 + + def __getitem__(self, key): + return None + + def noargs( + self, + ): + """""" + + def onearg(self, arg): + """""" + + def varargs(self, *args): + """""" + + def call_with_tuple(self, func, *args): + """""" + if len(args) != 1: + raise TypeError("call_with_tuple requires two arguments") + return func(*args[0]) + + def call_with_tuple_and_dict(self, func, *args): + """""" + return func(args[0][0], args[1]["b"]) + + def allocate_int( + self, + ): + return 2048 + + def allocate_tuple(self): + return (2048, 2049) + + +def noargs(): + """""" + + +def onearg(arg): + """""" + + +def varargs(*args): + """""" + + +def call_with_tuple(func, *args): + """""" + if len(args) != 1: + raise TypeError("call_with_tuple requires two arguments") + return func(*args[0]) + + +def call_with_tuple_and_dict(func, *args): + """""" + return func(args[0][0], args[1]["b"]) + + +def allocate_int(): + return 2048 + + +def allocate_tuple(): + return (2048, 2049) + + +HTFoo = Foo diff --git a/microbench/test_microbench.py b/microbench/test_microbench.py index ddddad11a..0f7ef9807 100644 --- a/microbench/test_microbench.py +++ b/microbench/test_microbench.py @@ -3,28 +3,42 @@ timing and display the results is inside conftest.py """ +import sys + import pytest import _valgrind API_PARAMS = [ - pytest.param('cpy', marks=pytest.mark.cpy), - pytest.param('hpy', marks=pytest.mark.hpy) - ] + pytest.param("cpy", marks=pytest.mark.cpy), + pytest.param("hpy", marks=pytest.mark.hpy), +] + +if any(arg.endswith("purepy") for arg in sys.argv): + API_PARAMS.append(pytest.param("purepy", marks=pytest.mark.purepy)) + @pytest.fixture(params=API_PARAMS) def api(request): return request.param + @pytest.fixture def simple(request, api): - if api == 'cpy': + if api == "cpy": import cpy_simple + return cpy_simple - elif api == 'hpy': + elif api == "hpy": import hpy_simple + return hpy_simple + elif api == "purepy": + import purepy_simple + + return purepy_simple else: - assert False, 'Unkown param: %s' % request.param + assert False, f"Unkown param: {request.param}" + @pytest.fixture def N(request): @@ -88,150 +102,102 @@ def test_allocate_tuple(self, api, simple, timer, N): class TestType: - """ Compares the performance of operations on types. + """Compares the performance of operations on types. - The kinds of type used are: + The kinds of type used are: - * cpy: a static type - * hpy: a heap type (HPy only has heap types) + * cpy: a static type + * hpy: a heap type (HPy only has heap types) - The type is named `simple.Foo` in both cases. + The type is named `simple.Foo` in both cases. """ + name_cls = "Foo" + def test_allocate_and_collect(self, simple, timer, N): import gc - Foo = simple.Foo + + cls = getattr(simple, self.name_cls) objs = [None] * N gc.collect() with timer: for i in range(N): - objs[i] = Foo() + objs[i] = cls() del objs gc.collect() def test_allocate_obj(self, simple, timer, N): import gc - Foo = simple.Foo + + cls = getattr(simple, self.name_cls) objs = [None] * N gc.collect() with timer: for i in range(N): - objs[i] = Foo() + objs[i] = cls() del objs gc.collect() - def test_method_lookup(self, simple, timer, N): - obj = simple.Foo() - with timer: - for i in range(N): - # note: here we are NOT calling it, we want to measure just - # the lookup - obj.noargs - - def test_noargs(self, simple, timer, N): - obj = simple.Foo() - with timer: - for i in range(N): - obj.noargs() - - def test_onearg_None(self, simple, timer, N): - obj = simple.Foo() - with timer: - for i in range(N): - obj.onearg(None) - - def test_onearg_int(self, simple, timer, N): - obj = simple.Foo() - with timer: - for i in range(N): - obj.onearg(i) - - def test_varargs(self, simple, timer, N): - obj = simple.Foo() - with timer: - for i in range(N): - obj.varargs(None, None) - - def test_len(self, simple, timer, N): - obj = simple.Foo() - with timer: - for i in range(N): - len(obj) - - def test_getitem(self, simple, timer, N): - obj = simple.Foo() - with timer: - for i in range(N): - obj[0] - - -class TestHeapType: - """ Compares the performance of operations on heap types. - - The type is named `simple.HTFoo` and is a heap type in all cases. - """ - - def test_allocate_obj_and_survive(self, simple, timer, N): - import gc - HTFoo = simple.HTFoo - objs = [None] * N - gc.collect() - with timer: - for i in range(N): - objs[i] = HTFoo() - del objs - gc.collect() - def test_allocate_obj_and_die(self, simple, timer, N): import gc - HTFoo = simple.HTFoo + + cls = getattr(simple, self.name_cls) gc.collect() with timer: - for i in range(N): - obj = HTFoo() + for _ in range(N): + obj = cls() obj.onearg(None) gc.collect() def test_method_lookup(self, simple, timer, N): - obj = simple.HTFoo() + obj = getattr(simple, self.name_cls)() with timer: - for i in range(N): + for _ in range(N): # note: here we are NOT calling it, we want to measure just # the lookup obj.noargs def test_noargs(self, simple, timer, N): - obj = simple.HTFoo() + obj = getattr(simple, self.name_cls)() with timer: - for i in range(N): + for _ in range(N): obj.noargs() def test_onearg_None(self, simple, timer, N): - obj = simple.HTFoo() + obj = getattr(simple, self.name_cls)() with timer: - for i in range(N): + for _ in range(N): obj.onearg(None) def test_onearg_int(self, simple, timer, N): - obj = simple.HTFoo() + obj = getattr(simple, self.name_cls)() with timer: for i in range(N): obj.onearg(i) def test_varargs(self, simple, timer, N): - obj = simple.HTFoo() + obj = getattr(simple, self.name_cls)() with timer: - for i in range(N): + for _ in range(N): obj.varargs(None, None) def test_len(self, simple, timer, N): - obj = simple.HTFoo() + obj = getattr(simple, self.name_cls)() with timer: - for i in range(N): + for _ in range(N): len(obj) def test_getitem(self, simple, timer, N): - obj = simple.HTFoo() + obj = getattr(simple, self.name_cls)() with timer: - for i in range(N): + for _ in range(N): obj[0] + + +class TestHeapType(TestType): + """Compares the performance of operations on heap types. + + The type is named `simple.HTFoo` and is a heap type in all cases. + """ + + name_cls = "HTFoo" diff --git a/microbench/util.py b/microbench/util.py new file mode 100644 index 000000000..0f5b9c760 --- /dev/null +++ b/microbench/util.py @@ -0,0 +1,51 @@ +import sys + +from pathlib import Path + + +def get_short_prefix(): + prefix = Path(sys.prefix) + name = prefix.name + parts = [".", "venv", "_", "-"] + for part in parts: + if name.startswith(part): + name = name[len(part) :] + return name + + +def info_from_path(path): + txt = path.read_text() + _, txt = txt.split( + "===================================== INFO =====================================" + ) + txt, _ = txt.split( + "================================== BENCHMARKS ==================================" + ) + result = {} + for line in txt.splitlines(): + if not line: + continue + left, right = line.split(":") + result[left] = right.strip() + return result + + +def capitalize_implementation(implementation): + return implementation.capitalize().replace("py", "Py") + + +def short_implementation(implementation): + if implementation == "cpython": + return "cpy" + return implementation + + +def print_sep(out, sep_char="=", num_chars=81): + out = f" {out} " + num_sep_chars = (num_chars - len(out)) // 2 + print(num_sep_chars * "=" + out + num_sep_chars * "=") + + +def print_file_name_results(): + name = f"tmp_results_{get_short_prefix()}.txt" + print(name)