Skip to content

Commit b014b79

Browse files
committed
attempt at filtered ncu
1 parent 00c215a commit b014b79

File tree

1 file changed

+31
-1
lines changed

1 file changed

+31
-1
lines changed

src/libkernelbot/run_eval.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,35 @@ def _directory_to_zip_bytes(directory_path) -> str:
139139
return base64.b64encode(data).decode('utf-8')
140140

141141

142+
def _filter_ncu_report(report: str, tables: list):
143+
"""
144+
Extract the Speed-of-light section from the full ncu terminal report.
145+
146+
For expert users, we just attach the full ncu profile to the result,
147+
and they can view whichever metrics they are interested in. But to
148+
encourage novice users to try out profiling, we want to have a
149+
*simple* set of things to display automatically, short enough to fit
150+
in a *single* discord message.
151+
"""
152+
result = ""
153+
collect = False
154+
for line in report.splitlines():
155+
if "Table Name : " in line:
156+
table = line[line.find("Table Name :") + len("Table Name :"):].strip()
157+
if table in tables:
158+
result += "\n"
159+
collect = True
160+
else:
161+
collect = False
162+
163+
if len(line.strip()) == 0:
164+
collect = False
165+
166+
if collect:
167+
result += line + "\n"
168+
return result
169+
170+
142171
def compile_cuda_script( # # noqa: C901
143172
files: list[str],
144173
arch: Optional[int] = None,
@@ -420,7 +449,8 @@ def profile_program_ncu(
420449
profile_result = None
421450

422451
try:
423-
report = subprocess.check_output(["ncu", "--import", f"{str(output_dir / 'profile.ncu-rep')}",], text=True)
452+
report = subprocess.check_output(["ncu", "--import", f"{str(output_dir / 'profile.ncu-rep')}", "--print-details", "body"], text=True)
453+
report = _filter_ncu_report(report, ["GPU Throughput", "Pipe Utilization (% of active cycles)", "Warp State (All Cycles)"])
424454
run_result.result["benchmark.0.report"] = base64.b64encode(report.encode("utf-8")).decode("utf-8")
425455
except subprocess.CalledProcessError:
426456
pass

0 commit comments

Comments
 (0)