@@ -139,6 +139,35 @@ def _directory_to_zip_bytes(directory_path) -> str:
139139 return base64 .b64encode (data ).decode ('utf-8' )
140140
141141
142+ def _filter_ncu_report (report : str , tables : list ):
143+ """
144+ Extract the Speed-of-light section from the full ncu terminal report.
145+
146+ For expert users, we just attach the full ncu profile to the result,
147+ and they can view whichever metrics they are interested in. But to
148+ encourage novice users to try out profiling, we want to have a
149+ *simple* set of things to display automatically, short enough to fit
150+ in a *single* discord message.
151+ """
152+ result = ""
153+ collect = False
154+ for line in report .splitlines ():
155+ if "Table Name : " in line :
156+ table = line [line .find ("Table Name :" ) + len ("Table Name :" ):].strip ()
157+ if table in tables :
158+ result += "\n "
159+ collect = True
160+ else :
161+ collect = False
162+
163+ if len (line .strip ()) == 0 :
164+ collect = False
165+
166+ if collect :
167+ result += line + "\n "
168+ return result
169+
170+
142171def compile_cuda_script ( # # noqa: C901
143172 files : list [str ],
144173 arch : Optional [int ] = None ,
@@ -420,7 +449,8 @@ def profile_program_ncu(
420449 profile_result = None
421450
422451 try :
423- report = subprocess .check_output (["ncu" , "--import" , f"{ str (output_dir / 'profile.ncu-rep' )} " ,], text = True )
452+ report = subprocess .check_output (["ncu" , "--import" , f"{ str (output_dir / 'profile.ncu-rep' )} " , "--print-details" , "body" ], text = True )
453+ report = _filter_ncu_report (report , ["GPU Throughput" , "Pipe Utilization (% of active cycles)" , "Warp State (All Cycles)" ])
424454 run_result .result ["benchmark.0.report" ] = base64 .b64encode (report .encode ("utf-8" )).decode ("utf-8" )
425455 except subprocess .CalledProcessError :
426456 pass
0 commit comments