2727 UIntXWeightOnlyConfig ,
2828)
2929from torchao .sparsity .sparse_api import BlockSparseWeightConfig , SemiSparseWeightConfig
30+ from torch .profiler import profile , record_function , ProfilerActivity
31+ import os
32+ import subprocess
33+ import sys
34+ import uuid
35+
3036
3137try :
3238 import triton # noqa: F401
@@ -84,6 +90,8 @@ def __init__(
8490 "name" ,
8591 f"benchmark_{ self .quantization } _{ self .model_type } _m{ self .m } _k{ self .k } _n{ self .n } { '_compile' if self .use_torch_compile else '' } " ,
8692 )
93+ self .profile = params .get ("profile" , False )
94+ self .profile_file_name = os .path .join (self .output_dir , f"/profile/{ self .name } _{ self .m } _{ self .k } _{ self .n } _profile.json" )
8795
8896 @staticmethod
8997 def _parse_precision (precision_str : str ) -> torch .dtype :
@@ -105,6 +113,7 @@ def to_dict(self) -> Dict[str, Any]:
105113 "device" : self .device ,
106114 "model_type" : self .model_type ,
107115 "output_dir" : self .output_dir ,
116+ "profile" : self .profile ,
108117 }
109118
110119
@@ -319,6 +328,61 @@ def model_inference_time_in_ms(model, input_data):
319328 return res * 1e6
320329
321330
def upload_trace_file(local_path: str, overwrite: bool = False) -> Optional[str]:
    """Upload a local trace file with the ``manifold`` command-line tool.

    The destination is
    ``perfetto_internal_traces/tree/shared_trace/<user>_<uuid4>_<basename>``,
    so every call produces a fresh destination path. The upload is requested
    with a TTL of 28 days.

    Args:
        local_path: Path of the trace file on the local filesystem.
        overwrite: Currently unused; accepted so existing callers that pass it
            keep working.

    Returns:
        The destination path on success, or ``None`` when the ``manifold``
        command could not be started or exited with a non-zero status.
    """
    import getpass

    try:
        user = os.getlogin()
    except OSError:
        # os.getlogin() needs a controlling terminal, which is missing in
        # CI jobs, containers and detached sessions; fall back to the
        # environment / password database instead of crashing.
        try:
            user = getpass.getuser()
        except (OSError, KeyError, ImportError):
            user = "unknown"

    file_name = os.path.basename(local_path)
    manifold_path = os.path.join(
        "perfetto_internal_traces/tree/shared_trace",
        f"{user}_{uuid.uuid4()}_{file_name}",
    )
    cmd = [
        "manifold",
        "put",
        local_path,
        manifold_path,
        "--ttl",
        str(28 * 24 * 60 * 60),
        "--userData",
        "false",
    ]
    try:
        ret = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
        )
    except OSError as err:
        # Raised when the `manifold` executable is missing or not runnable.
        print(f"[ERROR] Upload failed, could not run the manifold CLI: {err}")
        return None

    if ret.returncode == 0:
        print("Upload trace successfully.")
        return manifold_path

    print("[ERROR] Upload failed, maybe the trace file exists.")
    details = (ret.stderr or "").strip()
    if details:
        # Surface the tool's own diagnostics rather than only guessing.
        print(details)
    return None
355+
356+
def print_perfetto_ui_url(manifold_path: str) -> None:
    """Print a Perfetto UI link that opens the trace stored at ``manifold_path``.

    Args:
        manifold_path: Path of the uploaded trace, appended verbatim to the
            ``interncache-all.fbcdn.net/manifold/`` base URL.
    """
    ui_page = "https://interncache-all.fbcdn.net/manifold/perfetto-artifacts/tree/ui/index.html"
    trace_location = f"https://interncache-all.fbcdn.net/manifold/{manifold_path}"
    # The UI page receives the trace location through its `#!/?url=` fragment.
    link = f"{ui_page}#!/?url={trace_location}"
    print(f"The trace is accessible at:\n{link}")
364+
365+
def generate_model_profile(model, input_data, profile_file):
    """Profile a single inference pass of ``model`` and export a Chrome trace.

    The model is called once as ``model(*input_data)`` under
    ``torch.no_grad()`` inside a ``torch.profiler.profile`` context that
    records operator shapes. The trace is written to ``profile_file`` and then
    passed to ``upload_trace_file``; when that returns a path, the matching
    Perfetto UI link is printed.

    Args:
        model: Callable invoked as ``model(*input_data)``.
        input_data: Iterable of positional arguments for ``model``.
        profile_file: Destination path of the exported Chrome trace.

    Returns:
        The ``torch.profiler.profile`` object used for the run.
    """
    # Only request CUDA activity / synchronization when CUDA is usable, so the
    # function also works on CPU-only hosts.
    use_cuda = torch.cuda.is_available()
    activities = [ProfilerActivity.CPU]
    if use_cuda:
        activities.append(ProfilerActivity.CUDA)

    # export_chrome_trace does not create missing directories.
    trace_dir = os.path.dirname(profile_file)
    if trace_dir:
        os.makedirs(trace_dir, exist_ok=True)

    torch.profiler._utils._init_for_cuda_graphs()
    prof = torch.profiler.profile(activities=activities, record_shapes=True)
    with prof:
        with torch.no_grad():
            _ = model(*input_data)
            if use_cuda:
                # Wait for queued kernels so they land inside the trace.
                torch.cuda.synchronize()
    prof.export_chrome_trace(profile_file)  # Save profiling details

    # Uploading is best-effort: the local trace is already on disk, so a
    # missing or failing upload tool must not discard the profile.
    try:
        manifold_path = upload_trace_file(profile_file)
    except OSError as err:
        print(f"[ERROR] Could not upload trace file: {err}")
        manifold_path = None
    if manifold_path:
        print_perfetto_ui_url(manifold_path)

    # Return the profiler output
    return prof
384+
385+
322386def create_model_and_input (
323387 model_type : str ,
324388 m : int ,
0 commit comments