######################################################################
# Register the NVCC Compiler Option
# ---------------------------------
- # NNVM optimizes the graph and relies on TVM to generate fast
- # GPU code, to get the maximum performance, we need to enable
- # nvcc's compiler hook. This gives better performance than nvrtc mode.
+ # NNVM optimizes the graph and relies on TVM to generate fast GPU code.
+ # To get the maximum performance, we need to enable nvcc's compiler hook.
+ # This gives better performance than nvrtc mode.

@tvm.register_func
def tvm_callback_cuda_compile(code):
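    # A minimal sketch of a typical hook body, assuming TVM's bundled
    # nvcc helper (the actual body is not shown in this hunk): compile
    # the generated CUDA source to PTX and hand the binary back to TVM.
    from tvm.contrib import nvcc
    ptx = nvcc.compile_cuda(code, target="ptx")
    return ptx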
@@ -28,7 +28,7 @@ def tvm_callback_cuda_compile(code):
# Prepare the Benchmark
# ---------------------
# We construct a standard ImageNet inference benchmark.
- # We use nnvm's testing utility to produce the model description and random parameters that so the example does not
+ # We use nnvm's testing utility to produce the model description and random parameters so that the example does not
# depend on a specific front-end framework.
#
# .. note::
@@ -46,17 +46,17 @@ def tvm_callback_cuda_compile(code):
    batch_size=1, image_shape=image_shape)

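# A minimal sketch of the shape setup such a workload typically uses;
# the exact values are illustrative assumptions, not taken from the
# script:

batch_size = 1
image_shape = (3, 224, 224)                 # assumed: CHW ImageNet resolution
data_shape = (batch_size,) + image_shape    # NCHW batch fed to the "data" input
out_shape = (batch_size, 1000)              # assumed: one score per ImageNet class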
######################################################################
- # Compile The Graph
+ # Compile the Graph
# -----------------
# NNVM needs two things to compile a deep learning model:
#
- # - net which is the graph representation of the computation
- # - params a dictionary of str to parameters.
+ # - net: the graph representation of the computation
+ # - params: a dictionary of str to parameters
#
# To compile the graph, we call the build function with the graph
# configuration and parameters.
- # When parameters are provided, NNVM will pre-compute certain part of the graph if possible,
- # the new parameter set returned as the third return value.
+ # When parameters are provided, NNVM will pre-compute certain parts of the graph if possible (e.g. simplify batch normalization to scale shift),
+ # and return the updated parameters.

graph, lib, params = nnvm.compiler.build(
    net, target, shape={"data": data_shape}, params=params)
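# The compiled artifacts can also be saved for offline deployment; a
# hedged sketch using standard TVM/NNVM utilities (the file names here
# are illustrative):

lib.export_library("deploy.so")                     # compiled operator library
with open("deploy.json", "w") as f:
    f.write(graph.json())                           # serialized graph structure
with open("deploy.params", "wb") as f:
    f.write(nnvm.compiler.save_param_dict(params))  # serialized parameters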
@@ -65,7 +65,7 @@ def tvm_callback_cuda_compile(code):
# Run the Compiled Module
# -----------------------
#
- # To deploy the module, we call :any:`tvm.contrib.graph_runtime.create` passing in the graph the lib and context.
+ # To deploy the module, we call :any:`tvm.contrib.graph_runtime.create`, passing in the graph, the lib, and the context.
# Thanks to TVM, we can deploy the compiled module to many platforms and languages.
# The deployment module is designed to contain minimal dependencies.
# This example runs on the same machine.
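# A minimal sketch of the steps that usually precede ``module.run()``,
# assuming a CUDA context and random input data; ``ctx`` and ``data``
# are illustrative names:

import numpy as np
from tvm.contrib import graph_runtime

ctx = tvm.gpu(0)
data = np.random.uniform(size=data_shape).astype("float32")
module = graph_runtime.create(graph, lib, ctx)
module.set_input(**params)      # bind the pre-computed parameters
module.set_input("data", data)  # bind the input batch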
@@ -79,5 +79,5 @@ def tvm_callback_cuda_compile(code):
module.run()
# get output
out = module.get_output(0, tvm.nd.empty(out_shape))
- # Convert to numpy
+ # convert to numpy
out.asnumpy()
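# Since this is a benchmark, the end-to-end run can also be timed; a
# sketch using TVM's time_evaluator, assuming ``ctx`` is the context
# the module was created with:

ftimer = module.module.time_evaluator("run", ctx, number=10)
print("mean inference time: %.2f ms" % (ftimer().mean * 1000))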