"""
Dynamo Compile Advanced Usage
=============================

This interactive script is intended as an overview of the process by which
`torch_tensorrt.dynamo.compile` works, and how it integrates with the new
`torch.compile` API.
"""
# %%
# Imports and Model Definition
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

import torch
from torch_tensorrt.fx.lower_setting import LowerPrecision

# %%

# We begin by defining a model
class Model(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.relu = torch.nn.ReLU()

    def forward(self, x: torch.Tensor, y: torch.Tensor):
        x_out = self.relu(x)
        y_out = self.relu(y)
        x_y_out = x_out + y_out
        return torch.mean(x_y_out)

# %%
# Compilation with `torch.compile` Using Default Settings
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

# Define sample float inputs and initialize model
sample_inputs = [torch.rand((5, 7)).cuda(), torch.rand((5, 7)).cuda()]
model = Model().eval().cuda()

# %%

# Next, we compile the model using torch.compile
# For the default settings, we can simply call torch.compile
# with the backend "torch_tensorrt", and run the model on an
# input to trigger compilation, like so:
optimized_model = torch.compile(model, backend="torch_tensorrt")
optimized_model(*sample_inputs)
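
# %%

# To sanity-check the compiled module, we can compare its output against
# eager execution of the original model. This check is an illustrative
# addition to the walkthrough, and the tolerance below is an assumed
# value rather than an official recommendation:
expected = model(*sample_inputs)
actual = optimized_model(*sample_inputs)
assert torch.allclose(expected, actual, atol=1e-3), "Compiled output diverged"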

# %%
# Compilation with `torch.compile` Using Custom Settings
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

# First, we use Torch utilities to clean up the workspace
# after the previous compile invocation
torch._dynamo.reset()

# Define sample half inputs and initialize model
sample_inputs_half = [
    torch.rand((5, 7)).half().cuda(),
    torch.rand((5, 7)).half().cuda(),
]
model_half = Model().eval().cuda()

# %%

# If we want to customize certain options in the backend,
# but still use the torch.compile call directly, we can provide
# custom options to the backend via the "options" keyword,
# which takes in a dictionary mapping options to values.
#
# For accepted backend options, see the CompilationSettings dataclass:
# py/torch_tensorrt/dynamo/backend/_settings.py
backend_kwargs = {
    "precision": LowerPrecision.FP16,
    "debug": True,
    "min_block_size": 2,
    "torch_executed_ops": {"torch.ops.aten.sub.Tensor"},
    "optimization_level": 4,
    "use_experimental_rt": True,
}
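
# %%

# As a sanity check on the option names above, we can introspect the
# CompilationSettings dataclass and confirm every key is a recognized
# field. The import path below mirrors the file referenced above, but it
# is an assumption and may differ between torch_tensorrt versions:
import dataclasses

from torch_tensorrt.dynamo.backend._settings import CompilationSettings

valid_options = {field.name for field in dataclasses.fields(CompilationSettings)}
assert set(backend_kwargs) <= valid_options, "Unrecognized backend option"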

# Run the model on an input to trigger compilation, like so:
optimized_model_custom = torch.compile(
    model_half, backend="torch_tensorrt", options=backend_kwargs
)
optimized_model_custom(*sample_inputs_half)
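
# %%

# As before, we can sanity-check the customized module against eager
# execution of the half-precision model. The looser tolerance below is an
# assumed value, chosen to allow for reduced-precision (FP16) arithmetic:
expected_half = model_half(*sample_inputs_half)
actual_half = optimized_model_custom(*sample_inputs_half)
assert torch.allclose(expected_half, actual_half, atol=1e-2)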

# %%
# Cleanup
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

# Finally, we use Torch utilities to clean up the workspace
torch._dynamo.reset()

with torch.no_grad():
    torch.cuda.empty_cache()
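
# %%

# Optionally, we can report how much CUDA memory remains allocated after
# the cache is emptied. This is an illustrative addition; the exact
# figure will vary by device, driver, and what else is resident:
print(f"CUDA memory still allocated: {torch.cuda.memory_allocated() / 1e6:.1f} MB")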