Commit e7f8c09

chore: additional fixes
1 parent 29d6c23 commit e7f8c09

File tree: 4 files changed, +87 −17 lines changed

.github/workflows/build-test.yml

Lines changed: 1 addition & 0 deletions
@@ -142,6 +142,7 @@ jobs:
           ${CONDA_RUN} python -m pip install --pre pytest timm transformers parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver
           ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_fe_test_results.xml --ir dynamo models/test_models_export.py
           ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/test_dyn_models.py
+          ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/output_format.xml --ir dynamo models/test_output_format.py
           popd

   tests-py-dynamo-serde:

docsrc/user_guide/saving_models.rst

Lines changed: 22 additions & 11 deletions
@@ -22,11 +22,10 @@ The `output_format` can take the following options

 * `torchscript` (or) `ts` : This returns a TorchScript module
 * `graph_module` (or) `fx` : This returns a torch.fx.GraphModule which can be traced into Torchscript to save to disk.

-a) Converting to Torchscript
+a) Torchscript
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

-`torch.fx.GraphModule` objects cannot be serialized directly. Hence we use `torch.jit.trace` to convert this into a `ScriptModule` object which can be saved to disk.
-The following code illustrates this approach.
+If you set `output_format="torchscript"`, compilation returns a `ScriptModule`, which can be serialized via `torch.jit.save`.

 .. code-block:: python

@@ -35,6 +34,7 @@ The following code illustrates this approach.

     model = MyModel().eval().cuda()
     inputs = [torch.randn((1, 3, 224, 224)).cuda()]
+    # trt_ts is a torch.jit.ScriptModule object
     trt_ts = torch_tensorrt.compile(model, ir="dynamo", inputs=inputs, output_format="torchscript")
     torch.jit.save(trt_ts, "trt_model.ts")

@@ -45,8 +45,7 @@ The following code illustrates this approach.

 b) ExportedProgram
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

-`torch.export.ExportedProgram` is a new format introduced in Pytorch 2.1. After we compile a Pytorch module using Torch-TensorRT, the resultant
-`torch.fx.GraphModule` along with additional metadata can be used to create `ExportedProgram` which can be saved and loaded from disk.
+`torch.export.ExportedProgram`, a new format introduced in Pytorch 2.X, is the default return type of Torch-TensorRT compilation.

 .. code-block:: python

@@ -55,24 +54,36 @@ b) ExportedProgram

     model = MyModel().eval().cuda()
     inputs = [torch.randn((1, 3, 224, 224)).cuda()]
-    trt_gm = torch_tensorrt.compile(model, ir="dynamo", inputs=inputs) # Output is a torch.fx.GraphModule
-    torch.export.save(trt_exp_program, "trt_model.ep")
+    # trt_ep is a torch.export.ExportedProgram object
+    trt_ep = torch_tensorrt.compile(model, ir="dynamo", inputs=inputs)
+    torch.export.save(trt_ep, "trt_model.ep")

     # Later, you can load it and run inference
     model = torch.export.load("trt_model.ep")
     model(*inputs)

-`torch_tensorrt.dynamo.export` inlines the submodules within a GraphModule to their corresponding nodes and stitches all the nodes together.
-This is needed as `torch._export` serialization cannot handle serializing and deserializing of submodules (`call_module` nodes).
+c) GraphModule
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+We can also return a `torch.fx.GraphModule` object as the output of Torch-TensorRT compilation by setting `output_format="graph_module"`.
+Internally, the partitioning, lowering, and conversion phases operate on GraphModule objects. These can be either traced into TorchScript modules or
+exported into `ExportedProgram` objects.
+
+.. code-block:: python

-.. note:: This way of saving models using `ExportedProgram` is experimental. Here is a known issue: https://github.com/pytorch/TensorRT/issues/2341
+    import torch
+    import torch_tensorrt

+    model = MyModel().eval().cuda()
+    inputs = [torch.randn((1, 3, 224, 224)).cuda()]
+    # trt_gm is a torch.fx.GraphModule object
+    trt_gm = torch_tensorrt.compile(model, ir="dynamo", inputs=inputs, output_format="graph_module")

 Torchscript IR
 --------------

 In Torch-TensorRT 1.X versions, the primary way to compile and run inference with Torch-TensorRT is using Torchscript IR.
-This behavior stays the same in 2.X versions as well.
+For `ir=ts`, this behavior stays the same in 2.X versions as well.

 .. code-block:: python

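The new GraphModule section above stops at compilation, while its prose notes the module can still be traced into TorchScript. A minimal sketch of that follow-on step, assuming the `MyModel` and `inputs` definitions used throughout the guide (`torch.jit.trace` and `torch.jit.save` are standard PyTorch APIs; whether tracing suits a given TRT-partitioned module should be verified per model):

    import torch
    import torch_tensorrt

    model = MyModel().eval().cuda()
    inputs = [torch.randn((1, 3, 224, 224)).cuda()]
    # Compile, asking for a GraphModule back
    trt_gm = torch_tensorrt.compile(model, ir="dynamo", inputs=inputs, output_format="graph_module")

    # Trace the GraphModule into a ScriptModule and save it to disk,
    # as the updated docs describe for the graph_module output format
    trt_script = torch.jit.trace(trt_gm, inputs)
    torch.jit.save(trt_script, "trt_model_from_gm.ts")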
py/torch_tensorrt/dynamo/_exporter.py

Lines changed: 8 additions & 6 deletions
@@ -1,4 +1,3 @@

-import copy
 import operator
 from typing import Any, Dict, Sequence, Tuple, cast

@@ -86,21 +85,23 @@ def lift(gm: torch.fx.GraphModule, graph_signature: Any) -> torch.fx.GraphModule

     # Get the state_dict of graph_module. This is different from exported_program.state_dict
     # exp_program.state_dict contains parameters and buffers whereas a graph_module's state_dict
     # has all parameters registered as torch.tensors.
-    state_dict = copy.deepcopy(gm.state_dict())
+    state_dict = gm.state_dict()

     fake_mode = detect_fake_mode(
         tuple(node.meta["val"] for node in gm.graph.nodes if node.op == "placeholder")
     )
     assert fake_mode is not None

     # Locate the user input to insert new placeholders before them
-    first_user_input_loc, first_user_input = 0, None
+    first_user_input = None
     for node in gm.graph.nodes:
         if node.op == "placeholder" and node.name in graph_signature.user_inputs:
             first_user_input = node
             break
-        first_user_input_loc += 1

+    # At first, the user inputs are the only entries in graph_signature.input_specs, hence non_user_input_idx = 0
+    # The input_specs should be of the form [params, buffers, constant_tensors, user_inputs]
+    non_user_input_idx = 0
     for node in gm.graph.nodes:
         if node.op == "get_attr":
             constant_tensor = getattr(gm, node.target)

@@ -130,14 +131,14 @@ def lift(gm: torch.fx.GraphModule, graph_signature: Any) -> torch.fx.GraphModule

             # Add these parameters/buffers/constants to the existing graph signature
             # before user inputs. These specs are looked up in the state_dict during ExportedProgram creation.
             graph_signature.input_specs.insert(
-                first_user_input_loc,
+                non_user_input_idx,
                 InputSpec(
                     kind=input_kind,
                     arg=TensorArgument(name=const_placeholder_node.name),
                     target=node.target,
                 ),
             )
-            first_user_input_loc += 1
+            non_user_input_idx += 1

     gm.graph.eliminate_dead_code()
     gm.graph.lint()

@@ -257,6 +258,7 @@ def create_trt_exp_program(

     """Creates a new Exported Program. This function takes a torch.fx.GraphModule which has TRT engines
     and constructs an Exported Program object with the new IO node names and state_dict
     """
+
     input_nodes = [node for node in gm.graph.nodes if node.op == "placeholder"]
     output_nodes = [node for node in gm.graph.nodes if node.op == "output"]
     assert output_nodes

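The `first_user_input_loc` → `non_user_input_idx` rename in `lift` reflects an ordering invariant rather than a graph position: `input_specs` initially holds only the user inputs, and each lifted parameter/buffer/constant is inserted ahead of them. A toy sketch of that bookkeeping (plain strings stand in for the real `InputSpec` objects, purely for illustration):

    # Illustrative only: input_specs starts with just the user inputs
    input_specs = ["user_input_0", "user_input_1"]

    non_user_input_idx = 0
    for lifted in ["param_conv_weight", "buffer_bn_mean", "constant_0"]:
        # Each lifted tensor goes in front of the user inputs, keeping the
        # expected [params, buffers, constant_tensors, user_inputs] layout
        input_specs.insert(non_user_input_idx, lifted)
        non_user_input_idx += 1

    print(input_specs)
    # ['param_conv_weight', 'buffer_bn_mean', 'constant_0', 'user_input_0', 'user_input_1']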
tests/py/dynamo/models/test_export_serde.py

Lines changed: 56 additions & 0 deletions
@@ -116,6 +116,62 @@ def forward(self, x):

     )


+@pytest.mark.unit
+def test_no_compile(ir):
+    """
+    This tests export serde functionality on a model
+    which won't convert to TRT because of the min_block_size=5 constraint
+    """
+
+    class MyModule(torch.nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True)
+            self.relu = torch.nn.ReLU()
+
+        def forward(self, x):
+            conv = self.conv(x)
+            conv = conv * 0.5
+            relu = self.relu(conv)
+            return conv, relu
+
+    model = MyModule().eval().cuda()
+    input = torch.randn((1, 3, 224, 224)).to("cuda")
+
+    compile_spec = {
+        "inputs": [
+            torchtrt.Input(
+                input.shape, dtype=torch.float, format=torch.contiguous_format
+            )
+        ],
+        "ir": ir,
+        "debug": True,
+    }
+
+    exp_program = torchtrt.dynamo.trace(model, **compile_spec)
+    trt_exp_program = torchtrt.dynamo.compile(exp_program, **compile_spec)
+    torch.export.save(trt_exp_program, "/tmp/trt.ep")
+    deser_trt_exp_program = torch.export.load("/tmp/trt.ep")
+    # Check Pyt and TRT exported program outputs
+    outputs_pyt = model(input)
+    outputs_trt = trt_exp_program(input)
+    for idx in range(len(outputs_pyt)):
+        cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt[idx])
+        assertions.assertTrue(
+            cos_sim > COSINE_THRESHOLD,
+            msg=f"test_no_compile TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+        )
+
+    # Check Pyt and deserialized TRT exported program outputs
+    outputs_trt_deser = deser_trt_exp_program(input)
+    for idx in range(len(outputs_pyt)):
+        cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt_deser[idx])
+        assertions.assertTrue(
+            cos_sim > COSINE_THRESHOLD,
+            msg=f"test_no_compile deserialized TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+        )
+
+
 @pytest.mark.unit
 def test_hybrid_relu_fallback(ir):
     """

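To run just the new test locally, a sketch mirroring the CI invocation in build-test.yml (it assumes pytest, torch_tensorrt, and a CUDA GPU are available; `--ir` is the custom option these suites already pass in the workflow above):

    import pytest

    # Run from the repository root; "-k test_no_compile" selects only the new test
    pytest.main(
        [
            "tests/py/dynamo/models/test_export_serde.py",
            "-k", "test_no_compile",
            "--ir", "dynamo",
        ]
    )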