Skip to content

Commit 319a4f2

Browse files
zhxchen17 (facebook-github-bot)

authored and committed
Remove unneeded _to_copy in edge dialect.
Summary: In executorch we will dtype-specialize the kernels and also run on a single device with export. Therefore _to_copy is not needed in edge dialect. Reviewed By: tugsbayasgalan Differential Revision: D56579169 fbshipit-source-id: 5a2e3cd453a11bd2ad009b439587b0fc589f7fe4
1 parent 8fcba36 commit 319a4f2

File tree

2 files changed

+29
-1
lines changed

2 files changed

+29
-1
lines changed

exir/passes/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@
4343
from executorch.exir.passes.memory_planning_pass import MemoryPlanningPass
4444
from executorch.exir.passes.normalize_transpose_pass import NormalizeTransposePass
4545
from executorch.exir.passes.quant_fusion_pass import QuantFusionPass
46-
from executorch.exir.passes.remove_noop_pass import RemoveNoopPass
46+
from executorch.exir.passes.remove_noop_pass import RemoveNoopPass, RemoveToCopyPass
4747
from executorch.exir.passes.replace_aten_with_edge_pass import OpReplacePass
4848
from executorch.exir.passes.replace_broken_ops_with_function_ops_pass import (
4949
ReplaceBrokenOpsWithFunctionalOpsPass,
@@ -482,6 +482,7 @@ def dead_code_elimination_pass(graph_module: torch.fx.GraphModule) -> PassResult
482482
ScalarToTensorPass(),
483483
SymToTensorPass(),
484484
RemoveNoopPass(),
485+
RemoveToCopyPass(),
485486
]
486487
).passes
487488

exir/passes/remove_noop_pass.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,3 +90,30 @@ def call(self, graph_module: GraphModule) -> PassResult:
9090
graph_module.graph.eliminate_dead_code()
9191

9292
return PassResult(graph_module, True)
93+
94+
95+
class RemoveToCopyPass(ExportPass):
    """Remove ``aten._to_copy`` nodes that change neither dtype nor device.

    Such a node passes its input tensor through unchanged, so every use of
    the node is rewired to its first argument and the node itself is then
    swept away by dead-code elimination.
    """

    def call(self, graph_module: GraphModule) -> PassResult:
        removable_targets = (torch.ops.aten._to_copy.default,)

        for node in graph_module.graph.nodes:
            # Only function-call nodes whose target is _to_copy are candidates.
            if node.op != "call_function" or node.target not in removable_targets:
                continue

            src_val = node.args[0].meta["val"]
            out_val = node.meta["val"]

            # If the copy preserves both dtype and device it is a pass-through;
            # forward the input tensor to all users of this node.
            # NOTE(review): layout/memory_format are not compared here —
            # presumably irrelevant in this dialect; confirm if that changes.
            if src_val.dtype == out_val.dtype and src_val.device == out_val.device:
                node.replace_all_uses_with(node.args[0])

        # Drop the now-unused _to_copy nodes and check graph invariants.
        graph_module.graph.eliminate_dead_code()
        graph_module.graph.lint()

        return PassResult(graph_module, True)

0 commit comments

Comments
 (0)