@@ -80,11 +80,13 @@
     FALLBACK_ALLOW_LIST,
     fallback_handler,
     fallback_node_due_to_unsupported_type,
+    get_layout_constraint_tag,
     lowerings,
     make_fallback,
     maybe_layout_constraints,
     needs_realized_inputs,
     require_contiguous,
+    tag_to_layout_constraint,
     unsupported_output_tensor,
 )
 from .runtime import autotune_cache
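The two new imports, `get_layout_constraint_tag` and `tag_to_layout_constraint`, come from `torch/_inductor/lowering.py` and centralize the tag handling that the old inline branch in `call_function` (third hunk below) did by hand. A minimal sketch of the contract this diff relies on: the tag-to-constraint mapping is taken from the removed branch below, while the default-resolution policy for untagged ops is an assumption, and the constraint callables are stubbed here.

```python
import torch

# Stubs standing in for the real constraint callables from
# torch/_inductor/lowering.py (named in the removed branch below).
def constrain_to_fake_tensors(*args, **kwargs): ...
def constrain_to_fx_strides(*args, **kwargs): ...

_TAG_PRIORITY = (
    torch._C.Tag.needs_exact_strides,
    torch._C.Tag.needs_fixed_stride_order,
    torch._C.Tag.flexible_layout,
)

def get_layout_constraint_tag(fn, *, with_default=True):
    # Return the explicit layout-constraint tag on the op, if any.
    for tag in _TAG_PRIORITY:
        if tag in fn.tags:
            return tag
    if not with_default:
        return None
    # Assumed defaults: builtin ATen/prim ops are layout-flexible; custom
    # ops keep their FX stride order (the PyTorch 2.5 behavior mentioned
    # in the removed comment below).
    if torch._library.utils.is_builtin(fn):
        return torch._C.Tag.flexible_layout
    return torch._C.Tag.needs_fixed_stride_order

def tag_to_layout_constraint(tag):
    # Mapping mirrors the removed if/elif chain in call_function below.
    return {
        torch._C.Tag.needs_exact_strides: constrain_to_fake_tensors,
        torch._C.Tag.needs_fixed_stride_order: constrain_to_fx_strides,
        torch._C.Tag.flexible_layout: None,
    }[tag]
```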
@@ -244,6 +246,14 @@ def _get_overload_packet(
             cur.meta["dislike_padding"] = True
             continue

+        if (
+            isinstance(cur.target, torch._ops.OpOverload)
+            and get_layout_constraint_tag(cur.target)
+            == torch._C.Tag.needs_exact_strides
+        ):
+            cur.meta["dislike_padding"] = True
+            continue
+
         op = _get_overload_packet(cur)
         if not op:
             continue
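Padding a tensor changes its strides, so an op tagged `needs_exact_strides` cannot tolerate padded inputs; this hunk marks such nodes with `dislike_padding` before padding decisions are made. A standalone check equivalent to the new guard under default config (the diff goes through `get_layout_constraint_tag`, which here reduces to a direct tag-membership test; the op `mylib::exact_op` is hypothetical):

```python
import torch

def dislikes_padding(target) -> bool:
    # Mirrors the new guard: only OpOverloads carry tags, and only the
    # needs_exact_strides tag forces dislike_padding.
    return (
        isinstance(target, torch._ops.OpOverload)
        and torch._C.Tag.needs_exact_strides in target.tags
    )

# Hypothetical custom op carrying the tag:
lib = torch.library.Library("mylib", "DEF")
lib.define(
    "exact_op(Tensor x) -> Tensor",
    tags=(torch._C.Tag.needs_exact_strides,),
)
assert dislikes_padding(torch.ops.mylib.exact_op.default)
assert not dislikes_padding(torch.ops.aten.add.Tensor)
```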
@@ -1150,34 +1160,26 @@ def call_function(self, target: Callable, args: Any, kwargs: dict[str, Any]) ->
                     error.operator_str(target, args, kwargs),
                 )

-                # use contiguous unless the (custom) op asks something else
-                # explicitly
-                if torch._C.Tag.needs_exact_strides in target.tags:
-                    decided_constraint = constrain_to_fake_tensors  # type: ignore[assignment]
-                elif torch._C.Tag.needs_fixed_stride_order in target.tags:
-                    decided_constraint = constrain_to_fx_strides  # type: ignore[assignment]
-                elif torch._C.Tag.flexible_layout in target.tags:
-                    decided_constraint = None  # type: ignore[assignment]
-                else:
-                    # If there are no tags, we do different things depending on
-                    # if it's a builtin ATen/prim ops or custom ops.
-                    # For ATen ops, we require_contiguous to fix https://github.com/pytorch/pytorch/issues/140452
-                    # For custom ops, we constrain_to_fx_strides to maintain the
-                    # behavior of PyTorch 2.5: https://github.com/pytorch/pytorch/issues/148356
+                tag = get_layout_constraint_tag(target, with_default=False)
+                if (
+                    tag is None
+                    and torch._library.utils.is_builtin(target)
+                    and self.is_backward
+                ):
+                    # For implicit-fallback ATen ops during backward, if there is
+                    # no layout constraint tag, we conservatively require contiguous
+                    # inputs, since some eager kernels do not support non-contiguous
+                    # inputs and may silently cause accuracy problems. See
+                    # https://github.com/pytorch/pytorch/issues/140452
+                    # We only do this for ATen ops, and only in backward.
                     #
-                    # For ATen ops, only apply the constraint for backward
-                    # ops since fwd ops should work for any strides.
-                    if torch._library.utils.is_builtin(target) and self.is_backward:
-                        decided_constraint = require_contiguous  # type: ignore[assignment]
-                    else:
-                        # maybe_layout_constraints will decide the layout constraint for the custom op
-                        # lazily
-                        decided_constraint = None  # type: ignore[assignment]
-
-                # for implicitly fallback ops, we conservatively requires
-                # contiguous input since some eager kernels does not
-                # support non-contiguous inputs. They may silently cause
-                # accuracy problems. Check https://github.com/pytorch/pytorch/issues/140452
+                    # TODO: should really switch to a "needs_fixed_stride_order"
+                    # constraint on these ops and identify them one by one.
+                    decided_constraint = require_contiguous  # type: ignore[assignment]
+                else:
+                    tag = get_layout_constraint_tag(target, with_default=True)
+                    decided_constraint = tag_to_layout_constraint(tag)
+
                 make_fallback(target, layout_constraint=decided_constraint)

             elif get_decompositions([target]):
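Putting the pieces together, the implicit-fallback decision now reduces to the flow below, restated as a pure function. The helper names are the real ones imported in the first hunk; `require_contiguous` is stubbed for self-containedness, and the listed outcomes assume the default-resolution sketch from the first hunk.

```python
def require_contiguous(*args, **kwargs): ...  # stub for the lowering helper

def decide_layout_constraint(target, is_backward):
    # Restates the new branch in call_function, using the sketch helpers above.
    tag = get_layout_constraint_tag(target, with_default=False)
    if tag is None and torch._library.utils.is_builtin(target) and is_backward:
        # Untagged ATen op in a backward graph: be conservative.
        return require_contiguous
    tag = get_layout_constraint_tag(target, with_default=True)
    return tag_to_layout_constraint(tag)

# Expected outcomes (assuming the default policy sketched earlier):
#   untagged ATen op, backward graph -> require_contiguous
#   untagged ATen op, forward graph  -> flexible_layout -> None
#   untagged custom op               -> needs_fixed_stride_order
#                                       -> constrain_to_fx_strides
#   op tagged needs_exact_strides    -> constrain_to_fake_tensors
```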
|