apache · masahi · Aug 9, 2022 · Aug 5, 2022 · Aug 5, 2022 · Aug 5, 2022
diff --git a/python/tvm/relay/op/strategy/x86.py b/python/tvm/relay/op/strategy/x86.py
@@ -836,3 +836,15 @@ def concatenate_strategy_cpu(attrs, inputs, out_type, target):
             name="concatenate.generic",
         )
     return strategy
+
+
+@batch_norm_strategy.register(["cpu"])
+def batch_norm_strategy_cpu(attrs, inputs, out_type, target):
+    """Return the batch_norm op strategy registered for "cpu" targets."""
+    cpu_strategy = _op.OpStrategy()
+    # A single implementation is registered: the TOPI batch_norm compute
+    # paired with the x86 schedule.
+    compute = wrap_compute_batch_norm(topi.nn.batch_norm)
+    schedule = wrap_topi_schedule(topi.x86.schedule_batch_norm)
+    cpu_strategy.add_implementation(compute, schedule, name="batch_norm.cpu")
+    return cpu_strategy
diff --git a/python/tvm/topi/x86/nn.py b/python/tvm/topi/x86/nn.py
@@ -107,3 +107,33 @@ def _callback(op):
 
     traverse_inline(s, outs[0].op, _callback)
     return s
+
+
+def schedule_batch_norm(outs):
+    """Create the x86 schedule for batch_norm.
+
+    Parameters
+    ----------
+    outs: Array of Tensor
+        The output tensors of the batch_norm computation; only the
+        first one has its loops fused and parallelized.
+
+    Returns
+    -------
+    sch: Schedule
+        The schedule built over the ops of all tensors in ``outs``.
+    """
+    sch = te.create_schedule([tensor.op for tensor in outs])
+    out_op = outs[0].op
+    out_stage = sch[out_op]
+    # Fuse every axis except the innermost one and run the fused loop in parallel.
+    leading_axes = [out_op.axis[idx] for idx in range(len(out_op.axis) - 1)]
+    out_stage.parallel(out_stage.fuse(*leading_axes))
+    if "divide" in out_op.name:
+        return sch
+    # Output is not the bare divide: inline the two producers along its first input.
+    producer = out_op.input_tensors[0]
+    grand_producer = sch[producer].op.input_tensors[0]
+    sch[producer].compute_inline()
+    sch[grand_producer].compute_inline()
+    return sch
diff --git a/tests/python/topi/python/test_topi_batch_norm.py b/tests/python/topi/python/test_topi_batch_norm.py
@@ -28,6 +28,7 @@
 _DEVICE = "llvm"
 _BATCH_NORM_IMPLEMENT = {
     "generic": (topi.nn.batch_norm, topi.generic.schedule_batch_norm),
+    "cpu": (topi.nn.batch_norm, topi.x86.schedule_batch_norm),  # same compute, x86 schedule
 }