We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 76c2392, commit 3ea8358 (Copy full SHA for 3ea8358)
topi/python/topi/x86/pooling.py
@@ -98,6 +98,12 @@ def traverse(OP):
98
traverse(tensor.op)
99
# schedule pool
100
elif OP.tag.startswith('pool'):
101
+ # Average pool accumulation and division happen in different for loops (#3607).
102
+ # To ensure good parallel support, apply multi-threading on the second loop.
103
+ output = outs[0]
104
+ output_fused = s[output].fuse(output.op.axis[0], output.op.axis[1])
105
+ s[output].parallel(output_fused)
106
+
107
PaddedInput = OP.input_tensors[0]
108
Pool = OP.output(0)
109
_schedule(PaddedInput, Pool)
0 commit comments