1- // RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs-from-loops bufferize-function-boundaries test-analysis-only" -split-input-file | FileCheck %s
1+ // RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs-from-loops bufferize-function-boundaries test-analysis-only" -split-input-file | FileCheck %s --check-prefixes=CHECK,PARALLEL-CHECK
2+ // RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs-from-loops bufferize-function-boundaries test-analysis-only check-parallel-regions=false" -split-input-file | FileCheck %s --check-prefixes=CHECK,NO-PARALLEL-CHECK
23
34// Run fuzzer with different seeds.
45// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs-from-loops bufferize-function-boundaries test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=23" -split-input-file -o /dev/null
@@ -811,8 +812,10 @@ func.func @parallel_region() -> tensor<320xf32>
811812 %0 = scf.forall (%arg0 ) in (%c320 ) shared_outs (%arg1 = %alloc0 ) -> (tensor <320 xf32 >) {
812813 %val = " test.foo" () : () -> (f32 )
813814 // linalg.fill must bufferize out-of-place because every thread needs a
814- // private copy of %alloc1.
815- // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]}
815+ // private copy of %alloc1. When parallel regions are not checked
816+ // (check-parallel-regions=false), the fill can bufferize in place.
817+ // PARALLEL-CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]}
818+ // NO-PARALLEL-CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]}
816819 %fill = linalg.fill ins (%val : f32 ) outs (%alloc1 : tensor <1 xf32 >) -> tensor <1 xf32 >
817820 scf.forall.in_parallel {
818821 // CHECK: tensor.parallel_insert_slice {{.*}} {__inplace_operands_attr__ = ["true", "true", "none"]}
@@ -841,8 +844,10 @@ func.func @parallel_region_mixed_def(%c: i1) -> tensor<320xf32>
841844 }
842845 %val = " test.foo" () : () -> (f32 )
843846 // linalg.fill must bufferize out-of-place because every thread needs a
844- // private copy of %alloc1.
845- // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]}
847+ // private copy of %alloc1. When parallel regions are not checked
848+ // (check-parallel-regions=false), the fill can bufferize in place.
849+ // PARALLEL-CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]}
850+ // NO-PARALLEL-CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]}
846851 %fill = linalg.fill ins (%val : f32 ) outs (%selected : tensor <1 xf32 >) -> tensor <1 xf32 >
847852 scf.forall.in_parallel {
848853 // CHECK: tensor.parallel_insert_slice {{.*}} {__inplace_operands_attr__ = ["true", "true", "none"]}
@@ -866,8 +871,10 @@ func.func @parallel_region_two_writes(%f: f32) -> tensor<320xf32>
866871 %0 = scf.forall (%arg0 ) in (%c320 ) shared_outs (%arg1 = %alloc0 ) -> (tensor <320 xf32 >) {
867872 %val = " test.foo" () : () -> (f32 )
868873 // linalg.fill must bufferize out-of-place because every thread needs a
869- // private copy of %alloc1.
870- // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]}
874+ // private copy of %alloc1. When parallel regions are not checked
875+ // (check-parallel-regions=false), the fill can bufferize in place.
876+ // PARALLEL-CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]}
877+ // NO-PARALLEL-CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]}
871878 %fill = linalg.fill ins (%val : f32 ) outs (%alloc1 : tensor <1 xf32 >) -> tensor <1 xf32 >
872879 // CHECK: tensor.insert
873880 // CHECK-SAME: __inplace_operands_attr__ = ["none", "true", "none"]
0 commit comments