Skip to content

Commit a33544b

Browse files
authored
[OpenACC][CIR] Implement 'alloca copying' for private lowering (llvm#161382)
The previous patch ensured that the allocas were correctly put in place. This patch takes the address of each element of each alloca and copies it into the previous alloca. This allows us to re-form the pointer structure for a recipe.
1 parent b413ac1 commit a33544b

8 files changed

+2354
-10
lines changed

clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp

Lines changed: 88 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,75 @@ mlir::Block *OpenACCRecipeBuilderBase::createRecipeBlock(mlir::Region &region,
3636
llvm::SmallVector<mlir::Location> locs{types.size(), loc};
3737
return builder.createBlock(&region, region.end(), types, locs);
3838
}
39+
// Emits a loop, wrapped in its own cir::ScopeOp, that fills 'destAlloca' with
// pointers into 'srcAlloca':
//
//   for (itr = 0; itr < numEltsToCopy; ++itr)
//     destAlloca[itr] = srcAlloca[offsetPerSubarray * itr];
//
// 'copyType' is the result type of the stride into 'srcAlloca'; the stride
// into 'destAlloca' yields a pointer to 'copyType'. A null 'numEltsToCopy' is
// treated as an element count of 1. The builder's insertion point is guarded
// for the duration of this function.
void OpenACCRecipeBuilderBase::makeAllocaCopy(mlir::Location loc,
                                              mlir::Type copyType,
                                              mlir::Value numEltsToCopy,
                                              mlir::Value offsetPerSubarray,
                                              mlir::Value destAlloca,
                                              mlir::Value srcAlloca) {
  mlir::OpBuilder::InsertionGuard restoreInsertionPoint(builder);

  // The loop counter is an 'unsigned long long' that lives in its own alloca.
  auto &astCtx = cgf.getContext();
  mlir::Type counterTy = cgf.cgm.convertType(astCtx.UnsignedLongLongTy);
  auto counterPtrTy = cir::PointerType::get(counterTy);
  mlir::IntegerAttr counterAlign =
      cgf.cgm.getSize(astCtx.getTypeAlignInChars(astCtx.UnsignedLongLongTy));

  cir::ScopeOp::create(
      builder, loc, [&](mlir::OpBuilder &, mlir::Location scopeLoc) {
        // itr = 0
        auto counter = cir::AllocaOp::create(builder, loc, counterPtrTy,
                                             counterTy, "itr", counterAlign);
        cir::ConstantOp zero = builder.getConstInt(loc, counterTy, 0);
        builder.CIRBaseBuilderTy::createStore(loc, zero, counter);

        // Condition: itr < numEltsToCopy
        auto emitCondition = [&](mlir::OpBuilder &, mlir::Location condLoc) {
          // With no element count, fall back to a constant bound of 1 so the
          // same loop serves both cases rather than needing a second code
          // path.
          if (!numEltsToCopy)
            numEltsToCopy = builder.getConstInt(condLoc, counterTy, 1);

          auto current = cir::LoadOp::create(builder, condLoc, {counter});
          auto isInRange = builder.createCompare(condLoc, cir::CmpOpKind::lt,
                                                 current, numEltsToCopy);
          builder.createCondition(isInRange);
        };

        // Body: destAlloca[itr] = srcAlloca[offsetPerSubarray * itr]
        auto emitBody = [&](mlir::OpBuilder &, mlir::Location bodyLoc) {
          auto current = cir::LoadOp::create(builder, bodyLoc, {counter});
          auto srcIndex =
              builder.createMul(bodyLoc, offsetPerSubarray, current);

          auto srcElementPtr = cir::PtrStrideOp::create(
              builder, bodyLoc, copyType, srcAlloca, srcIndex);

          auto destSlotPtr = cir::PtrStrideOp::create(
              builder, bodyLoc, builder.getPointerTo(copyType), destAlloca,
              current);

          builder.CIRBaseBuilderTy::createStore(bodyLoc, srcElementPtr,
                                                destSlotPtr);
          builder.createYield(bodyLoc);
        };

        // Step: ++itr
        auto emitStep = [&](mlir::OpBuilder &, mlir::Location stepLoc) {
          auto current = cir::LoadOp::create(builder, stepLoc, {counter});
          auto next =
              cir::UnaryOp::create(builder, stepLoc, current.getType(),
                                   cir::UnaryOpKind::Inc, current);
          builder.CIRBaseBuilderTy::createStore(stepLoc, next, counter);
          builder.createYield(stepLoc);
        };

        builder.createFor(loc, emitCondition, emitBody, emitStep);
        builder.createYield(scopeLoc);
      });
}
39108

40109
mlir::Value OpenACCRecipeBuilderBase::makeBoundsAlloca(
41110
mlir::Block *block, SourceRange exprRange, mlir::Location loc,
@@ -78,6 +147,10 @@ mlir::Value OpenACCRecipeBuilderBase::makeBoundsAlloca(
78147

79148
bool lastBoundWasArray = isArrayTy(boundTypes.back());
80149

150+
// Track the most recently created alloca as we go, so that each 'copying'
151+
// step can store its element addresses into the allocation made before it.
152+
mlir::Value lastAlloca = initialAlloca;
153+
81154
// Since we're iterating the types in reverse, this sets up for each index
82155
// corresponding to the boundsRange to be the 'after application of the
83156
// bounds'.
@@ -125,14 +198,21 @@ mlir::Value OpenACCRecipeBuilderBase::makeBoundsAlloca(
125198

126199
mlir::Type eltTy = cgf.convertType(resultType);
127200
cir::PointerType ptrTy = builder.getPointerTo(eltTy);
128-
builder.createAlloca(loc, ptrTy, eltTy, "openacc.init.bounds",
129-
cgf.getContext().getTypeAlignInChars(resultType),
130-
curSize);
131-
132-
// TODO: OpenACC : At this point we should be copying the addresses of
133-
// each element of this to the last allocation. At the moment, that is
134-
// not yet implemented.
135-
cgf.cgm.errorNYI(exprRange, "OpenACC recipe alloca copying");
201+
mlir::Value curAlloca = builder.createAlloca(
202+
loc, ptrTy, eltTy, "openacc.init.bounds",
203+
cgf.getContext().getTypeAlignInChars(resultType), curSize);
204+
205+
makeAllocaCopy(loc, ptrTy, cumulativeElts, eltsPerSubArray, lastAlloca,
206+
curAlloca);
207+
lastAlloca = curAlloca;
208+
} else {
209+
// In the case of an array, we just need to decay the pointer, so just do
210+
// a zero-offset stride on the last alloca to decay it down an array
211+
// level.
212+
cir::ConstantOp constZero = builder.getConstInt(loc, itrTy, 0);
213+
lastAlloca = builder.getArrayElement(loc, loc, lastAlloca,
214+
cgf.convertType(resultType),
215+
constZero, /*shouldDecay=*/true);
136216
}
137217

138218
cumulativeElts = eltsToAlloca;

clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@
2424

2525
namespace clang::CIRGen {
2626
class OpenACCRecipeBuilderBase {
27+
// Copies the addresses of the elements of an alloca into the previous allocation.
28+
void makeAllocaCopy(mlir::Location loc, mlir::Type copyType,
29+
mlir::Value numEltsToCopy, mlir::Value offsetPerSubarray,
30+
mlir::Value destAlloca, mlir::Value srcAlloca);
2731
// This function generates the required alloca, similar to
2832
// 'emitAutoVarAlloca', except for the OpenACC array/pointer types.
2933
mlir::Value makeBoundsAlloca(mlir::Block *block, SourceRange exprRange,

0 commit comments

Comments
 (0)