Skip to content

Commit cbe656f

Browse files
committed
[Clang] [WIP] Added builtin_alloca support for OpenCL1.2 and below
__builtin_alloca was returning a flat pointer with no address space when compiled for OpenCL 1.2 or below, but worked fine with OpenCL 2.0 and above. This is because the latter uses the concept of a generic address space, which supports casts to other address spaces (i.e. to the private address space, which is used for stack allocation). So, for OpenCL 1.2 and below, __builtin_alloca is supposed to return a pointer to the private address space, eliminating the need for a cast, which is not supported there. Thus, it requires a redefinition of the builtin function whose return type is a pointer to the appropriate address space.
1 parent 1539989 commit cbe656f

File tree

3 files changed

+106
-3
lines changed

3 files changed

+106
-3
lines changed

clang/lib/Sema/SemaExpr.cpp

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6231,7 +6231,10 @@ bool Sema::CheckArgsForPlaceholders(MultiExprArg args) {
62316231
/// it does not contain any pointer arguments without
62326232
/// an address space qualifier. Otherwise the rewritten
62336233
/// FunctionDecl is returned.
6234-
/// TODO: Handle pointer return types.
6234+
///
6235+
/// A pointer return type with no explicit address space is assigned the
6236+
/// default address space for its pointee, chosen based on the language
6237+
/// options used for compilation.
62356238
static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext &Context,
62366239
FunctionDecl *FDecl,
62376240
MultiExprArg ArgExprs) {
@@ -6275,13 +6278,27 @@ static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext &Context,
62756278
OverloadParams.push_back(Context.getPointerType(PointeeType));
62766279
}
62776280

6281+
QualType ReturnTy = FT->getReturnType();
6282+
QualType OverloadReturnTy = ReturnTy;
6283+
if (ReturnTy->isPointerType() &&
6284+
!ReturnTy->getPointeeType().hasAddressSpace()) {
6285+
if (Sema->getLangOpts().OpenCL) {
6286+
NeedsNewDecl = true;
6287+
6288+
QualType ReturnPtTy = ReturnTy->getPointeeType();
6289+
LangAS defClAS = Context.getDefaultOpenCLPointeeAddrSpace();
6290+
ReturnPtTy = Context.getAddrSpaceQualType(ReturnPtTy, defClAS);
6291+
OverloadReturnTy = Context.getPointerType(ReturnPtTy);
6292+
}
6293+
}
6294+
62786295
if (!NeedsNewDecl)
62796296
return nullptr;
62806297

62816298
FunctionProtoType::ExtProtoInfo EPI;
62826299
EPI.Variadic = FT->isVariadic();
6283-
QualType OverloadTy = Context.getFunctionType(FT->getReturnType(),
6284-
OverloadParams, EPI);
6300+
QualType OverloadTy =
6301+
Context.getFunctionType(OverloadReturnTy, OverloadParams, EPI);
62856302
DeclContext *Parent = FDecl->getParent();
62866303
FunctionDecl *OverloadDecl = FunctionDecl::Create(
62876304
Context, Parent, FDecl->getLocation(), FDecl->getLocation(),
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2+
// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL1.2 -emit-llvm -o - | FileCheck --check-prefix=OPENCL12 %s
3+
// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL2.0 -emit-llvm -o - | FileCheck --check-prefix=OPENCL20 %s
4+
// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 -emit-llvm -o - | FileCheck --check-prefix=OPENCL30 %s
5+
// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 -cl-ext=+__opencl_c_generic_address_space -emit-llvm -o - | FileCheck --check-prefix=OPENCL30-EXT %s
6+
7+
// OPENCL12-LABEL: define dso_local ptr addrspace(5) @test1(
8+
// OPENCL12-SAME: ) #[[ATTR0:[0-9]+]] {
9+
// OPENCL12-NEXT: [[ENTRY:.*:]]
10+
// OPENCL12-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
11+
// OPENCL12-NEXT: [[TMP0:%.*]] = alloca i8, i64 128, align 8, addrspace(5)
12+
// OPENCL12-NEXT: store ptr addrspace(5) [[TMP0]], ptr addrspace(5) [[ALLOC_PTR]], align 4
13+
// OPENCL12-NEXT: [[TMP1:%.*]] = load ptr addrspace(5), ptr addrspace(5) [[ALLOC_PTR]], align 4
14+
// OPENCL12-NEXT: ret ptr addrspace(5) [[TMP1]]
15+
//
16+
// OPENCL20-LABEL: define dso_local ptr @test1(
17+
// OPENCL20-SAME: ) #[[ATTR0:[0-9]+]] {
18+
// OPENCL20-NEXT: [[ENTRY:.*:]]
19+
// OPENCL20-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5)
20+
// OPENCL20-NEXT: [[TMP0:%.*]] = alloca i8, i64 128, align 8, addrspace(5)
21+
// OPENCL20-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to ptr
22+
// OPENCL20-NEXT: store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 8
23+
// OPENCL20-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[ALLOC_PTR]], align 8
24+
// OPENCL20-NEXT: ret ptr [[TMP2]]
25+
//
26+
// OPENCL30-LABEL: define dso_local ptr addrspace(5) @test1(
27+
// OPENCL30-SAME: ) #[[ATTR0:[0-9]+]] {
28+
// OPENCL30-NEXT: [[ENTRY:.*:]]
29+
// OPENCL30-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
30+
// OPENCL30-NEXT: [[TMP0:%.*]] = alloca i8, i64 128, align 8, addrspace(5)
31+
// OPENCL30-NEXT: store ptr addrspace(5) [[TMP0]], ptr addrspace(5) [[ALLOC_PTR]], align 4
32+
// OPENCL30-NEXT: [[TMP1:%.*]] = load ptr addrspace(5), ptr addrspace(5) [[ALLOC_PTR]], align 4
33+
// OPENCL30-NEXT: ret ptr addrspace(5) [[TMP1]]
34+
//
35+
// OPENCL30-EXT-LABEL: define dso_local ptr @test1(
36+
// OPENCL30-EXT-SAME: ) #[[ATTR0:[0-9]+]] {
37+
// OPENCL30-EXT-NEXT: [[ENTRY:.*:]]
38+
// OPENCL30-EXT-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5)
39+
// OPENCL30-EXT-NEXT: [[TMP0:%.*]] = alloca i8, i64 128, align 8, addrspace(5)
40+
// OPENCL30-EXT-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to ptr
41+
// OPENCL30-EXT-NEXT: store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 8
42+
// OPENCL30-EXT-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[ALLOC_PTR]], align 8
43+
// OPENCL30-EXT-NEXT: ret ptr [[TMP2]]
44+
//
45+
float* test1() {
46+
float* alloc_ptr = (float*)__builtin_alloca(32 * sizeof(int));
47+
return alloc_ptr;
48+
}
49+
50+
// OPENCL12-LABEL: define dso_local void @test2(
51+
// OPENCL12-SAME: ) #[[ATTR0]] {
52+
// OPENCL12-NEXT: [[ENTRY:.*:]]
53+
// OPENCL12-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
54+
// OPENCL12-NEXT: [[TMP0:%.*]] = alloca i8, i64 28, align 8, addrspace(5)
55+
// OPENCL12-NEXT: store ptr addrspace(5) [[TMP0]], ptr addrspace(5) [[ALLOC_PTR]], align 4
56+
// OPENCL12-NEXT: ret void
57+
//
58+
// OPENCL20-LABEL: define dso_local void @test2(
59+
// OPENCL20-SAME: ) #[[ATTR0]] {
60+
// OPENCL20-NEXT: [[ENTRY:.*:]]
61+
// OPENCL20-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5)
62+
// OPENCL20-NEXT: [[TMP0:%.*]] = alloca i8, i64 28, align 8, addrspace(5)
63+
// OPENCL20-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to ptr
64+
// OPENCL20-NEXT: store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 8
65+
// OPENCL20-NEXT: ret void
66+
//
67+
// OPENCL30-LABEL: define dso_local void @test2(
68+
// OPENCL30-SAME: ) #[[ATTR0]] {
69+
// OPENCL30-NEXT: [[ENTRY:.*:]]
70+
// OPENCL30-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
71+
// OPENCL30-NEXT: [[TMP0:%.*]] = alloca i8, i64 28, align 8, addrspace(5)
72+
// OPENCL30-NEXT: store ptr addrspace(5) [[TMP0]], ptr addrspace(5) [[ALLOC_PTR]], align 4
73+
// OPENCL30-NEXT: ret void
74+
//
75+
// OPENCL30-EXT-LABEL: define dso_local void @test2(
76+
// OPENCL30-EXT-SAME: ) #[[ATTR0]] {
77+
// OPENCL30-EXT-NEXT: [[ENTRY:.*:]]
78+
// OPENCL30-EXT-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5)
79+
// OPENCL30-EXT-NEXT: [[TMP0:%.*]] = alloca i8, i64 28, align 8, addrspace(5)
80+
// OPENCL30-EXT-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to ptr
81+
// OPENCL30-EXT-NEXT: store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 8
82+
// OPENCL30-EXT-NEXT: ret void
83+
//
84+
void test2() {
85+
void *alloc_ptr = __builtin_alloca(28);
86+
}

clang/test/CodeGenOpenCL/memcpy.cl

100644100755
File mode changed.

0 commit comments

Comments
 (0)