Skip to content

Commit 1410c60

Browse files
committed
assume base pointer is uniform
1 parent 0b3131b commit 1410c60

File tree

2 files changed

+16
-16
lines changed

2 files changed

+16
-16
lines changed

src/codegen.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1534,12 +1534,14 @@ FunctionType *gc_loaded_vN_ft(LLVMContext &C, int N) {
15341534
return FunctionType::get(
15351535
FixedVectorType::get(PointerType::get(JuliaType::get_prjlvalue_ty(C), AddressSpace::Loaded), N),
15361536
{
1537-
FixedVectorType::get(JuliaType::get_prjlvalue_ty(C), N),
1537+
JuliaType::get_prjlvalue_ty(C),
15381538
FixedVectorType::get(PointerType::get(JuliaType::get_prjlvalue_ty(C), 0), N)
15391539
}, false);
15401540
}
15411541

15421542
// nonnull / nocapture / readnone don't apply to vectors of pointers, so only the scalar base pointer argument can carry them
1543+
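// We assume the most common case is that the base pointer is uniform across all lanes, so it is passed as a scalar,
1544+
// but the other pointer argument and the result stay vectors, since we intend to form a gather/scatter.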
15431545
static const auto gc_loaded_v2_func = new JuliaFunction<>{
15441546
"julia.gc_loaded.v2",
15451547
[](LLVMContext &C) { return gc_loaded_vN_ft(C, 2); },
@@ -1558,7 +1560,7 @@ static const auto gc_loaded_v2_func = new JuliaFunction<>{
15581560
AttrBuilder RetAttrs(C);
15591561
RetAttrs.addAttribute(Attribute::NoUndef);
15601562
return AttributeList::get(C, AttributeSet::get(C,FnAttrs), AttributeSet::get(C,RetAttrs),
1561-
{ Attributes(C, {Attribute::NoUndef}),
1563+
{ Attributes(C, {Attribute::NonNull, Attribute::NoUndef, Attribute::ReadNone, Attribute::NoCapture}),
15621564
Attributes(C, {Attribute::NoUndef}) });
15631565
},
15641566
};
@@ -1603,9 +1605,9 @@ static const auto gc_loaded_func = new JuliaFunction<>{
16031605
FnAttrs.addAttribute(Attribute::ReadNone);
16041606
#endif
16051607
// XXX: According to langref we should be able to specify:
1606-
// _ZGV_LLVM_Nxvv for vector length agnostic and even
1607-
// _ZGV_LLVM_Nxvv_julia.gc_loaded(_LLVM_Scalarize_julia.gc_loaded) but that seems to not have been implemented
1608-
FnAttrs.addAttribute("vector-function-abi-variant", "_ZGV_LLVM_N2vv_julia.gc_loaded(julia.gc_loaded.v2),_ZGV_LLVM_N4vv_julia.gc_loaded(julia.gc_loaded.v4),_ZGV_LLVM_N8vv_julia.gc_loaded(julia.gc_loaded.v8),_ZGV_LLVM_N16vv_julia.gc_loaded(julia.gc_loaded.v16)");
1608+
// _ZGV_LLVM_Nxuv for vector length agnostic and even
1609+
// _ZGV_LLVM_Nxuv_julia.gc_loaded(_LLVM_Scalarize_julia.gc_loaded) but that seems to not have been implemented
1610+
FnAttrs.addAttribute("vector-function-abi-variant", "_ZGV_LLVM_N2uv_julia.gc_loaded(julia.gc_loaded.v2),_ZGV_LLVM_N4uv_julia.gc_loaded(julia.gc_loaded.v4),_ZGV_LLVM_N8uv_julia.gc_loaded(julia.gc_loaded.v8),_ZGV_LLVM_N16uv_julia.gc_loaded(julia.gc_loaded.v16)");
16091611
AttrBuilder RetAttrs(C);
16101612
RetAttrs.addAttribute(Attribute::NonNull);
16111613
RetAttrs.addAttribute(Attribute::NoUndef);

test/llvmpasses/vectorized_intrinsics.ll

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='loop-vectorize' -force-vector-width=8 -S %s | FileCheck %s --check-prefixes=CHECKV8
66
; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='loop-vectorize' -force-vector-width=16 -S %s | FileCheck %s --check-prefixes=CHECKV16
77

8-
; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='loop-vectorize' -scalable-vectorization=on -force-target-supports-scalable-vectors=true -force-vector-width=2 -S %s | FileCheck %s --check-prefixes=CHECKSCAL
9-
108
source_filename = "vectorized_intrinsics.ll"
119
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
1210
target triple = "x86_64-unknown-linux-gnu"
@@ -33,10 +31,10 @@ L30: ; preds = %L30, %L13.preheader
3331
%memoryref_data = load ptr, ptr addrspace(11) %memory_data_ptr, align 8, !tbaa !15, !invariant.load !17, !alias.scope !18, !noalias !19, !nonnull !17
3432
%memoryref_offset = shl i64 %value_phi3, 3
3533
%3 = call ptr addrspace(13) @julia.gc_loaded(ptr addrspace(10) %memoryref_mem, ptr %memoryref_data)
36-
; CHECKV2: call <2 x ptr addrspace(13)> @julia.gc_loaded.v2
37-
; CHECKV4: call <4 x ptr addrspace(13)> @julia.gc_loaded.v4
38-
; CHECKV8: call <8 x ptr addrspace(13)> @julia.gc_loaded.v8
39-
; CHECKV16: call <16 x ptr addrspace(13)> @julia.gc_loaded.v16
34+
; CHECKV2: call <2 x ptr addrspace(13)> @julia.gc_loaded.v2(ptr addrspace(10) %memoryref_mem, <2 x ptr>
35+
; CHECKV4: call <4 x ptr addrspace(13)> @julia.gc_loaded.v4(ptr addrspace(10) %memoryref_mem, <4 x ptr>
36+
; CHECKV8: call <8 x ptr addrspace(13)> @julia.gc_loaded.v8(ptr addrspace(10) %memoryref_mem, <8 x ptr>
37+
; CHECKV16: call <16 x ptr addrspace(13)> @julia.gc_loaded.v16(ptr addrspace(10) %memoryref_mem, <16 x ptr>
4038
%4 = getelementptr i8, ptr addrspace(13) %3, i64 %memoryref_offset
4139
%memoryref_data6 = getelementptr i8, ptr addrspace(13) %4, i64 -8
4240
store i64 4607182418800017408, ptr addrspace(13) %memoryref_data6, align 8, !tbaa !20, !alias.scope !22, !noalias !23
@@ -57,19 +55,19 @@ declare ptr @julia.get_pgcstack()
5755
declare noundef nonnull ptr addrspace(13) @julia.gc_loaded(ptr addrspace(10) nocapture noundef nonnull readnone, ptr noundef nonnull readnone) #1
5856

5957
; Function Attrs: norecurse nosync nounwind speculatable willreturn memory(none)
60-
declare noundef <2 x ptr addrspace(13)> @julia.gc_loaded.v2(<2 x ptr addrspace(10)> noundef, <2 x ptr> noundef) #2
58+
declare noundef <2 x ptr addrspace(13)> @julia.gc_loaded.v2(ptr addrspace(10) nocapture noundef nonnull readnone, <2 x ptr> noundef) #2
6159

6260
; Function Attrs: norecurse nosync nounwind speculatable willreturn memory(none)
63-
declare noundef <4 x ptr addrspace(13)> @julia.gc_loaded.v4(<4 x ptr addrspace(10)> noundef, <4 x ptr> noundef) #2
61+
declare noundef <4 x ptr addrspace(13)> @julia.gc_loaded.v4(ptr addrspace(10) nocapture noundef nonnull readnone, <4 x ptr> noundef) #2
6462

6563
; Function Attrs: norecurse nosync nounwind speculatable willreturn memory(none)
66-
declare noundef <8 x ptr addrspace(13)> @julia.gc_loaded.v8(<8 x ptr addrspace(10)> noundef, <8 x ptr> noundef) #2
64+
declare noundef <8 x ptr addrspace(13)> @julia.gc_loaded.v8(ptr addrspace(10) nocapture noundef nonnull readnone, <8 x ptr> noundef) #2
6765

6866
; Function Attrs: norecurse nosync nounwind speculatable willreturn memory(none)
69-
declare noundef <16 x ptr addrspace(13)> @julia.gc_loaded.v16(<16 x ptr addrspace(10)> noundef, <16 x ptr> noundef) #2
67+
declare noundef <16 x ptr addrspace(13)> @julia.gc_loaded.v16(ptr addrspace(10) nocapture noundef nonnull readnone, <16 x ptr> noundef) #2
7068

7169
attributes #0 = { "frame-pointer"="all" "julia.fsig"="var\22#5\22(FixedSizeArrays.FixedSizeArray{Float64, 1, Memory{Float64}})" "probe-stack"="inline-asm" }
72-
attributes #1 = { norecurse nosync nounwind speculatable willreturn memory(none) "vector-function-abi-variant"="_ZGV_LLVM_N2vv_julia.gc_loaded(julia.gc_loaded.v2),_ZGV_LLVM_N4vv_julia.gc_loaded(julia.gc_loaded.v4),_ZGV_LLVM_N8vv_julia.gc_loaded(julia.gc_loaded.v8),_ZGV_LLVM_N16vv_julia.gc_loaded(julia.gc_loaded.v16)" }
70+
attributes #1 = { norecurse nosync nounwind speculatable willreturn memory(none) "vector-function-abi-variant"="_ZGV_LLVM_N2uv_julia.gc_loaded(julia.gc_loaded.v2),_ZGV_LLVM_N4uv_julia.gc_loaded(julia.gc_loaded.v4),_ZGV_LLVM_N8uv_julia.gc_loaded(julia.gc_loaded.v8),_ZGV_LLVM_N16uv_julia.gc_loaded(julia.gc_loaded.v16)" }
7371
attributes #2 = { norecurse nosync nounwind speculatable willreturn memory(none) }
7472

7573
!llvm.module.flags = !{!0, !1, !2}

0 commit comments

Comments
 (0)