Skip to content

Commit eca2fcb

Browse files
authored
[AMDGPU] Fix cost of fast unsafe f32 fdiv (#68988)
1 parent 9451004 commit eca2fcb

File tree

2 files changed

+71
-0
lines changed

2 files changed

+71
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -650,6 +650,15 @@ InstructionCost GCNTTIImpl::getArithmeticInstrCost(
650650
return LT.first * Cost * NElts;
651651
}
652652

653+
if (SLT == MVT::f32 && ((CxtI && CxtI->hasApproxFunc()) ||
654+
TLI->getTargetMachine().Options.UnsafeFPMath)) {
655+
// Fast unsafe fdiv lowering:
656+
// f32 rcp
657+
// f32 fmul
658+
int Cost = getQuarterRateInstrCost(CostKind) + getFullRateInstrCost();
659+
return LT.first * Cost * NElts;
660+
}
661+
653662
if (SLT == MVT::f32 || SLT == MVT::f16) {
654663
// 4 more v_cvt_* insts without f16 insts support
655664
int Cost = (SLT == MVT::f16 ? 14 : 10) * getFullRateInstrCost() +

llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll

Lines changed: 62 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,37 @@ define amdgpu_kernel void @fdiv_f32_ieee() #0 {
4343
ret void
4444
}
4545

46+
; Cost-model checks for f32 fdiv carrying the 'afn' fast-math flag, covering
; the scalar case and the <2 x>, <3 x>, <4 x>, <5 x>, <8 x> and <9 x float>
; vector cases. The expected scalar cost is 5 under the ALL prefix and 3 under
; the ALL-SIZE prefix; the expected costs scale linearly with the element
; count up to 8 elements, while the 9-element vector is expected at 27x the
; scalar cost (135 / 81).
; NOTE(review): attribute group #0 is defined outside this view; the function
; name suggests it selects IEEE denormal handling -- confirm.
define amdgpu_kernel void @fdiv_f32_afn_ieee() #0 {
47+
; ALL-LABEL: 'fdiv_f32_afn_ieee'
48+
; ALL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f32 = fdiv afn float undef, undef
49+
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2f32 = fdiv afn <2 x float> undef, undef
50+
; ALL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v3f32 = fdiv afn <3 x float> undef, undef
51+
; ALL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4f32 = fdiv afn <4 x float> undef, undef
52+
; ALL-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v5f32 = fdiv afn <5 x float> undef, undef
53+
; ALL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v8f32 = fdiv afn <8 x float> undef, undef
54+
; ALL-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %v9f32 = fdiv afn <9 x float> undef, undef
55+
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
56+
;
57+
; ALL-SIZE-LABEL: 'fdiv_f32_afn_ieee'
58+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f32 = fdiv afn float undef, undef
59+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f32 = fdiv afn <2 x float> undef, undef
60+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v3f32 = fdiv afn <3 x float> undef, undef
61+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4f32 = fdiv afn <4 x float> undef, undef
62+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v5f32 = fdiv afn <5 x float> undef, undef
63+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8f32 = fdiv afn <8 x float> undef, undef
64+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %v9f32 = fdiv afn <9 x float> undef, undef
65+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
66+
;
67+
%f32 = fdiv afn float undef, undef
68+
%v2f32 = fdiv afn <2 x float> undef, undef
69+
%v3f32 = fdiv afn <3 x float> undef, undef
70+
%v4f32 = fdiv afn <4 x float> undef, undef
71+
%v5f32 = fdiv afn <5 x float> undef, undef
72+
%v8f32 = fdiv afn <8 x float> undef, undef
73+
%v9f32 = fdiv afn <9 x float> undef, undef
74+
ret void
75+
}
76+
4677
define amdgpu_kernel void @fdiv_f32_ftzdaz() #1 {
4778
; ALL-LABEL: 'fdiv_f32_ftzdaz'
4879
; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %f32 = fdiv float undef, undef
@@ -74,6 +105,37 @@ define amdgpu_kernel void @fdiv_f32_ftzdaz() #1 {
74105
ret void
75106
}
76107

108+
; Cost-model checks for f32 fdiv carrying the 'afn' fast-math flag, covering
; the scalar case and the <2 x>, <3 x>, <4 x>, <5 x>, <8 x> and <9 x float>
; vector cases, this time under attribute group #1. The expected scalar cost
; is 5 under the ALL prefix and 3 under the ALL-SIZE prefix; the expected
; costs scale linearly with the element count up to 8 elements, while the
; 9-element vector is expected at 27x the scalar cost (135 / 81).
; NOTE(review): attribute group #1 is defined outside this view; the function
; name suggests it selects flush-to-zero / denormals-are-zero mode -- confirm.
define amdgpu_kernel void @fdiv_f32_afn_ftzdaz() #1 {
109+
; ALL-LABEL: 'fdiv_f32_afn_ftzdaz'
110+
; ALL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f32 = fdiv afn float undef, undef
111+
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2f32 = fdiv afn <2 x float> undef, undef
112+
; ALL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v3f32 = fdiv afn <3 x float> undef, undef
113+
; ALL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4f32 = fdiv afn <4 x float> undef, undef
114+
; ALL-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v5f32 = fdiv afn <5 x float> undef, undef
115+
; ALL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v8f32 = fdiv afn <8 x float> undef, undef
116+
; ALL-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %v9f32 = fdiv afn <9 x float> undef, undef
117+
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
118+
;
119+
; ALL-SIZE-LABEL: 'fdiv_f32_afn_ftzdaz'
120+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f32 = fdiv afn float undef, undef
121+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f32 = fdiv afn <2 x float> undef, undef
122+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v3f32 = fdiv afn <3 x float> undef, undef
123+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4f32 = fdiv afn <4 x float> undef, undef
124+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v5f32 = fdiv afn <5 x float> undef, undef
125+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8f32 = fdiv afn <8 x float> undef, undef
126+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %v9f32 = fdiv afn <9 x float> undef, undef
127+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
128+
;
129+
%f32 = fdiv afn float undef, undef
130+
%v2f32 = fdiv afn <2 x float> undef, undef
131+
%v3f32 = fdiv afn <3 x float> undef, undef
132+
%v4f32 = fdiv afn <4 x float> undef, undef
133+
%v5f32 = fdiv afn <5 x float> undef, undef
134+
%v8f32 = fdiv afn <8 x float> undef, undef
135+
%v9f32 = fdiv afn <9 x float> undef, undef
136+
ret void
137+
}
138+
77139
define amdgpu_kernel void @fdiv_f64() #0 {
78140
; CIFASTF64-LABEL: 'fdiv_f64'
79141
; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %f64 = fdiv double undef, undef

0 commit comments

Comments
 (0)