@@ -16,8 +16,8 @@ define amdgpu_kernel void @noop_fdiv_fpmath(float addrspace(1)* %out, float %a,
16
16
; CHECK: %md.1ulp = fdiv float %a, %b, !fpmath !2
17
17
; CHECK: %md.25ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0
18
18
; CHECK: %md.3ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !3
19
- ; CHECK: %fast.md.25ulp = call fast float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0
20
- ; CHECK: arcp.md.25ulp = call arcp float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0
19
+ ; CHECK: %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
20
+ ; CHECK: arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
21
21
define amdgpu_kernel void @fdiv_fpmath (float addrspace (1 )* %out , float %a , float %b ) #1 {
22
22
%no.md = fdiv float %a , %b
23
23
store volatile float %no.md , float addrspace (1 )* %out
@@ -110,15 +110,8 @@ define amdgpu_kernel void @fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2
110
110
; CHECK: %md.half.ulp = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !1
111
111
; CHECK: %arcp.no.md = fdiv arcp <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}}
112
112
; CHECK: %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}}
113
-
114
- ; CHECK: extractelement <2 x float> %x
115
- ; CHECK: fdiv arcp float 1.000000e+00, %{{[0-9]+}}, !fpmath !0
116
- ; CHECK: extractelement <2 x float> %x
117
- ; CHECK: fdiv arcp float 1.000000e+00, %{{[0-9]+}}, !fpmath !0
118
- ; CHECK: store volatile <2 x float> %arcp.25ulp
119
-
120
- ; CHECK: fdiv fast float 1.000000e+00, %{{[0-9]+}}, !fpmath !0
121
- ; CHECK: fdiv fast float 1.000000e+00, %{{[0-9]+}}, !fpmath !0
113
+ ; CHECK: %arcp.25ulp = fdiv arcp <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !0
114
+ ; CHECK: %fast.25ulp = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !0
122
115
; CHECK: store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out
123
116
define amdgpu_kernel void @rcp_fdiv_fpmath_vector (<2 x float > addrspace (1 )* %out , <2 x float > %x ) #1 {
124
117
%no.md = fdiv <2 x float > <float 1 .0 , float 1 .0 >, %x
@@ -146,17 +139,8 @@ define amdgpu_kernel void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out
146
139
; CHECK: %no.md = fdiv <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x
147
140
; CHECK: %arcp.no.md = fdiv arcp <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x
148
141
; CHECK: %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x{{$}}
149
-
150
- ; CHECK: %[[X0:[0-9]+]] = extractelement <2 x float> %x, i64 0
151
- ; CHECK: fdiv arcp float 1.000000e+00, %[[X0]], !fpmath !0
152
- ; CHECK: %[[X1:[0-9]+]] = extractelement <2 x float> %x, i64 1
153
- ; CHECK: fdiv arcp float 2.000000e+00, %[[X1]], !fpmath !0
154
- ; CHECK: store volatile <2 x float> %arcp.25ulp
155
-
156
- ; CHECK: %[[X0:[0-9]+]] = extractelement <2 x float> %x, i64 0
157
- ; CHECK: fdiv fast float 1.000000e+00, %[[X0]], !fpmath !0
158
- ; CHECK: %[[X1:[0-9]+]] = extractelement <2 x float> %x, i64 1
159
- ; CHECK: fdiv fast float 2.000000e+00, %[[X1]], !fpmath !0
142
+ ; CHECK: %arcp.25ulp = fdiv arcp <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x, !fpmath !0
143
+ ; CHECK: %fast.25ulp = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x, !fpmath !0
160
144
; CHECK: store volatile <2 x float> %fast.25ulp
161
145
define amdgpu_kernel void @rcp_fdiv_fpmath_vector_nonsplat (<2 x float > addrspace (1 )* %out , <2 x float > %x ) #1 {
162
146
%no.md = fdiv <2 x float > <float 1 .0 , float 2 .0 >, %x
@@ -179,12 +163,10 @@ define amdgpu_kernel void @rcp_fdiv_fpmath_vector_nonsplat(<2 x float> addrspace
179
163
180
164
; FIXME: Should be able to get fdiv for 1.0 component
181
165
; CHECK-LABEL: @rcp_fdiv_fpmath_vector_partial_constant(
182
- ; CHECK: call arcp float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0
183
- ; CHECK: call arcp float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0
166
+ ; CHECK: %arcp.25ulp = fdiv arcp <2 x float> %x.insert, %y, !fpmath !0
184
167
; CHECK: store volatile <2 x float> %arcp.25ulp
185
168
186
- ; CHECK: call fast float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0
187
- ; CHECK: call fast float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0
169
+ ; CHECK: %fast.25ulp = fdiv fast <2 x float> %x.insert, %y, !fpmath !0
188
170
; CHECK: store volatile <2 x float> %fast.25ulp
189
171
define amdgpu_kernel void @rcp_fdiv_fpmath_vector_partial_constant (<2 x float > addrspace (1 )* %out , <2 x float > %x , <2 x float > %y ) #1 {
190
172
%x.insert = insertelement <2 x float > %x , float 1 .0 , i32 0
@@ -204,8 +186,8 @@ define amdgpu_kernel void @rcp_fdiv_fpmath_vector_partial_constant(<2 x float> a
204
186
; CHECK: %md.1ulp = fdiv float %a, %b, !fpmath !2
205
187
; CHECK: %md.25ulp = fdiv float %a, %b, !fpmath !0
206
188
; CHECK: %md.3ulp = fdiv float %a, %b, !fpmath !3
207
- ; CHECK: call fast float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0
208
- ; CHECK: call arcp float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0
189
+ ; CHECK: %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
190
+ ; CHECK: %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
209
191
define amdgpu_kernel void @fdiv_fpmath_f32_denormals (float addrspace (1 )* %out , float %a , float %b ) #2 {
210
192
%no.md = fdiv float %a , %b
211
193
store volatile float %no.md , float addrspace (1 )* %out
0 commit comments