@@ -126,7 +126,7 @@ define amdgpu_kernel void @fmul_x2_xn3_f32(float addrspace(1)* %out, float %x, f
126
126
; GFX8_10: v_mul_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
127
127
; VI-FLUSH: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, 1.0
128
128
; VI-DENORM: v_fma_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, 1.0
129
- ; GFX10-DENORM: v_fmac_f16_e64 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
129
+ ; GFX10-DENORM: v_fma_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, 1.0
130
130
; GFX10-FLUSH: v_sub_f16_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
131
131
define amdgpu_kernel void @multiple_fadd_use_test_f16 (half addrspace (1 )* %out , i16 zeroext %x.arg , i16 zeroext %y.arg , i16 zeroext %z.arg ) #0 {
132
132
%x = bitcast i16 %x.arg to half
@@ -152,7 +152,7 @@ define amdgpu_kernel void @multiple_fadd_use_test_f16(half addrspace(1)* %out, i
152
152
; VI-FLUSH-DAG: v_mac_f16_e64 [[MAD:v[0-9]+]], [[X]], 2.0
153
153
; VI-DENORM-DAG: v_fma_f16 [[MAD:v[0-9]+]], [[X]], 2.0, v{{[0-9]+}}
154
154
; GFX10-FLUSH-DAG: v_add_f16_e32 [[MAD:v[0-9]+]], s{{[0-9]+}}, [[MUL2]]
155
- ; GFX10-DENORM-DAG: v_fmac_f16_e64 [[MAD:v[0-9]+]], [[X]], 2.0
155
+ ; GFX10-DENORM-DAG: v_fma_f16 [[MAD:v[0-9]+]], [[X]], 2.0, s{{[0-9]+}}
156
156
157
157
; GCN-DAG: buffer_store_short [[MUL2]]
158
158
; GCN-DAG: buffer_store_short [[MAD]]
@@ -174,7 +174,7 @@ define amdgpu_kernel void @multiple_use_fadd_fmac_f16(half addrspace(1)* %out, i
174
174
; VI-FLUSH-DAG: v_mad_f16 [[MAD:v[0-9]+]], |[[X]]|, 2.0, v{{[0-9]+}}
175
175
; VI-DENORM-DAG: v_fma_f16 [[MAD:v[0-9]+]], |[[X]]|, 2.0, v{{[0-9]+}}
176
176
; GFX10-FLUSH-DAG: v_add_f16_e32 [[MAD:v[0-9]+]], s{{[0-9]+}}, [[MUL2]]
177
- ; GFX10-DENORM-DAG: v_fmac_f16_e64 [[MAD:v[0-9]+]], |[[X]]|, 2.0
177
+ ; GFX10-DENORM-DAG: v_fma_f16 [[MAD:v[0-9]+]], |[[X]]|, 2.0, s{{[0-9]+}}
178
178
179
179
; GCN-DAG: buffer_store_short [[MUL2]]
180
180
; GCN-DAG: buffer_store_short [[MAD]]
@@ -201,8 +201,8 @@ define amdgpu_kernel void @multiple_use_fadd_fmad_f16(half addrspace(1)* %out, i
201
201
; GFX10-FLUSH: v_add_f16_e64 [[MUL2:v[0-9]+]], |[[X:s[0-9]+]]|, |{{s[0-9]+}}|
202
202
; GFX10-FLUSH: v_add_f16_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[MUL2]]
203
203
; GFX10-FLUSH: v_add_f16_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[MUL2]]
204
- ; GFX10-DENORM: v_fmac_f16_e64 {{v[0-9]+}}, |[[X:s[0-9]+]]|, 2.0
205
- ; GFX10-DENORM: v_fmac_f16_e64 {{v[0-9]+}}, |[[X]]|, 2.0
204
+ ; GFX10-DENORM: v_fma_f16 {{v[0-9]+}}, |[[X:s[0-9]+]]|, 2.0, s{{[0-9]+}}
205
+ ; GFX10-DENORM: v_fma_f16 {{v[0-9]+}}, |[[X]]|, 2.0, s{{[0-9]+}}
206
206
207
207
define amdgpu_kernel void @multiple_use_fadd_multi_fmad_f16 (half addrspace (1 )* %out , i16 zeroext %x.arg , i16 zeroext %y.arg , i16 zeroext %z.arg ) #0 {
208
208
%x = bitcast i16 %x.arg to half
0 commit comments