Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -1053,18 +1053,20 @@
 def : BFEPattern ;
 
+// Match source modifiers (fneg/fabs) on the fcanonicalize operand and pass
+// them through as the modifiers of the variable operand of the multiply by 1.0.
 def : Pat<
-  (fcanonicalize f16:$src),
-  (V_MUL_F16_e64 0, (i32 CONST.FP16_ONE), 0, $src, 0, 0)
+  (fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))),
+  (V_MUL_F16_e64 0, (i32 CONST.FP16_ONE), $src_mods, $src, 0, 0)
 >;
 
 def : Pat<
-  (fcanonicalize f32:$src),
-  (V_MUL_F32_e64 0, (i32 CONST.FP32_ONE), 0, $src, 0, 0)
+  (fcanonicalize (f32 (VOP3Mods f32:$src, i32:$src_mods))),
+  (V_MUL_F32_e64 0, (i32 CONST.FP32_ONE), $src_mods, $src, 0, 0)
 >;
 
 def : Pat<
-  (fcanonicalize f64:$src),
-  (V_MUL_F64 0, CONST.FP64_ONE, 0, $src, 0, 0)
+  (fcanonicalize (f64 (VOP3Mods f64:$src, i32:$src_mods))),
+  (V_MUL_F64 0, CONST.FP64_ONE, $src_mods, $src, 0, 0)
 >;
 
 // Allow integer inputs
Index: test/CodeGen/AMDGPU/fcanonicalize.f16.ll
===================================================================
--- test/CodeGen/AMDGPU/fcanonicalize.f16.ll
+++ test/CodeGen/AMDGPU/fcanonicalize.f16.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
+declare half @llvm.fabs.f16(half) #0
 declare half @llvm.canonicalize.f16(half) #0
 
 ; GCN-LABEL: {{^}}v_test_canonicalize_var_f16:
@@ -22,6 +23,40 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_f16:
+; GCN: v_mul_f16_e64 [[REG:v[0-9]+]], 1.0, |{{v[0-9]+}}|
+; GCN: buffer_store_short [[REG]]
+define void @v_test_canonicalize_fabs_var_f16(half addrspace(1)* %out) #1 {
+  %val = load half, half addrspace(1)* %out
+  %val.fabs = call half @llvm.fabs.f16(half %val)
+  %canonicalized = call half @llvm.canonicalize.f16(half %val.fabs)
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_f16:
+; GCN: v_mul_f16_e64 [[REG:v[0-9]+]], 1.0, -|{{v[0-9]+}}|
+; GCN: buffer_store_short [[REG]]
+define void @v_test_canonicalize_fneg_fabs_var_f16(half addrspace(1)* %out) #1 {
+  %val = load half, half addrspace(1)* %out
+  %val.fabs = call half @llvm.fabs.f16(half %val)
+  %val.fabs.fneg = fsub half -0.0, %val.fabs
+  %canonicalized = call half @llvm.canonicalize.f16(half %val.fabs.fneg)
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_f16:
+; GCN: v_mul_f16_e64 [[REG:v[0-9]+]], 1.0, -{{v[0-9]+}}
+; GCN: buffer_store_short [[REG]]
+define void @v_test_canonicalize_fneg_var_f16(half addrspace(1)* %out) #1 {
+  %val = load half, half addrspace(1)* %out
+  %val.fneg = fsub half -0.0, %val
+  %canonicalized = call half @llvm.canonicalize.f16(half %val.fneg)
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
 ; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f16:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
 ; GCN: buffer_store_short [[REG]]
Index: test/CodeGen/AMDGPU/fcanonicalize.ll
===================================================================
--- test/CodeGen/AMDGPU/fcanonicalize.ll
+++ test/CodeGen/AMDGPU/fcanonicalize.ll
@@ -1,6 +1,8 @@
 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
+declare float @llvm.fabs.f32(float) #0
 declare float @llvm.canonicalize.f32(float) #0
+declare double @llvm.fabs.f64(double) #0
 declare double @llvm.canonicalize.f64(double) #0
 
 ; GCN-LABEL: {{^}}v_test_canonicalize_var_f32:
@@ -22,6 +24,40 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_f32:
+; GCN: v_mul_f32_e64 [[REG:v[0-9]+]], 1.0, |{{v[0-9]+}}|
+; GCN: buffer_store_dword [[REG]]
+define void @v_test_canonicalize_fabs_var_f32(float addrspace(1)* %out) #1 {
+  %val = load float, float addrspace(1)* %out
+  %val.fabs = call float @llvm.fabs.f32(float %val)
+  %canonicalized = call float @llvm.canonicalize.f32(float %val.fabs)
+  store float %canonicalized, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_f32:
+; GCN: v_mul_f32_e64 [[REG:v[0-9]+]], 1.0, -|{{v[0-9]+}}|
+; GCN: buffer_store_dword [[REG]]
+define void @v_test_canonicalize_fneg_fabs_var_f32(float addrspace(1)* %out) #1 {
+  %val = load float, float addrspace(1)* %out
+  %val.fabs = call float @llvm.fabs.f32(float %val)
+  %val.fabs.fneg = fsub float -0.0, %val.fabs
+  %canonicalized = call float @llvm.canonicalize.f32(float %val.fabs.fneg)
+  store float %canonicalized, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_f32:
+; GCN: v_mul_f32_e64 [[REG:v[0-9]+]], 1.0, -{{v[0-9]+}}
+; GCN: buffer_store_dword [[REG]]
+define void @v_test_canonicalize_fneg_var_f32(float addrspace(1)* %out) #1 {
+  %val = load float, float addrspace(1)* %out
+  %val.fneg = fsub float -0.0, %val
+  %canonicalized = call float @llvm.canonicalize.f32(float %val.fneg)
+  store float %canonicalized, float addrspace(1)* %out
+  ret void
+}
+
 ; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f32:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
 ; GCN: buffer_store_dword [[REG]]
@@ -185,6 +221,40 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_f64:
+; GCN: v_mul_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 1.0, |{{v\[[0-9]+:[0-9]+\]}}|
+; GCN: buffer_store_dwordx2 [[REG]]
+define void @v_test_canonicalize_fabs_var_f64(double addrspace(1)* %out) #1 {
+  %val = load double, double addrspace(1)* %out
+  %val.fabs = call double @llvm.fabs.f64(double %val)
+  %canonicalized = call double @llvm.canonicalize.f64(double %val.fabs)
+  store double %canonicalized, double addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_f64:
+; GCN: v_mul_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 1.0, -|{{v\[[0-9]+:[0-9]+\]}}|
+; GCN: buffer_store_dwordx2 [[REG]]
+define void @v_test_canonicalize_fneg_fabs_var_f64(double addrspace(1)* %out) #1 {
+  %val = load double, double addrspace(1)* %out
+  %val.fabs = call double @llvm.fabs.f64(double %val)
+  %val.fabs.fneg = fsub double -0.0, %val.fabs
+  %canonicalized = call double @llvm.canonicalize.f64(double %val.fabs.fneg)
+  store double %canonicalized, double addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_f64:
+; GCN: v_mul_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 1.0, -{{v\[[0-9]+:[0-9]+\]}}
+; GCN: buffer_store_dwordx2 [[REG]]
+define void @v_test_canonicalize_fneg_var_f64(double addrspace(1)* %out) #1 {
+  %val = load double, double addrspace(1)* %out
+  %val.fneg = fsub double -0.0, %val
+  %canonicalized = call double @llvm.canonicalize.f64(double %val.fneg)
+  store double %canonicalized, double addrspace(1)* %out
+  ret void
+}
+
 ; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f64:
 ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
 ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}