diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -4965,6 +4965,17 @@
           return DAG.getSetCC(dl, VT, N0, N1, NewCond);
       }
     }
+    // Fold `x oeq +/-inf`, with x possibly wrapped in fabs, into IS_FPCLASS.
+    if (isOperationLegalOrCustom(ISD::IS_FPCLASS, N0.getValueType())) {
+      bool IsFabs = N0.getOpcode() == ISD::FABS;
+      SDValue Op = IsFabs ? N0.getOperand(0) : N0; // classify fabs's source
+      if (Cond == ISD::SETOEQ && CFP->isInfinity()) { // fabs drops the sign
+        FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
+                                             : (IsFabs ? fcInf : fcPosInf);
+        return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
+                           DAG.getConstant(Flag, dl, MVT::i32, true));
+      }
+    }
   }
 
   if (N0 == N1) {
diff --git a/llvm/test/CodeGen/AMDGPU/fp-classify.ll b/llvm/test/CodeGen/AMDGPU/fp-classify.ll
--- a/llvm/test/CodeGen/AMDGPU/fp-classify.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp-classify.ll
@@ -610,10 +610,10 @@
 ; SI-NEXT: s_load_dword s0, s[0:1], 0xb
 ; SI-NEXT: s_mov_b32 s7, 0xf000
 ; SI-NEXT: s_mov_b32 s6, -1
-; SI-NEXT: s_mov_b32 s1, 0x7f800000
+; SI-NEXT: v_mov_b32_e32 v0, 0x200
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_cvt_f32_f16_e64 v0, |s0|
-; SI-NEXT: v_cmp_eq_f32_e32 vcc, s1, v0
+; SI-NEXT: v_cvt_f32_f16_e64 v1, |s0|
+; SI-NEXT: v_cmp_class_f32_e32 vcc, v1, v0
 ; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
 ; SI-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/fract-match.ll b/llvm/test/CodeGen/AMDGPU/fract-match.ll
--- a/llvm/test/CodeGen/AMDGPU/fract-match.ll
+++ b/llvm/test/CodeGen/AMDGPU/fract-match.ll
@@ -1754,12 +1754,12 @@
 ; GFX6-NEXT: v_min_f32_e32 v7, 0x3f7fffff, v7
 ; GFX6-NEXT: v_cndmask_b32_e32 v6, v6, v1, vcc
 ; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
-; GFX6-NEXT: s_movk_i32 s10, 0x204
+; GFX6-NEXT: v_mov_b32_e32 v8, 0x200
 ; GFX6-NEXT: v_cndmask_b32_e32 v7, v7, v0, vcc
-; GFX6-NEXT: v_cmp_class_f32_e64 s[8:9], v0, s10
+; GFX6-NEXT: v_cmp_class_f32_e64 s[8:9], |v0|, v8
 ; GFX6-NEXT: s_mov_b32 s6, 0
 ; GFX6-NEXT: v_cndmask_b32_e64 v0, v7, 0, s[8:9]
-; GFX6-NEXT: v_cmp_class_f32_e64 s[8:9], v1, s10
+; GFX6-NEXT: v_cmp_class_f32_e64 s[8:9], |v1|, v8
 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
 ; GFX6-NEXT: s_mov_b32 s4, s6
 ; GFX6-NEXT: s_mov_b32 s5, s6
@@ -2189,15 +2189,15 @@
 ; GFX8: ; %bb.0: ; %entry
 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX8-NEXT: s_movk_i32 s6, 0x204
+; GFX8-NEXT: v_mov_b32_e32 v7, 0x200
 ; GFX8-NEXT: v_floor_f16_e32 v4, v3
 ; GFX8-NEXT: v_floor_f16_e32 v5, v0
 ; GFX8-NEXT: v_fract_f16_e32 v6, v3
-; GFX8-NEXT: v_cmp_class_f16_e64 s[4:5], v3, s6
+; GFX8-NEXT: v_cmp_class_f16_e64 s[4:5], |v3|, v7
 ; GFX8-NEXT: v_pack_b32_f16 v4, v5, v4
 ; GFX8-NEXT: v_fract_f16_e32 v5, v0
 ; GFX8-NEXT: v_cndmask_b32_e64 v3, v6, 0, s[4:5]
-; GFX8-NEXT: v_cmp_class_f16_e64 s[4:5], v0, s6
+; GFX8-NEXT: v_cmp_class_f16_e64 s[4:5], |v0|, v7
 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v5, 0, s[4:5]
 ; GFX8-NEXT: v_pack_b32_f16 v0, v0, v3
 ; GFX8-NEXT: global_store_dword v[1:2], v4, off
@@ -2213,10 +2213,10 @@
 ; GFX11-NEXT: v_floor_f16_e32 v5, v0
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
 ; GFX11-NEXT: v_fract_f16_e32 v4, v3
-; GFX11-NEXT: v_cmp_class_f16_e64 s0, v3, 0x204
+; GFX11-NEXT: v_cmp_class_f16_e64 s0, |v3|, 0x200
 ; GFX11-NEXT: v_floor_f16_e32 v7, v3
 ; GFX11-NEXT: v_cndmask_b32_e64 v3, v4, 0, s0
-; GFX11-NEXT: v_cmp_class_f16_e64 s0, v0, 0x204
+; GFX11-NEXT: v_cmp_class_f16_e64 s0, |v0|, 0x200
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT: v_pack_b32_f16 v4, v5, v7
 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v6, 0, s0
@@ -2295,13 +2295,13 @@
 ; GFX6-NEXT: v_cndmask_b32_e32 v11, v11, v3, vcc
 ; GFX6-NEXT: v_cndmask_b32_e32 v10, v10, v2, vcc
 ; GFX6-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
-; GFX6-NEXT: s_movk_i32 s10, 0x204
-; GFX6-NEXT: v_cmp_class_f64_e64 s[8:9], v[0:1], s10
+; GFX6-NEXT: v_mov_b32_e32 v14, 0x200
+; GFX6-NEXT: v_cmp_class_f64_e64 s[8:9], |v[0:1]|, v14
 ; GFX6-NEXT: v_cndmask_b32_e32 v13, v13, v1, vcc
 ; GFX6-NEXT: v_cndmask_b32_e32 v12, v12, v0, vcc
 ; GFX6-NEXT: v_cndmask_b32_e64 v0, v12, 0, s[8:9]
 ; GFX6-NEXT: v_cndmask_b32_e64 v1, v13, 0, s[8:9]
-; GFX6-NEXT: v_cmp_class_f64_e64 s[8:9], v[2:3], s10
+; GFX6-NEXT: v_cmp_class_f64_e64 s[8:9], |v[2:3]|, v14
 ; GFX6-NEXT: s_mov_b32 s6, 0
 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
 ; GFX6-NEXT: s_mov_b32 s4, s6
@@ -2315,11 +2315,11 @@
 ; GFX7-LABEL: safe_math_fract_v2f64:
 ; GFX7: ; %bb.0: ; %entry
 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: s_movk_i32 s4, 0x204
+; GFX7-NEXT: v_mov_b32_e32 v6, 0x200
 ; GFX7-NEXT: v_fract_f64_e32 v[10:11], v[0:1]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[8:9], v[0:1], s4
+; GFX7-NEXT: v_cmp_class_f64_e64 s[8:9], |v[0:1]|, v6
 ; GFX7-NEXT: v_fract_f64_e32 v[12:13], v[2:3]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[10:11], v[2:3], s4
+; GFX7-NEXT: v_cmp_class_f64_e64 s[10:11], |v[2:3]|, v6
 ; GFX7-NEXT: v_floor_f64_e32 v[8:9], v[2:3]
 ; GFX7-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
 ; GFX7-NEXT: s_mov_b32 s6, 0
@@ -2337,11 +2337,11 @@
 ; GFX8-LABEL: safe_math_fract_v2f64:
 ; GFX8: ; %bb.0: ; %entry
 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_movk_i32 s6, 0x204
+; GFX8-NEXT: v_mov_b32_e32 v6, 0x200
 ; GFX8-NEXT: v_fract_f64_e32 v[10:11], v[0:1]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], s6
+; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], |v[0:1]|, v6
 ; GFX8-NEXT: v_fract_f64_e32 v[12:13], v[2:3]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[2:3], s6
+; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], |v[2:3]|, v6
 ; GFX8-NEXT: v_floor_f64_e32 v[8:9], v[2:3]
 ; GFX8-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v10, 0, s[4:5]
@@ -2357,9 +2357,9 @@
 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT: v_fract_f64_e32 v[10:11], v[0:1]
-; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[0:1], 0x204
+; GFX11-NEXT: v_cmp_class_f64_e64 s0, |v[0:1]|, 0x200
 ; GFX11-NEXT: v_fract_f64_e32 v[12:13], v[2:3]
-; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[2:3], 0x204
+; GFX11-NEXT: v_cmp_class_f64_e64 s1, |v[2:3]|, 0x200
 ; GFX11-NEXT: v_floor_f64_e32 v[8:9], v[2:3]
 ; GFX11-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v10, 0, s0
diff --git a/llvm/test/CodeGen/PowerPC/fp-classify.ll b/llvm/test/CodeGen/PowerPC/fp-classify.ll
--- a/llvm/test/CodeGen/PowerPC/fp-classify.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-classify.ll
@@ -18,12 +18,9 @@
 ;
 ; P9-LABEL: abs_isinff:
 ; P9: # %bb.0: # %entry
-; P9-NEXT: addis 3, 2, .LCPI0_0@toc@ha
-; P9-NEXT: xsabsdp 0, 1
-; P9-NEXT: li 4, 1
-; P9-NEXT: lfs 1, .LCPI0_0@toc@l(3)
+; P9-NEXT: xststdcsp 0, 1, 48
 ; P9-NEXT: li 3, 0
-; P9-NEXT: fcmpu 0, 0, 1
+; P9-NEXT: li 4, 1
 ; P9-NEXT: iseleq 3, 4, 3
 ; P9-NEXT: blr
 entry:
@@ -46,12 +43,9 @@
 ;
 ; P9-LABEL: abs_isinf:
 ; P9: # %bb.0: # %entry
-; P9-NEXT: addis 3, 2, .LCPI1_0@toc@ha
-; P9-NEXT: xsabsdp 0, 1
-; P9-NEXT: li 4, 1
-; P9-NEXT: lfs 1, .LCPI1_0@toc@l(3)
+; P9-NEXT: xststdcdp 0, 1, 48
 ; P9-NEXT: li 3, 0
-; P9-NEXT: fcmpu 0, 0, 1
+; P9-NEXT: li 4, 1
 ; P9-NEXT: iseleq 3, 4, 3
 ; P9-NEXT: blr
 entry:
@@ -91,13 +85,9 @@
 ;
 ; P9-LABEL: abs_isinfq:
 ; P9: # %bb.0: # %entry
-; P9-NEXT: addis 3, 2, .LCPI2_0@toc@ha
-; P9-NEXT: xsabsqp 2, 2
-; P9-NEXT: li 4, 1
-; P9-NEXT: addi 3, 3, .LCPI2_0@toc@l
-; P9-NEXT: lxv 35, 0(3)
+; P9-NEXT: xststdcqp 0, 2, 48
 ; P9-NEXT: li 3, 0
-; P9-NEXT: xscmpuqp 0, 2, 3
+; P9-NEXT: li 4, 1
 ; P9-NEXT: iseleq 3, 4, 3
 ; P9-NEXT: blr
 entry: