diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -14102,11 +14102,15 @@
                                         DAGCombinerInfo &DCI) const {
   assert(N->getOpcode() == ISD::SETCC &&
          "Should be called with a SETCC node");
 
+  EVT VT = N->getValueType(0);
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  EVT OpVT = LHS.getValueType();
+  SDLoc DL(N);
+  SelectionDAG &DAG = DCI.DAG;
   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
   if (CC == ISD::SETNE || CC == ISD::SETEQ) {
-    SDValue LHS = N->getOperand(0);
-    SDValue RHS = N->getOperand(1);
 
     // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
     if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
@@ -14117,13 +14121,84 @@
     // x != 0-y --> x+y != 0
     if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
         RHS.hasOneUse()) {
-      SDLoc DL(N);
-      SelectionDAG &DAG = DCI.DAG;
-      EVT VT = N->getValueType(0);
-      EVT OpVT = LHS.getValueType();
       SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
       return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
     }
+  } else if (CC == ISD::SETOEQ && Subtarget.hasP9Vector()) {
+    // An ordered-equal compare against +/-0.0 or +/-infinity only asks which
+    // data class the value belongs to, so it can be answered by an ISA 3.0
+    // test data class instruction. Opcode stays 0 for operand types that have
+    // no such instruction, which disables the combine for them.
+    unsigned Opcode = 0;
+    if (OpVT == MVT::f128)
+      Opcode = PPC::XSTSTDCQP;
+    else if (OpVT == MVT::f64)
+      Opcode = PPC::XSTSTDCDP;
+    else if (OpVT == MVT::v2f64)
+      Opcode = PPC::XVTSTDCDP;
+    else if (OpVT == MVT::f32)
+      Opcode = PPC::XSTSTDCSP;
+    else if (OpVT == MVT::v4f32)
+      Opcode = PPC::XVTSTDCSP;
+
+    // Masks defined for ISA 3.0 test data class instructions.
+    enum DataClassMask {
+      DC_NAN = 1 << 6,
+      DC_NEG_INF = 1 << 4,
+      DC_POS_INF = 1 << 5,
+      DC_NEG_ZERO = 1 << 2,
+      DC_POS_ZERO = 1 << 3,
+      DC_NEG_SUBNORM = 1,
+      DC_POS_SUBNORM = 1 << 1,
+    };
+
+    // Returns the mask of data classes that compare ordered-equal to the FP
+    // constant Op, or 0 if Op is not a constant this combine handles.
+    // IsAbsolute is true when the tested value is wrapped in fabs.
+    auto GetMask = [](const SDValue &Op, bool IsAbsolute) -> unsigned {
+      const auto *CFP = dyn_cast<ConstantFPSDNode>(Op.getNode());
+      if (!CFP)
+        return 0;
+      const APFloat &APF = CFP->getValueAPF();
+      // +0.0 and -0.0 compare equal, so a compare against either zero must
+      // accept both zero classes, with or without fabs.
+      if (APF.isZero())
+        return DC_POS_ZERO | DC_NEG_ZERO;
+      if (!APF.isInfinity())
+        return 0;
+      // fabs(x) == -inf never holds; return 0 so that compare is left to the
+      // generic lowering.
+      if (APF.isNegative())
+        return IsAbsolute ? 0 : DC_NEG_INF;
+      return IsAbsolute ? (DC_POS_INF | DC_NEG_INF) : DC_POS_INF;
+    };
+
+    SDValue TestOp = LHS;
+    bool IsAbsolute = LHS.getOpcode() == ISD::FABS;
+    if (IsAbsolute)
+      TestOp = LHS.getOperand(0);
+    unsigned Mask = 0;
+    if (RHS.getOpcode() == ISD::BUILD_VECTOR && DAG.isSplatValue(RHS, true)) {
+      Mask = GetMask(RHS.getOperand(0), IsAbsolute);
+    } else if (!OpVT.isVector()) {
+      Mask = GetMask(RHS, IsAbsolute);
+    }
+
+    if (Opcode != 0 && Mask != 0) {
+      SDValue TestClass{
+          DAG.getMachineNode(Opcode, DL,
+                             OpVT == MVT::v2f64
+                                 ? MVT::v2i64
+                                 : (OpVT == MVT::v4f32 ? MVT::v4i32 : MVT::i32),
+                             DAG.getTargetConstant(Mask, DL, MVT::i32), TestOp),
+          0};
+      if (OpVT.isVector())
+        return TestClass;
+      return SDValue(
+          DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, TestClass,
+                             DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32)),
+          0);
+    }
   }
 
   return DAGCombineTruncBoolExt(N, DCI);
diff --git a/llvm/test/CodeGen/PowerPC/fp-classify.ll b/llvm/test/CodeGen/PowerPC/fp-classify.ll
--- a/llvm/test/CodeGen/PowerPC/fp-classify.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-classify.ll
@@ -18,12 +18,9 @@
 ;
 ; P9-LABEL: abs_isinff:
 ; P9:       # %bb.0: # %entry
-; P9-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
-; P9-NEXT:    xsabsdp 0, 1
-; P9-NEXT:    li 4, 1
-; P9-NEXT:    lfs 1, .LCPI0_0@toc@l(3)
+; P9-NEXT:    xststdcsp 0, 1, 48
 ; P9-NEXT:    li 3, 0
-; P9-NEXT:    fcmpu 0, 0, 1
+; P9-NEXT:    li 4, 1
 ; P9-NEXT:    iseleq 3, 4, 3
 ; P9-NEXT:    blr
 entry:
@@ -46,12 +43,9 @@
 ;
 ; P9-LABEL: abs_isinf:
 ; P9:       # %bb.0: # %entry
-; P9-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
-; P9-NEXT:    xsabsdp 0, 1
-; P9-NEXT:    li 4, 1
-; P9-NEXT:    lfs 1, .LCPI1_0@toc@l(3)
+; P9-NEXT:    xststdcdp 0, 1, 48
 ; P9-NEXT:    li 3, 0
-; P9-NEXT:    fcmpu 0, 0, 1
+; P9-NEXT:    li 4, 1
 ; P9-NEXT:    iseleq 3, 4, 3
 ; P9-NEXT:    blr
 entry:
@@ -91,13 +85,9 @@
 ;
 ; P9-LABEL: abs_isinfq:
 ; P9:       # %bb.0: # %entry
-; P9-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
-; P9-NEXT:    xsabsqp 2, 2
-; P9-NEXT:    li 4, 1
-; P9-NEXT:    addi 3, 3, .LCPI2_0@toc@l
-; P9-NEXT:    lxv 35, 0(3)
+; P9-NEXT:    xststdcqp 0, 2, 48
 ; P9-NEXT:    li 3, 0
-; P9-NEXT:    xscmpuqp 0, 2, 3
+; P9-NEXT:    li 4, 1
 ; P9-NEXT:    iseleq 3, 4, 3
 ; P9-NEXT:    blr
 entry:
@@ -119,11 +109,7 @@
 ;
 ; P9-LABEL: abs_isinfv4f32:
 ; P9:       # %bb.0: # %entry
-; P9-NEXT:    addis 3, 2, .LCPI3_0@toc@ha
-; P9-NEXT:    xvabssp 0, 34
-; P9-NEXT:    addi 3, 3, .LCPI3_0@toc@l
-; P9-NEXT:    lxv 1, 0(3)
-; P9-NEXT:    xvcmpeqsp 34, 0, 1
+; P9-NEXT:    xvtstdcsp 34, 34, 48
 ; P9-NEXT:    blr
 entry:
   %0 = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
@@ -144,11 +130,7 @@
 ;
 ; P9-LABEL: abs_isinfv2f64:
 ; P9:       # %bb.0: # %entry
-; P9-NEXT:    addis 3, 2, .LCPI4_0@toc@ha
-; P9-NEXT:    xvabsdp 0, 34
-; P9-NEXT:    addi 3, 3, .LCPI4_0@toc@l
-; P9-NEXT:    lxv 1, 0(3)
-; P9-NEXT:    xvcmpeqdp 34, 0, 1
+; P9-NEXT:    xvtstdcdp 34, 34, 48
 ; P9-NEXT:    blr
 entry:
   %0 = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %x)
@@ -168,10 +150,9 @@
 ;
 ; P9-LABEL: iszerof:
 ; P9:       # %bb.0: # %entry
-; P9-NEXT:    xxlxor 0, 0, 0
+; P9-NEXT:    xststdcsp 0, 1, 12
 ; P9-NEXT:    li 3, 0
 ; P9-NEXT:    li 4, 1
-; P9-NEXT:    fcmpu 0, 1, 0
 ; P9-NEXT:    iseleq 3, 4, 3
 ; P9-NEXT:    blr
 entry:
@@ -191,10 +172,9 @@
 ;
 ; P9-LABEL: iszero:
 ; P9:       # %bb.0: # %entry
-; P9-NEXT:    xxlxor 0, 0, 0
+; P9-NEXT:    xststdcdp 0, 1, 12
 ; P9-NEXT:    li 3, 0
 ; P9-NEXT:    li 4, 1
-; P9-NEXT:    fcmpu 0, 1, 0
 ; P9-NEXT:    iseleq 3, 4, 3
 ; P9-NEXT:    blr
 entry:
@@ -225,12 +205,9 @@
 ;
 ; P9-LABEL: iszeroq:
 ; P9:       # %bb.0: # %entry
-; P9-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
-; P9-NEXT:    li 4, 1
-; P9-NEXT:    addi 3, 3, .LCPI7_0@toc@l
-; P9-NEXT:    lxv 35, 0(3)
+; P9-NEXT:    xststdcqp 0, 2, 12
 ; P9-NEXT:    li 3, 0
-; P9-NEXT:    xscmpuqp 0, 2, 3
+; P9-NEXT:    li 4, 1
 ; P9-NEXT:    iseleq 3, 4, 3
 ; P9-NEXT:    blr
 entry:
@@ -247,7 +224,6 @@
 ;
 ; P9-LABEL: iszerov4f32:
 ; P9:       # %bb.0: # %entry
-; P9-NEXT:    xxlxor 0, 0, 0
-; P9-NEXT:    xvcmpeqsp 34, 34, 0
+; P9-NEXT:    xvtstdcsp 34, 34, 12
 ; P9-NEXT:    blr
 entry:
@@ -264,7 +240,6 @@
 ;
 ; P9-LABEL: iszerov2f64:
 ; P9:       # %bb.0: # %entry
-; P9-NEXT:    xxlxor 0, 0, 0
-; P9-NEXT:    xvcmpeqdp 34, 34, 0
+; P9-NEXT:    xvtstdcdp 34, 34, 12
 ; P9-NEXT:    blr
 entry:
diff --git a/llvm/test/CodeGen/PowerPC/is_fpclass.ll b/llvm/test/CodeGen/PowerPC/is_fpclass.ll
--- a/llvm/test/CodeGen/PowerPC/is_fpclass.ll
+++ b/llvm/test/CodeGen/PowerPC/is_fpclass.ll
@@ -283,10 +283,9 @@
 define i1 @iszero_float(float %x) nounwind {
 ; CHECK-LABEL: iszero_float:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xxlxor 0, 0, 0
+; CHECK-NEXT:    xststdcsp 0, 1, 12
 ; CHECK-NEXT:    li 3, 0
 ; CHECK-NEXT:    li 4, 1
-; CHECK-NEXT:    fcmpu 0, 1, 0
 ; CHECK-NEXT:    iseleq 3, 4, 3
 ; CHECK-NEXT:    blr
   %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 96)  ; 0x60 = "zero"
@@ -296,12 +295,9 @@
 define i1 @iszero_f128(fp128 %x) nounwind {
 ; CHECK-LABEL: iszero_f128:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addis 3, 2, .LCPI18_0@toc@ha
-; CHECK-NEXT:    li 4, 1
-; CHECK-NEXT:    addi 3, 3, .LCPI18_0@toc@l
-; CHECK-NEXT:    lxv 35, 0(3)
+; CHECK-NEXT:    xststdcqp 0, 2, 12
 ; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:    xscmpuqp 0, 2, 3
+; CHECK-NEXT:    li 4, 1
 ; CHECK-NEXT:    iseleq 3, 4, 3
 ; CHECK-NEXT:    blr
   %1 = call i1 @llvm.is.fpclass.f128(fp128 %x, i32 96)  ; 0x60 = "zero"