Index: lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.h
+++ lib/Target/PowerPC/PPCISelLowering.h
@@ -968,6 +968,11 @@
     SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
 
+    // Looks at a SETCC that compares integers and replaces it with an integer
+    // subtraction when (1) there is a legal way of doing so, and (2) keeping
+    // the result of the comparison in a GPR has some performance benefit.
+    SDValue ConvertSETCCToSubtract(SDNode *N, DAGCombinerInfo &DCI) const;
+
     SDValue getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                              int &RefinementSteps,
                              bool &UseOneConstNR) const override;
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9917,6 +9917,101 @@
   return false;
 }
 
+
+// Replaces an unsigned integer SETCC whose operands are narrower than 64 bits
+// with a subtraction (plus supporting instructions) so that the result of the
+// comparison is kept in a GPR instead of a CR bit. The caller must already
+// have proved that the replacement is legal; this function only emits nodes.
+//
+// Both operands are zero-extended to i64, so the 64-bit difference Op0 - Op1
+// cannot wrap and its sign bit is set exactly when Op0 <u Op1.
+//   Size       - width in bits of the extended type; bit Size - 1 is the sign.
+//   Complement - invert the extracted bit, i.e. compute !(Op0 <u Op1).
+//   Swap       - exchange the operands first, i.e. compare (RHS <u LHS).
+static SDValue GenerateEquivalentSub(SDNode *N, int Size, bool Complement,
+                                     bool Swap, SDLoc &DL, SelectionDAG &DAG) {
+
+  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
+
+  // Zero extend the operands to the largest legal integer. Originally, they
+  // must be of a strictly smaller size. ISD::ZERO_EXTEND is a unary node, so
+  // it takes the value being extended as its only operand.
+  auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0));
+  auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
+
+  // Swap if needed. Depends on the condition code.
+  if (Swap)
+    std::swap(Op0, Op1);
+
+  // Subtract extended integers.
+  auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
+
+  // Move the sign bit to the least significant position and zero out the rest.
+  // Now the least significant bit carries the result of (Op0 <u Op1).
+  auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
+                             DAG.getConstant(Size - 1, DL, MVT::i32));
+  auto Final = Shifted;
+
+  // Complement the result if needed. Based on the condition code.
+  if (Complement)
+    Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
+                        DAG.getConstant(1, DL, MVT::i64));
+
+  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
+}
+
+// Tries to replace an unsigned integer SETCC (N) by an equivalent subtraction
+// whose result lives in a GPR. Returns the replacement value, or an empty
+// SDValue when the transformation does not apply.
+SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
+                                                  DAGCombinerInfo &DCI) const {
+
+  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc DL(N);
+
+  // Size of integers being compared has a critical role in the following
+  // analysis, so we prefer to do this when all types are legal.
+  if (!DCI.isAfterLegalizeVectorOps())
+    return SDValue();
+
+  // If all users of SETCC extend its value to a legal integer type
+  // then we replace SETCC with a subtraction
+  for (SDNode::use_iterator UI = N->use_begin(),
+       UE = N->use_end(); UI != UE; ++UI) {
+    if (UI->getOpcode() != ISD::ZERO_EXTEND)
+      return SDValue();
+  }
+
+  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+  auto OpSize = N->getOperand(0).getValueSizeInBits();
+
+  unsigned Size = 64; // Size of widest legal int on PPC
+
+  // GenerateEquivalentSub computes (LHS <u RHS); express the other unsigned
+  // predicates through it via its (Complement, Swap) flags:
+  //   LHS <u  RHS                     -> (false, false)
+  //   LHS <=u RHS == !(RHS <u LHS)    -> (true,  true)
+  //   LHS >u  RHS ==   RHS <u LHS     -> (false, true)
+  //   LHS >=u RHS == !(LHS <u RHS)    -> (true,  false)
+  if (OpSize < Size) {
+    switch (CC) {
+    default: break;
+    case ISD::SETULT:
+      return GenerateEquivalentSub(N, Size, false, false, DL, DAG);
+    case ISD::SETULE:
+      return GenerateEquivalentSub(N, Size, true, true, DL, DAG);
+    case ISD::SETUGT:
+      return GenerateEquivalentSub(N, Size, false, true, DL, DAG);
+    case ISD::SETUGE:
+      return GenerateEquivalentSub(N, Size, true, false, DL, DAG);
+    }
+  }
+
+  return SDValue();
+}
+
 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
                                                   DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -9958,7 +10053,8 @@
                                  APInt::getHighBitsSet(OpBits, OpBits-1)) ||
           !DAG.MaskedValueIsZero(N->getOperand(1),
                                  APInt::getHighBitsSet(OpBits, OpBits-1)))
-        return SDValue();
+        return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
+                                             : SDValue());
     } else {
       // This is neither a signed nor an unsigned comparison, just make sure
       // that the high bits are equal.
Index: test/CodeGen/PowerPC/setcc-to-sub.ll
===================================================================
--- /dev/null
+++ test/CodeGen/PowerPC/setcc-to-sub.ll
@@ -0,0 +1,52 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s
+
+%class.PB2 = type { [1 x i32], %class.PB1* }
+%class.PB1 = type { [1 x i32], i64, i64, i32 }
+
+; Function Attrs: norecurse nounwind readonly
+define zeroext i1 @test1(%class.PB2* %s_a, %class.PB2* %s_b) local_unnamed_addr #0 {
+entry:
+  %arrayidx.i6 = bitcast %class.PB2* %s_a to i32*
+  %0 = load i32, i32* %arrayidx.i6, align 8, !tbaa !1
+  %and.i = and i32 %0, 8
+  %arrayidx.i37 = bitcast %class.PB2* %s_b to i32*
+  %1 = load i32, i32* %arrayidx.i37, align 8, !tbaa !1
+  %and.i4 = and i32 %1, 8
+  %cmp.i5 = icmp ult i32 %and.i, %and.i4
+  ret i1 %cmp.i5
+
+; CHECK-LABEL: @test1
+; CHECK: rlwinm [[RES1:[0-9]*]]
+; CHECK: rlwinm [[RES2:[0-9]*]]
+; CHECK: sub 3, [[RES1]], [[RES2]]
+; CHECK: blr
+
+}
+
+; Function Attrs: norecurse nounwind readonly
+define zeroext i1 @test2(%class.PB2* %s_a, %class.PB2* %s_b) local_unnamed_addr #0 {
+entry:
+  %arrayidx.i6 = bitcast %class.PB2* %s_a to i32*
+  %0 = load i32, i32* %arrayidx.i6, align 8, !tbaa !1
+  %and.i = and i32 %0, 8
+  %arrayidx.i37 = bitcast %class.PB2* %s_b to i32*
+  %1 = load i32, i32* %arrayidx.i37, align 8, !tbaa !1
+  %and.i4 = and i32 %1, 8
+  %cmp.i5 = icmp ule i32 %and.i, %and.i4
+  ret i1 %cmp.i5
+
+; a <=u b is lowered as !(b <u a), so the subtraction operands are swapped.
+; CHECK-LABEL: @test2
+; CHECK: rlwinm [[RES1:[0-9]*]]
+; CHECK: rlwinm [[RES2:[0-9]*]]
+; CHECK: sub 3, [[RES2]], [[RES1]]
+; CHECK: xori 3, 3, 1
+; CHECK: blr
+
+}
+
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C++ TBAA"}