diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1272,6 +1272,7 @@
     SDValue combineFMALike(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineSelectCC(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineVectorShuffle(ShuffleVectorSDNode *SVN,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -13575,6 +13575,59 @@
   return DAGCombineTruncBoolExt(N, DCI);
 }
 
+/// Fold a select_cc that tests the sign of an integer and picks between two
+/// constants whose difference is a power of two into branch-free arithmetic:
+///   select_cc x, 0, t, f, setlt
+///     -> f - ((x >>u (bits(x) - 1)) << log2(f - t))   when f > t
+///     -> f + ((x >>u (bits(x) - 1)) << log2(t - f))   when f < t
+/// Returns an empty SDValue when the pattern does not match.
+SDValue PPCTargetLowering::combineSelectCC(SDNode *N,
+                                           DAGCombinerInfo &DCI) const {
+  assert(N->getOpcode() == ISD::SELECT_CC && "Expect a select_cc node");
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc DL(N);
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  SDValue TrueOp = N->getOperand(2);
+  SDValue FalseOp = N->getOperand(3);
+  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+  EVT VT = TrueOp.getValueType();
+  EVT CompVT = LHS.getValueType();
+
+  auto *TrueC = dyn_cast<ConstantSDNode>(TrueOp);
+  auto *FalseC = dyn_cast<ConstantSDNode>(FalseOp);
+  if (!TrueC || !FalseC || CC != ISD::SETLT || !isNullConstant(RHS) ||
+      !VT.isInteger() || !isTypeLegal(VT) || !CompVT.isInteger() ||
+      !isTypeLegal(CompVT))
+    return SDValue();
+
+  const APInt &TrueVal = TrueC->getAPIntValue();
+  const APInt &FalseVal = FalseC->getAPIntValue();
+  bool OV;
+  APInt AbsDiff = FalseVal.ssub_ov(TrueVal, OV).abs();
+  if (OV || !AbsDiff.isPowerOf2())
+    return SDValue();
+
+  // Logical shift leaves 1 in bit 0 when LHS is negative and 0 otherwise.
+  unsigned SignBitShiftAmount = CompVT.getSizeInBits() - 1;
+  SDValue SignBit =
+      DAG.getNode(ISD::SRL, DL, CompVT, LHS,
+                  DAG.getConstant(SignBitShiftAmount, DL, CompVT));
+  SignBit = DAG.getZExtOrTrunc(SignBit, DL, VT);
+
+  // Scale the 0/1 flag to the distance between the two constants.
+  unsigned Shift = AbsDiff.logBase2();
+  SDValue Delta = SignBit;
+  if (Shift != 0)
+    Delta = DAG.getNode(ISD::SHL, DL, VT, SignBit,
+                        DAG.getConstant(Shift, DL, VT));
+
+  // Step from FalseVal towards TrueVal. Never invert the flag with getNOT:
+  // that is xor -1 and maps 0/1 to -1/-2 instead of 1/0.
+  unsigned Opc = FalseVal.slt(TrueVal) ? ISD::ADD : ISD::SUB;
+  return DAG.getNode(Opc, DL, VT, FalseOp, Delta);
+}
+
 // Is this an extending load from an f32 to an f64?
static bool isFPExtLoad(SDValue Op) { if (LoadSDNode *LD = dyn_cast(Op.getNode())) @@ -14628,8 +14681,10 @@ case ISD::SETCC: if (SDValue CSCC = combineSetCC(N, DCI)) return CSCC; - LLVM_FALLTHROUGH; + return DAGCombineTruncBoolExt(N, DCI); case ISD::SELECT_CC: + if (SDValue Val = combineSelectCC(N, DCI)) + return Val; return DAGCombineTruncBoolExt(N, DCI); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: diff --git a/llvm/test/CodeGen/PowerPC/select.ll b/llvm/test/CodeGen/PowerPC/select.ll --- a/llvm/test/CodeGen/PowerPC/select.ll +++ b/llvm/test/CodeGen/PowerPC/select.ll @@ -9,29 +9,89 @@ define i64 @f0(i64 %x) { ; CHECK-LE-LABEL: f0: ; CHECK-LE: # %bb.0: -; CHECK-LE-NEXT: li r4, 125 -; CHECK-LE-NEXT: cmpdi r3, 0 -; CHECK-LE-NEXT: li r3, -3 -; CHECK-LE-NEXT: isellt r3, r3, r4 +; CHECK-LE-NEXT: rotldi r3, r3, 1 +; CHECK-LE-NEXT: rldic r3, r3, 7, 56 +; CHECK-LE-NEXT: subfic r3, r3, 125 ; CHECK-LE-NEXT: blr ; ; CHECK-32-LABEL: f0: ; CHECK-32: # %bb.0: -; CHECK-32-NEXT: li r4, 125 -; CHECK-32-NEXT: li r5, -3 -; CHECK-32-NEXT: cmpwi r3, 0 -; CHECK-32-NEXT: bc 12, lt, .LBB0_1 -; CHECK-32-NEXT: b .LBB0_2 -; CHECK-32-NEXT: .LBB0_1: -; CHECK-32-NEXT: addi r4, r5, 0 -; CHECK-32-NEXT: .LBB0_2: -; CHECK-32-NEXT: srawi r3, r3, 31 +; CHECK-32-NEXT: srawi r5, r3, 31 +; CHECK-32-NEXT: rlwinm r3, r3, 8, 24, 24 +; CHECK-32-NEXT: subfic r4, r3, 125 +; CHECK-32-NEXT: mr r3, r5 ; CHECK-32-NEXT: blr %c = icmp slt i64 %x, 0 %r = select i1 %c, i64 -3, i64 125 ret i64 %r } +define i64 @f0_reverse(i64 %x) { +; CHECK-LE-LABEL: f0_reverse: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: rldicl r3, r3, 1, 63 +; CHECK-LE-NEXT: not r3, r3 +; CHECK-LE-NEXT: sldi r3, r3, 7 +; CHECK-LE-NEXT: subfic r3, r3, 125 +; CHECK-LE-NEXT: blr +; +; CHECK-32-LABEL: f0_reverse: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: not r4, r3 +; CHECK-32-NEXT: srwi r5, r3, 31 +; CHECK-32-NEXT: srawi r3, r4, 31 +; CHECK-32-NEXT: not r4, r5 +; CHECK-32-NEXT: slwi r4, r4, 7 +; CHECK-32-NEXT: subfic r4, r4, 125 +; CHECK-32-NEXT: blr + %c = 
icmp slt i64 %x, 0 + %r = select i1 %c, i64 125, i64 -3 + ret i64 %r +} + +define i32 @f0_might_ov(i32 %x) { +; CHECK-LE-LABEL: f0_might_ov: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: rlwinm r3, r3, 0, 0, 0 +; CHECK-LE-NEXT: subfic r3, r3, 1 +; CHECK-LE-NEXT: blr +; +; CHECK-32-LABEL: f0_might_ov: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: lis r4, -32768 +; CHECK-32-NEXT: cmpwi r3, 0 +; CHECK-32-NEXT: ori r3, r4, 1 +; CHECK-32-NEXT: li r4, 1 +; CHECK-32-NEXT: bclr 12, lt, 0 +; CHECK-32-NEXT: # %bb.1: +; CHECK-32-NEXT: ori r3, r4, 0 +; CHECK-32-NEXT: blr + %c = icmp slt i32 %x, 0 + %r = select i1 %c, i32 -2147483647, i32 1 + ret i32 %r +} + +define i32 @f0_not_ov(i32 %x) { +; CHECK-LE-LABEL: f0_not_ov: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: rlwinm r3, r3, 1, 31, 31 +; CHECK-LE-NEXT: not r3, r3 +; CHECK-LE-NEXT: sldi r3, r3, 30 +; CHECK-LE-NEXT: neg r3, r3 +; CHECK-LE-NEXT: blr +; +; CHECK-32-LABEL: f0_not_ov: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: srwi r3, r3, 31 +; CHECK-32-NEXT: not r3, r3 +; CHECK-32-NEXT: slwi r3, r3, 30 +; CHECK-32-NEXT: neg r3, r3 +; CHECK-32-NEXT: blr + %c = icmp slt i32 %x, 0 + %r = select i1 %c, i32 0, i32 -1073741824 + ret i32 %r +} + define i64 @f1(i64 %x) { ; CHECK-LE-LABEL: f1: ; CHECK-LE: # %bb.0: @@ -46,11 +106,11 @@ ; CHECK-32-NEXT: li r4, 512 ; CHECK-32-NEXT: cmpwi r3, 0 ; CHECK-32-NEXT: li r3, 64 -; CHECK-32-NEXT: bc 12, lt, .LBB1_1 -; CHECK-32-NEXT: b .LBB1_2 -; CHECK-32-NEXT: .LBB1_1: +; CHECK-32-NEXT: bc 12, lt, .LBB4_1 +; CHECK-32-NEXT: b .LBB4_2 +; CHECK-32-NEXT: .LBB4_1: ; CHECK-32-NEXT: addi r4, r3, 0 -; CHECK-32-NEXT: .LBB1_2: +; CHECK-32-NEXT: .LBB4_2: ; CHECK-32-NEXT: li r3, 0 ; CHECK-32-NEXT: blr %c = icmp slt i64 %x, 0 @@ -70,13 +130,13 @@ ; CHECK-32: # %bb.0: ; CHECK-32-NEXT: or. 
r3, r4, r3 ; CHECK-32-NEXT: li r3, 1024 -; CHECK-32-NEXT: bc 12, eq, .LBB2_2 +; CHECK-32-NEXT: bc 12, eq, .LBB5_2 ; CHECK-32-NEXT: # %bb.1: ; CHECK-32-NEXT: ori r4, r3, 0 -; CHECK-32-NEXT: b .LBB2_3 -; CHECK-32-NEXT: .LBB2_2: +; CHECK-32-NEXT: b .LBB5_3 +; CHECK-32-NEXT: .LBB5_2: ; CHECK-32-NEXT: li r4, 0 -; CHECK-32-NEXT: .LBB2_3: +; CHECK-32-NEXT: .LBB5_3: ; CHECK-32-NEXT: li r3, 0 ; CHECK-32-NEXT: blr %c = icmp eq i64 %x, 0 @@ -94,12 +154,12 @@ ; CHECK-32-LABEL: f3: ; CHECK-32: # %bb.0: ; CHECK-32-NEXT: or. r3, r4, r3 -; CHECK-32-NEXT: bc 12, eq, .LBB3_2 +; CHECK-32-NEXT: bc 12, eq, .LBB6_2 ; CHECK-32-NEXT: # %bb.1: ; CHECK-32-NEXT: ori r3, r5, 0 ; CHECK-32-NEXT: ori r4, r6, 0 ; CHECK-32-NEXT: blr -; CHECK-32-NEXT: .LBB3_2: +; CHECK-32-NEXT: .LBB6_2: ; CHECK-32-NEXT: li r3, 0 ; CHECK-32-NEXT: li r4, 0 ; CHECK-32-NEXT: blr @@ -126,9 +186,9 @@ ; CHECK-32-NEXT: crandc 4*cr5+gt, eq, 4*cr1+eq ; CHECK-32-NEXT: cror 4*cr5+lt, 4*cr5+gt, 4*cr5+lt ; CHECK-32-NEXT: subfze r6, r3 -; CHECK-32-NEXT: bc 12, 4*cr5+lt, .LBB4_1 +; CHECK-32-NEXT: bc 12, 4*cr5+lt, .LBB7_1 ; CHECK-32-NEXT: blr -; CHECK-32-NEXT: .LBB4_1: +; CHECK-32-NEXT: .LBB7_1: ; CHECK-32-NEXT: addi r3, r6, 0 ; CHECK-32-NEXT: addi r4, r5, 0 ; CHECK-32-NEXT: blr