diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1268,6 +1268,7 @@
     SDValue combineFMALike(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineSelectCC(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineVectorShuffle(ShuffleVectorSDNode *SVN,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -14419,8 +14419,10 @@
   case ISD::SETCC:
     if (SDValue CSCC = combineSetCC(N, DCI))
       return CSCC;
-    LLVM_FALLTHROUGH;
+    return DAGCombineTruncBoolExt(N, DCI);
   case ISD::SELECT_CC:
+    if (SDValue Value = combineSelectCC(N, DCI))
+      return Value;
     return DAGCombineTruncBoolExt(N, DCI);
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP:
@@ -16710,3 +16712,64 @@
 
   return SDValue();
 }
+
+/// Fold (select_cc x, 0, TrueC, FalseC, setlt) on integer constants into
+/// shift/and/add-or-sub arithmetic on the sign bit of x:
+///   FalseC - ((x >>s (bits(x) - 1)) & (FalseC - TrueC)), if FalseC > TrueC
+///   FalseC + ((x >>s (bits(x) - 1)) & (TrueC - FalseC)), if FalseC < TrueC
+/// Returns an empty SDValue when the node does not match or the constants
+/// fail the cost checks below.
+SDValue PPCTargetLowering::combineSelectCC(SDNode *N,
+                                           DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc DL(N);
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue N2 = N->getOperand(2);
+  SDValue N3 = N->getOperand(3);
+  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+  EVT VT = N2.getValueType();
+  EVT CmpOpVT = N0.getValueType();
+  SDValue CmpOp = N0;
+  auto *N2C = dyn_cast<ConstantSDNode>(N2);
+  auto *N3C = dyn_cast<ConstantSDNode>(N3);
+
+  // Exploit sign bit to simplify select_cc x, 0, y, z, cc.
+  // Commonly we have select_cc x, 0, y, z, lt ->
+  // result = z - signbit_of_x * (z - y).
+  // On PPC, it's cheap to materialize signbit_of_x * (z - y) with rlwinm if
+  // (z - y) is an int32 ShiftedMask, in which case
+  // select_cc x, 0, y, z, lt is sub (z, and ((sra x, sizeof(x)-1), z - y)).
+  if (!N2C || !N3C || !VT.isInteger() || !CmpOpVT.isInteger() ||
+      !isNullConstant(N1) || CC != ISD::SETLT || !CmpOpVT.bitsGE(VT))
+    return SDValue();
+
+  unsigned SignBitShiftAmount = CmpOpVT.getSizeInBits() - 1;
+  const APInt &TrueVal = N2C->getAPIntValue();
+  const APInt &FalseVal = N3C->getAPIntValue();
+  bool Overflow;
+  APInt Diff = FalseVal.ssub_ov(TrueVal, Overflow);
+
+  if (Overflow)
+    return SDValue();
+
+  APInt AbsDiff = Diff.abs();
+  if (AbsDiff.isNullValue())
+    // N2 == N3.
+    return N2;
+
+  // It's cheap to materialize related constants with rlwinm.
+  if (!TrueVal.isSignedIntN(16) || !FalseVal.isSignedIntN(16) ||
+      !Diff.isIntN(32) || !Diff.isShiftedMask())
+    return SDValue();
+
+  unsigned LastOp = Diff.isNegative() ? ISD::ADD : ISD::SUB;
+  SDValue SignBitShifted =
+      DAG.getNode(ISD::SRA, DL, CmpOpVT, CmpOp,
+                  DAG.getConstant(SignBitShiftAmount, DL, CmpOpVT));
+  if (CmpOpVT.bitsGT(VT))
+    SignBitShifted = DAG.getNode(ISD::TRUNCATE, DL, VT, SignBitShifted);
+  SDValue Temp = DAG.getNode(ISD::AND, DL, VT, SignBitShifted,
+                             DAG.getConstant(AbsDiff, DL, VT));
+  return DAG.getNode(LastOp, DL, VT, N3, Temp);
+}
diff --git a/llvm/test/CodeGen/PowerPC/select.ll b/llvm/test/CodeGen/PowerPC/select.ll
--- a/llvm/test/CodeGen/PowerPC/select.ll
+++ b/llvm/test/CodeGen/PowerPC/select.ll
@@ -9,23 +9,16 @@
 define i64 @f0(i64 %x) {
 ; CHECK-LE-LABEL: f0:
 ; CHECK-LE: # %bb.0:
-; CHECK-LE-NEXT: li r4, 125
-; CHECK-LE-NEXT: cmpdi r3, 0
-; CHECK-LE-NEXT: li r3, -3
-; CHECK-LE-NEXT: isellt r3, r3, r4
+; CHECK-LE-NEXT: rotldi r3, r3, 1
+; CHECK-LE-NEXT: rldic r3, r3, 7, 56
+; CHECK-LE-NEXT: subfic r3, r3, 125
 ; CHECK-LE-NEXT: blr
 ;
 ; CHECK-32-LABEL: f0:
 ; CHECK-32: # %bb.0:
-; CHECK-32-NEXT: li r4, 125
-; CHECK-32-NEXT: li r5, -3
-; CHECK-32-NEXT: cmpwi r3, 0
-; CHECK-32-NEXT: bc 12, lt, .LBB0_1
-; CHECK-32-NEXT: b .LBB0_2
-; CHECK-32-NEXT: .LBB0_1:
-; CHECK-32-NEXT: addi r4, r5, 0
-; CHECK-32-NEXT: .LBB0_2:
 ; CHECK-32-NEXT: srawi r3, r3, 31
+; CHECK-32-NEXT: rlwinm r4, r3, 0, 24, 24
+; CHECK-32-NEXT: subfic r4, r4, 125
 ; CHECK-32-NEXT: blr
   %c = icmp slt i64 %x, 0
   %r = select i1 %c, i64 -3, i64 125
@@ -35,23 +28,18 @@
 define i64 @f1(i64 %x) {
 ; CHECK-LE-LABEL: f1:
 ; CHECK-LE: # %bb.0:
-; CHECK-LE-NEXT: li r4, 512
-; CHECK-LE-NEXT: cmpdi r3, 0
-; CHECK-LE-NEXT: li r3, 64
-; CHECK-LE-NEXT: isellt r3, r3, r4
+; CHECK-LE-NEXT: sradi r3, r3, 63
+; CHECK-LE-NEXT: rlwinm r3, r3, 0, 23, 25
+; CHECK-LE-NEXT: subfic r3, r3, 512
 ; CHECK-LE-NEXT: blr
 ;
 ; CHECK-32-LABEL: f1:
 ; CHECK-32: # %bb.0:
-; CHECK-32-NEXT: li r4, 512
-; CHECK-32-NEXT: cmpwi r3, 0
-; CHECK-32-NEXT: li r3, 64
-; CHECK-32-NEXT: bc 12, lt, .LBB1_1
-; CHECK-32-NEXT: b .LBB1_2
-; CHECK-32-NEXT: .LBB1_1:
-; CHECK-32-NEXT: addi r4, r3, 0
-; CHECK-32-NEXT: .LBB1_2:
+; CHECK-32-NEXT: mr r4, r3
+; CHECK-32-NEXT: srawi r4, r4, 31
+; CHECK-32-NEXT: rlwinm r4, r4, 0, 23, 25
 ; CHECK-32-NEXT: li r3, 0
+; CHECK-32-NEXT: subfic r4, r4, 512
 ; CHECK-32-NEXT: blr
   %c = icmp slt i64 %x, 0
   %r = select i1 %c, i64 64, i64 512