diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2780,6 +2780,15 @@
     return false;
   }
 
+  /// Return true if it is cheap to fold (x < 0) ? TrueVal : FalseVal into a
+  /// sub/add of FalseVal and (shifted sign bit of x) & |FalseVal - TrueVal|.
+  virtual bool
+  isCheapToFoldSignBitSubAndConstantDiff(EVT ResultVT, EVT SignBitVT,
+                                         const APInt &TrueVal,
+                                         const APInt &FalseVal) const {
+    return false;
+  }
+
   //===--------------------------------------------------------------------===//
   // Runtime Library hooks
   //
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -547,6 +547,9 @@
     SDValue foldSignChangeInBitcast(SDNode *N);
     SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
                                    SDValue N2, SDValue N3, ISD::CondCode CC);
+    SDValue foldSelectConstantComparingZero(const SDLoc &DL, SDValue N0,
+                                            SDValue N1, SDValue N2, SDValue N3,
+                                            ISD::CondCode CC);
     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                               const SDLoc &DL);
     SDValue unfoldMaskedMerge(SDNode *N);
@@ -21732,6 +21735,58 @@
   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
 }
 
+// Exploit the sign bit to simplify select_cc x, 0, c0, c1, setlt.
+// With constant arms, select_cc x, 0, y, z, lt is equivalent to
+//   result = z - signbit(x) * (z - y).
+// If signbit(x) * (z - y) is cheap to materialize, this optimization applies.
+SDValue DAGCombiner::foldSelectConstantComparingZero(const SDLoc &DL,
+                                                     SDValue N0, SDValue N1,
+                                                     SDValue N2, SDValue N3,
+                                                     ISD::CondCode CC) {
+  EVT VT = N2.getValueType();
+  EVT CmpOpVT = N0.getValueType();
+  auto *N2C = dyn_cast<ConstantSDNode>(N2);
+  auto *N3C = dyn_cast<ConstantSDNode>(N3);
+
+  // Only handle (x < 0) ? C0 : C1 where x is no narrower than the result.
+  if (!N2C || !N3C || !VT.isInteger() || !CmpOpVT.isInteger() ||
+      !isNullConstant(N1) || CC != ISD::SETLT || !CmpOpVT.bitsGE(VT))
+    return SDValue();
+
+  unsigned SignBitShiftAmount = CmpOpVT.getSizeInBits() - 1;
+  if (TLI.shouldAvoidTransformToShift(CmpOpVT, SignBitShiftAmount))
+    return SDValue();
+
+  const APInt &TrueVal = N2C->getAPIntValue();
+  const APInt &FalseVal = N3C->getAPIntValue();
+  if (!TLI.isCheapToFoldSignBitSubAndConstantDiff(VT, CmpOpVT, TrueVal,
+                                                  FalseVal))
+    return SDValue();
+
+  // The ADD/SUB selection below assumes the difference did not wrap.
+  bool Overflow;
+  APInt Diff = FalseVal.ssub_ov(TrueVal, Overflow);
+  if (Overflow)
+    return SDValue();
+
+  // N2 == N3: both arms are the same constant.
+  if (Diff.isNullValue())
+    return N2;
+
+  // select_cc x, 0, y, z, lt -> z - ((x >>s bitwidth-1) & (z - y)).
+  // When z < y, add the mask of the positive difference (y - z) instead.
+  unsigned LastOp = FalseVal.slt(TrueVal) ? ISD::ADD : ISD::SUB;
+  // Use the target's shift-amount type, as required after legalization.
+  SDValue SignBitShifted = DAG.getNode(
+      ISD::SRA, DL, CmpOpVT, N0,
+      DAG.getConstant(SignBitShiftAmount, DL, getShiftAmountTy(CmpOpVT)));
+  if (CmpOpVT.bitsGT(VT))
+    SignBitShifted = DAG.getNode(ISD::TRUNCATE, DL, VT, SignBitShifted);
+  SDValue Temp = DAG.getNode(ISD::AND, DL, VT, SignBitShifted,
+                             DAG.getConstant(Diff.abs(), DL, VT));
+  return DAG.getNode(LastOp, DL, VT, N3, Temp);
+}
+
 // Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
 SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
   SDValue N0 = N->getOperand(0);
@@ -21976,6 +22031,9 @@
     }
   }
 
+  if (SDValue V = foldSelectConstantComparingZero(DL, N0, N1, N2, N3, CC))
+    return V;
+
   return SDValue();
 }
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -924,6 +924,21 @@
 
     bool isFPExtFree(EVT DestVT, EVT SrcVT) const override;
 
+    /// PPC treats the fold as cheap when both select arms fit in a signed
+    /// 16-bit immediate and their absolute difference is a shifted mask (one
+    /// contiguous run of set bits) that fits in 32 bits.
+    bool isCheapToFoldSignBitSubAndConstantDiff(
+        EVT ResultVT, EVT SignBitVT, const APInt &TrueVal,
+        const APInt &FalseVal) const override {
+      // A wrapped difference says nothing about the real distance between the
+      // arms, so never report such a pair as cheap.
+      bool Overflow;
+      APInt Diff = TrueVal.ssub_ov(FalseVal, Overflow).abs();
+      return !Overflow && TrueVal.isSignedIntN(16) &&
+             FalseVal.isSignedIntN(16) && Diff.isIntN(32) &&
+             Diff.isShiftedMask();
+    }
+
     /// Returns true if it is beneficial to convert a load of a constant
     /// to just the constant itself.
     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
diff --git a/llvm/test/CodeGen/PowerPC/select.ll b/llvm/test/CodeGen/PowerPC/select.ll
--- a/llvm/test/CodeGen/PowerPC/select.ll
+++ b/llvm/test/CodeGen/PowerPC/select.ll
@@ -9,23 +9,17 @@
 define i64 @f0(i64 %x) {
 ; CHECK-LE-LABEL: f0:
 ; CHECK-LE:       # %bb.0:
-; CHECK-LE-NEXT:    li r4, 125
-; CHECK-LE-NEXT:    cmpdi r3, 0
-; CHECK-LE-NEXT:    li r3, -3
-; CHECK-LE-NEXT:    isellt r3, r3, r4
+; CHECK-LE-NEXT:    rotldi r3, r3, 1
+; CHECK-LE-NEXT:    rldic r3, r3, 7, 56
+; CHECK-LE-NEXT:    subfic r3, r3, 125
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-32-LABEL: f0:
 ; CHECK-32:       # %bb.0:
-; CHECK-32-NEXT:    li r4, 125
-; CHECK-32-NEXT:    li r5, -3
-; CHECK-32-NEXT:    cmpwi r3, 0
-; CHECK-32-NEXT:    bc 12, lt, .LBB0_1
-; CHECK-32-NEXT:    b .LBB0_2
-; CHECK-32-NEXT:  .LBB0_1:
-; CHECK-32-NEXT:    addi r4, r5, 0
-; CHECK-32-NEXT:  .LBB0_2:
-; CHECK-32-NEXT:    srawi r3, r3, 31
+; CHECK-32-NEXT:    rlwinm r3, r3, 8, 24, 24
+; CHECK-32-NEXT:    li r5, 0
+; CHECK-32-NEXT:    subfic r4, r3, 125
+; CHECK-32-NEXT:    subfze r3, r5
 ; CHECK-32-NEXT:    blr
   %c = icmp slt i64
%x, 0 %r = select i1 %c, i64 -3, i64 125 @@ -35,23 +29,18 @@ define i64 @f1(i64 %x) { ; CHECK-LE-LABEL: f1: ; CHECK-LE: # %bb.0: -; CHECK-LE-NEXT: li r4, 512 -; CHECK-LE-NEXT: cmpdi r3, 0 -; CHECK-LE-NEXT: li r3, 64 -; CHECK-LE-NEXT: isellt r3, r3, r4 +; CHECK-LE-NEXT: sradi r3, r3, 63 +; CHECK-LE-NEXT: rlwinm r3, r3, 0, 23, 25 +; CHECK-LE-NEXT: subfic r3, r3, 512 ; CHECK-LE-NEXT: blr ; ; CHECK-32-LABEL: f1: ; CHECK-32: # %bb.0: -; CHECK-32-NEXT: li r4, 512 -; CHECK-32-NEXT: cmpwi r3, 0 -; CHECK-32-NEXT: li r3, 64 -; CHECK-32-NEXT: bc 12, lt, .LBB1_1 -; CHECK-32-NEXT: b .LBB1_2 -; CHECK-32-NEXT: .LBB1_1: -; CHECK-32-NEXT: addi r4, r3, 0 -; CHECK-32-NEXT: .LBB1_2: -; CHECK-32-NEXT: li r3, 0 +; CHECK-32-NEXT: srawi r3, r3, 31 +; CHECK-32-NEXT: rlwinm r3, r3, 0, 23, 25 +; CHECK-32-NEXT: li r5, 0 +; CHECK-32-NEXT: subfic r4, r3, 512 +; CHECK-32-NEXT: subfze r3, r5 ; CHECK-32-NEXT: blr %c = icmp slt i64 %x, 0 %r = select i1 %c, i64 64, i64 512