Index: lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.h
+++ lib/Target/PowerPC/PPCISelLowering.h
@@ -952,6 +952,9 @@
     SDValue DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
+    /// Try to replace an unsigned SETCC, all of whose users are ZERO_EXTENDs,
+    /// with a subtract-and-shift sequence (empty SDValue if not applicable).
+    SDValue ConvertSETCCToSubtract(SDNode *N, DAGCombinerInfo &DCI) const;
 
     SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
                              unsigned &RefinementSteps,
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9888,6 +9888,95 @@
   return false;
 }
 
+/// Build the subtract-based replacement for the unsigned SETCC node \p N.
+///
+/// Both compared operands are zero-extended to i64 and subtracted. Bit
+/// \p size - 1 of the difference is then moved to bit 0 with a logical right
+/// shift, optionally flipped, and the result is truncated to i1.
+///
+/// \param N          node whose operands 0 and 1 are the values to compare.
+/// \param size       bit width of the subtraction; the caller passes 64.
+/// \param complement XOR the extracted bit with 1 before truncating.
+/// \param swap       exchange the extended operands before subtracting.
+/// \param dl         debug location attached to every node created here.
+/// \param DAG        selection DAG in which the nodes are created.
+/// \returns the i1 TRUNCATE node that carries the comparison result.
+static SDValue GenerateEquivalentSub(SDNode *N, int size, bool complement,
+                                     bool swap, SDLoc &dl, SelectionDAG &DAG) {
+  // ZERO_EXTEND is a unary node, so only the value being widened is passed.
+  auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, N->getOperand(0));
+  auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, N->getOperand(1));
+
+  if (swap)
+    std::swap(Op0, Op1);
+
+  auto SubNode = DAG.getNode(ISD::SUB, dl, MVT::i64, Op0, Op1);
+  // With operands that were narrower than i64 before extension, the top bit
+  // of the difference is set exactly when Op0 < Op1 (unsigned).
+  auto Shifted = DAG.getNode(ISD::SRL, dl, MVT::i64, SubNode,
+                             DAG.getConstant(size - 1, dl, MVT::i32));
+  auto Final = Shifted;
+  if (complement)
+    Final = DAG.getNode(ISD::XOR, dl, MVT::i64, Shifted,
+                        DAG.getConstant(1, dl, MVT::i64));
+
+  return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Final);
+}
+
+/// Try to rewrite the unsigned integer SETCC \p N as a subtraction.
+///
+/// The rewrite is attempted only for SETCC nodes, after vector-op
+/// legalization, when i64 is legal, when every user of \p N is a ZERO_EXTEND,
+/// and when the compared operands are narrower than 64 bits.
+///
+/// \returns the replacement value, or an empty SDValue when nothing changes.
+SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
+                                                  DAGCombinerInfo &DCI) const {
+  // Operand 2 is read as a condition code below, which only holds for SETCC.
+  if (N->getOpcode() != ISD::SETCC)
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc dl(N);
+
+  if (!DCI.isAfterLegalizeVectorOps())
+    return SDValue();
+
+  // The replacement is built from i64 nodes, so bail out on subtargets where
+  // i64 is not a legal type.
+  if (!isTypeLegal(MVT::i64))
+    return SDValue();
+
+  // If all users of SETCC extend its value to a legal type
+  // then we replace SETCC with a subtraction.
+  for (SDNode::use_iterator UI = N->use_begin(),
+       UE = N->use_end(); UI != UE; ++UI) {
+    if (UI->getOpcode() != ISD::ZERO_EXTEND)
+      return SDValue();
+  }
+
+  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+  auto OpSize = N->getOperand(0).getValueType().getSizeInBits();
+
+  unsigned size = 64; // width of the i64 nodes built by the helper
+
+  if (OpSize < size) {
+    switch (CC) {
+    default: break;
+    case ISD::SETULT: // a <  b  ==   sign(a - b)
+      return GenerateEquivalentSub(N, size, false, false, dl, DAG);
+    case ISD::SETULE: // a <= b  ==  !sign(b - a)
+      return GenerateEquivalentSub(N, size, true, true, dl, DAG);
+    case ISD::SETUGT: // a >  b  ==   sign(b - a)
+      return GenerateEquivalentSub(N, size, false, true, dl, DAG);
+    case ISD::SETUGE: // a >= b  ==  !sign(a - b)
+      return GenerateEquivalentSub(N, size, true, false, dl, DAG);
+    }
+  }
+
+  return SDValue();
+}
+
 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
                                                   DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -9929,7 +10018,7 @@
                                  APInt::getHighBitsSet(OpBits, OpBits-1)) ||
           !DAG.MaskedValueIsZero(N->getOperand(1),
                                  APInt::getHighBitsSet(OpBits, OpBits-1)))
-        return SDValue();
+        return ConvertSETCCToSubtract(N, DCI);
     } else {
       // This is neither a signed nor an unsigned comparison, just make sure
       // that the high bits are equal.
Index: test/CodeGen/PowerPC/setcc-to-sub.ll
===================================================================
--- /dev/null
+++ test/CodeGen/PowerPC/setcc-to-sub.ll
@@ -0,0 +1,57 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s
+
+; Check that an unsigned compare of two narrow values whose i1 result is
+; zero-extended (here by the zeroext return) is lowered to a subtract.
+
+%class.PB2 = type { [1 x i32], %class.PB1* }
+%class.PB1 = type { [1 x i32], i64, i64, i32 }
+
+; a < b (unsigned): the operands are subtracted in source order.
+; Function Attrs: norecurse nounwind readonly
+define zeroext i1 @test1(%class.PB2* %s_a, %class.PB2* %s_b) local_unnamed_addr #0 {
+entry:
+  %arrayidx.i6 = bitcast %class.PB2* %s_a to i32*
+  %0 = load i32, i32* %arrayidx.i6, align 8, !tbaa !1
+  %and.i = and i32 %0, 8
+  %arrayidx.i37 = bitcast %class.PB2* %s_b to i32*
+  %1 = load i32, i32* %arrayidx.i37, align 8, !tbaa !1
+  %and.i4 = and i32 %1, 8
+  %cmp.i5 = icmp ult i32 %and.i, %and.i4
+  ret i1 %cmp.i5
+
+; CHECK-LABEL: @test1
+; CHECK: rlwinm [[RES1:[0-9]+]]
+; CHECK: rlwinm [[RES2:[0-9]+]]
+; CHECK: sub 3, [[RES1]], [[RES2]]
+; CHECK: blr
+
+}
+
+; a <= b (unsigned) is !(b < a): the operands are subtracted in swapped order
+; and the extracted bit is complemented.
+; Function Attrs: norecurse nounwind readonly
+define zeroext i1 @test2(%class.PB2* %s_a, %class.PB2* %s_b) local_unnamed_addr #0 {
+entry:
+  %arrayidx.i6 = bitcast %class.PB2* %s_a to i32*
+  %0 = load i32, i32* %arrayidx.i6, align 8, !tbaa !1
+  %and.i = and i32 %0, 8
+  %arrayidx.i37 = bitcast %class.PB2* %s_b to i32*
+  %1 = load i32, i32* %arrayidx.i37, align 8, !tbaa !1
+  %and.i4 = and i32 %1, 8
+  %cmp.i5 = icmp ule i32 %and.i, %and.i4
+  ret i1 %cmp.i5
+
+; CHECK-LABEL: @test2
+; CHECK: rlwinm [[RES1:[0-9]+]]
+; CHECK: rlwinm [[RES2:[0-9]+]]
+; CHECK: sub {{[0-9]+}}, [[RES2]], [[RES1]]
+; CHECK: xori 3, {{[0-9]+}}, 1
+; CHECK: blr
+
+}
+
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C++ TBAA"}