Index: lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.h
+++ lib/Target/PowerPC/PPCISelLowering.h
@@ -968,6 +968,11 @@
     SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
 
+    // This function looks at SETCC that compares integers. It replaces SETCC
+    // with integer subtraction when (1) there is a legal way of doing it.
+    // (2) keeping the result of comparison in GPR has some performance benefit.
+    SDValue ConvertSETCCToSubtract(SDNode *N, DAGCombinerInfo &DCI) const;
+
     SDValue getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                              int &RefinementSteps,
                              bool &UseOneConstNR) const override;
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9917,6 +9917,87 @@
   return false;
 }
 
+
+// This function is called when we have proved that a SETCC node can be replaced
+// by subtraction (and other supporting instructions) so that the result of
+// comparison is kept in a GPR instead of CR. This function is purely for
+// codegen purposes and has some flags to guide the codegen process.
+static SDValue GenerateEquivalentSub(SDNode *N, int Size, bool Complement,
+                                     bool Swap, SDLoc &DL, SelectionDAG &DAG) {
+
+  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
+
+  // Zero extend the operands to the largest legal integer. Originally, they
+  // must be of a strictly smaller size.
+  auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
+                         DAG.getConstant(Size, DL, MVT::i32));
+  auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
+                         DAG.getConstant(Size, DL, MVT::i32));
+
+  // Swap if needed. Depends on the condition code.
+  if (Swap)
+    std::swap(Op0, Op1);
+
+  // Subtract extended integers.
+  auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
+
+  // Move the sign bit to the least significant position and zero out the rest.
+  // Now the least significant bit carries the result of original comparison.
+  auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
+                             DAG.getConstant(Size - 1, DL, MVT::i32));
+  auto Final = Shifted;
+
+  // Complement the result if needed. Based on the condition code.
+  if (Complement)
+    Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
+                        DAG.getConstant(1, DL, MVT::i64));
+
+  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
+}
+
+SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
+                                                  DAGCombinerInfo &DCI) const {
+
+  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc DL(N);
+
+  // Size of integers being compared has a critical role in the following
+  // analysis, so we prefer to do this when all types are legal.
+  if (!DCI.isAfterLegalizeVectorOps())
+    return SDValue();
+
+  // If all users of SETCC extend its value to a legal integer type
+  // then we replace SETCC with a subtraction
+  for (SDNode::use_iterator UI = N->use_begin(),
+       UE = N->use_end(); UI != UE; ++UI) {
+    if (UI->getOpcode() != ISD::ZERO_EXTEND)
+      return SDValue();
+  }
+
+  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();;
+  auto OpSize = N->getOperand(0).getValueSizeInBits();
+
+  unsigned Size = 64; //Size of widest legal int on PPC
+
+  if (OpSize < Size) {
+    switch (CC) {
+    default: break;
+    case ISD::SETULT:
+      return GenerateEquivalentSub(N, Size, false, false, DL, DAG);
+    case ISD::SETULE:
+      return GenerateEquivalentSub(N, Size, true, false, DL, DAG);
+    case ISD::SETUGT:
+      return GenerateEquivalentSub(N, Size, true, true, DL, DAG);
+    case ISD::SETUGE:
+      return GenerateEquivalentSub(N, Size, false, true, DL, DAG);
+    }
+  }
+
+  return SDValue();
+}
+
 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
                                                   DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -9958,7 +10039,8 @@
                                  APInt::getHighBitsSet(OpBits, OpBits-1)) ||
           !DAG.MaskedValueIsZero(N->getOperand(1),
                                  APInt::getHighBitsSet(OpBits, OpBits-1)))
-        return SDValue();
+        return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
+                                             : SDValue());
     } else {
       // This is neither a signed nor an unsigned comparison, just make sure
       // that the high bits are equal.
Index: test/CodeGen/PowerPC/setcc-to-sub.ll
===================================================================
--- /dev/null
+++ test/CodeGen/PowerPC/setcc-to-sub.ll
@@ -0,0 +1,51 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=pwr8 < %s | FileCheck %s
+
+%class.PB2 = type { [1 x i32], %class.PB1* }
+%class.PB1 = type { [1 x i32], i64, i64, i32 }
+
+; Function Attrs: norecurse nounwind readonly
+define zeroext i1 @test1(%class.PB2* %s_a, %class.PB2* %s_b) local_unnamed_addr #0 {
+entry:
+  %arrayidx.i6 = bitcast %class.PB2* %s_a to i32*
+  %0 = load i32, i32* %arrayidx.i6, align 8, !tbaa !1
+  %and.i = and i32 %0, 8
+  %arrayidx.i37 = bitcast %class.PB2* %s_b to i32*
+  %1 = load i32, i32* %arrayidx.i37, align 8, !tbaa !1
+  %and.i4 = and i32 %1, 8
+  %cmp.i5 = icmp ult i32 %and.i, %and.i4
+  ret i1 %cmp.i5
+
+; CHECK-LABEL: @test1
+; CHECK: rlwinm [[RES1:[0-9]*]]
+; CHECK: rlwinm [[RES2:[0-9]*]]
+; CHECK: sub 3, [[RES1]], [[RES2]]
+; CHECK: blr
+
+}
+
+; Function Attrs: norecurse nounwind readonly
+define zeroext i1 @test2(%class.PB2* %s_a, %class.PB2* %s_b) local_unnamed_addr #0 {
+entry:
+  %arrayidx.i6 = bitcast %class.PB2* %s_a to i32*
+  %0 = load i32, i32* %arrayidx.i6, align 8, !tbaa !1
+  %and.i = and i32 %0, 8
+  %arrayidx.i37 = bitcast %class.PB2* %s_b to i32*
+  %1 = load i32, i32* %arrayidx.i37, align 8, !tbaa !1
+  %and.i4 = and i32 %1, 8
+  %cmp.i5 = icmp ule i32 %and.i, %and.i4
+  ret i1 %cmp.i5
+
+; CHECK-LABEL: @test2
+; CHECK: rlwinm [[RES1:[0-9]*]]
+; CHECK: rlwinm [[RES2:[0-9]*]]
+; CHECK: sub 3, [[RES1]], [[RES2]]
+; CHECK: xori 3, 3, 1
+; CHECK: blr
+
+}
+
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C++ TBAA"}