Index: lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.h
+++ lib/Target/PowerPC/PPCISelLowering.h
@@ -968,6 +968,11 @@
     SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
 
+    // Combine for an integer SETCC: rewrites it as an integer subtraction when
+    // (1) there is a legal way of doing so, and (2) keeping the comparison
+    // result in a GPR has some performance benefit. Else returns SDValue().
+    SDValue ConvertSETCCToSubtract(SDNode *N, DAGCombinerInfo &DCI) const;
+
     SDValue getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                              int &RefinementSteps,
                              bool &UseOneConstNR) const override;
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9917,6 +9917,87 @@
   return false;
 }
 
+
+// Builds the GPR-only replacement for an integer SETCC node N that the caller
+// has already proved convertible: zero-extend both operands to Size bits,
+// subtract, and read the sign bit of the difference as the comparison result.
+// Swap exchanges the operands first; Complement inverts the final bit.
+static SDValue GenerateEquivalentSub(SDNode *N, int Size, bool Complement,
+                                     bool Swap, SDLoc &DL, SelectionDAG &DAG) {
+  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
+
+  // Work in the Size-bit integer type so the extension width always agrees
+  // with the Size - 1 shift below (this is i64 when Size is 64).
+  const MVT WideVT = MVT::getIntegerVT(Size);
+
+  // Zero extend the operands, which must originally be strictly narrower than
+  // Size bits. ZERO_EXTEND is a unary node, so it takes only the value.
+  auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, N->getOperand(0));
+  auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, N->getOperand(1));
+
+  // Swap if needed. Depends on the condition code.
+  if (Swap)
+    std::swap(Op0, Op1);
+
+  // Subtract the extended integers. Both inputs have a clear top bit, so the
+  // sign bit of the difference is set exactly when Op0 < Op1 (unsigned).
+  auto SubNode = DAG.getNode(ISD::SUB, DL, WideVT, Op0, Op1);
+
+  // Move the sign bit to the least significant position and zero out the rest.
+  auto Final = DAG.getNode(ISD::SRL, DL, WideVT, SubNode,
+                           DAG.getConstant(Size - 1, DL, MVT::i32));
+
+  // Complement the result if needed. Based on the condition code.
+  if (Complement)
+    Final = DAG.getNode(ISD::XOR, DL, WideVT, Final,
+                        DAG.getConstant(1, DL, WideVT));
+
+  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
+}
+
+SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
+                                                  DAGCombinerInfo &DCI) const {
+  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
+
+  // The analysis below hinges on the width of the compared integers, so it is
+  // skipped unless isAfterLegalizeVectorOps(), when types should be legal.
+  if (!DCI.isAfterLegalizeVectorOps())
+    return SDValue();
+
+  // Bail out unless every user of the SETCC is a ZERO_EXTEND of its value.
+  for (auto User = N->use_begin(), End = N->use_end(); User != End; ++User)
+    if (User->getOpcode() != ISD::ZERO_EXTEND)
+      return SDValue();
+
+  // Only operands strictly narrower than the largest legal integer qualify.
+  SelectionDAG &DAG = DCI.DAG;
+  const unsigned WideBits =
+      DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
+  if (N->getOperand(0).getValueSizeInBits() >= WideBits)
+    return SDValue();
+
+  // Map each unsigned ordering onto "sign bit of (A - B)": Swap exchanges the
+  // operands first and Complement inverts the resulting bit.
+  bool Complement = false, Swap = false;
+  switch (cast<CondCodeSDNode>(N->getOperand(2))->get()) {
+  default:
+    return SDValue(); // Any other condition code is left untouched.
+  case ISD::SETULT:
+    break;
+  case ISD::SETULE:
+    Complement = Swap = true;
+    break;
+  case ISD::SETUGT:
+    Swap = true;
+    break;
+  case ISD::SETUGE:
+    Complement = true;
+    break;
+  }
+  SDLoc DL(N);
+  return GenerateEquivalentSub(N, WideBits, Complement, Swap, DL, DAG);
+}
+
 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
                                                   DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -9958,7 +10039,8 @@
                                  APInt::getHighBitsSet(OpBits, OpBits-1)) ||
           !DAG.MaskedValueIsZero(N->getOperand(1),
                                  APInt::getHighBitsSet(OpBits, OpBits-1)))
-        return SDValue();
+        return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
+                                             : SDValue());
     } else {
       // This is neither a signed nor an unsigned comparison, just make sure
       // that the high bits are equal.
Index: pzero-fp-xored.ll
===================================================================
--- pzero-fp-xored.ll
+++ /dev/null
@@ -1,71 +0,0 @@
-; RUN: llc -mtriple=powerpc-unknown-linux-gnu -mattr=+vsx < %s |  \
-; RUN:   FileCheck %s --implicit-check-not lxvd2x --implicit-check-not lfs
-; RUN: llc -mtriple=powerpc-unknown-linux-gnu -mattr=-vsx -mattr=-p8altivec < %s | \
-; RUN:   FileCheck %s --check-prefix=CHECK-NVSXP8A --implicit-check-not xxlxor \
-; RUN:                                             --implicit-check-not vxor
-
-define signext i32 @t1(float %x) local_unnamed_addr #0 {
-entry:
-  %cmp = fcmp ogt float %x, 0.000000e+00
-  %tmp = select i1 %cmp, i32 43, i32 11
-  ret i32 %tmp
-
-; CHECK-LABEL: t1:
-; CHECK: xxlxor [[REG1:[0-9]+]], [[REG1]], [[REG1]]
-; CHECK: fcmpu {{[0-9]+}}, {{[0-9]+}}, [[REG1]]
-; CHECK: blr
-; CHECK-NVSXP8A: lfs [[REG1:[0-9]+]]
-; CHECK-NVSXP8A: fcmpu {{[0-9]+}}, {{[0-9]+}}, [[REG1]]
-; CHECK-NVSXP8A: blr
-}
-
-define signext i32 @t2(double %x) local_unnamed_addr #0 {
-entry:
-  %cmp = fcmp ogt double %x, 0.000000e+00
-  %tmp = select i1 %cmp, i32 43, i32 11
-  ret i32 %tmp
-
-; CHECK-LABEL: t2:
-; CHECK: xxlxor [[REG2:[0-9]+]], [[REG2]], [[REG2]]
-; CHECK: xscmpudp {{[0-9]+}}, {{[0-9]+}}, [[REG2]]
-; CHECK: blr
-; CHECK-NVSXP8A: lfs [[REG2:[0-9]+]]
-; CHECK-NVSXP8A: fcmpu {{[0-9]+}}, {{[0-9]+}}, [[REG2]]
-; CHECK-NVSXP8A: blr
-}
-
-define signext i32 @t3(ppc_fp128 %x) local_unnamed_addr #0 {
-entry:
-  %cmp = fcmp ogt ppc_fp128 %x, 0xM00000000000000000000000000000000
-  %tmp = select i1 %cmp, i32 43, i32 11
-  ret i32 %tmp
-
-; CHECK-LABEL: t3:
-; CHECK: xxlxor [[REG3:[0-9]+]], [[REG3]], [[REG3]]
-; CHECK: fcmpu {{[0-9]+}}, {{[0-9]+}}, [[REG3]]
-; CHECK: fcmpu {{[0-9]+}}, {{[0-9]+}}, [[REG3]]
-; CHECK: blr
-; CHECK-NVSXP8A: lfs [[REG3:[0-9]+]]
-; CHECK-NVSXP8A: fcmpu {{[0-9]+}}, {{[0-9]+}}, [[REG3]]
-; CHECK-NVSXP8A: blr
-}
-
-define <2 x double> @t4() local_unnamed_addr #0 {
-  ret <2 x double> zeroinitializer
-; CHECK-LABEL: t4:
-; CHECK: vxor [[REG4:[0-9]+]], [[REG4]], [[REG4]]
-; CHECK: blr
-; CHECK-NVSXP8A: lfs [[REG4:[0-9]+]]
-; CHECK-NVSXP8A: fmr {{[0-9]+}}, [[REG4:[0-9]+]]
-; CHECK-NVSXP8A: blr
-}
-
-define <2 x i64> @t5() local_unnamed_addr #0 {
-  ret <2 x i64> zeroinitializer
-; CHECK-LABEL: t5:
-; CHECK: vxor [[REG5:[0-9]+]], [[REG5]], [[REG5]]
-; CHECK: blr
-; CHECK-NVSXP8A: lvx
-; CHECK-NVSXP8A: blr
-}
-
Index: test/CodeGen/PowerPC/setcc-to-sub.ll
===================================================================
--- /dev/null
+++ test/CodeGen/PowerPC/setcc-to-sub.ll
@@ -0,0 +1,96 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=pwr8 < %s | FileCheck %s
+
+%class.PB2 = type { [1 x i32], %class.PB1* }
+%class.PB1 = type { [1 x i32], i64, i64, i32 }
+
+; icmp ult: expect a sub of the two masked values followed by an rldicl.
+define zeroext i1 @test1(%class.PB2* %s_a, %class.PB2* %s_b) local_unnamed_addr #0 {
+entry:
+  %arrayidx.i6 = bitcast %class.PB2* %s_a to i32*
+  %0 = load i32, i32* %arrayidx.i6, align 8, !tbaa !1
+  %and.i = and i32 %0, 8
+  %arrayidx.i37 = bitcast %class.PB2* %s_b to i32*
+  %1 = load i32, i32* %arrayidx.i37, align 8, !tbaa !1
+  %and.i4 = and i32 %1, 8
+  %cmp.i5 = icmp ult i32 %and.i, %and.i4
+  ret i1 %cmp.i5
+
+; CHECK-LABEL: @test1
+; CHECK: rlwinm [[REG1:[0-9]+]]
+; CHECK-NEXT: rlwinm [[REG2:[0-9]+]]
+; CHECK-NEXT: sub [[REG3:[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: rldicl 3, [[REG3]]
+; CHECK: blr
+
+}
+
+; icmp ule: expect sub (second masked value first), rldicl, then xori by 1.
+define zeroext i1 @test2(%class.PB2* %s_a, %class.PB2* %s_b) local_unnamed_addr #0 {
+entry:
+  %arrayidx.i6 = bitcast %class.PB2* %s_a to i32*
+  %0 = load i32, i32* %arrayidx.i6, align 8, !tbaa !1
+  %and.i = and i32 %0, 8
+  %arrayidx.i37 = bitcast %class.PB2* %s_b to i32*
+  %1 = load i32, i32* %arrayidx.i37, align 8, !tbaa !1
+  %and.i4 = and i32 %1, 8
+  %cmp.i5 = icmp ule i32 %and.i, %and.i4
+  ret i1 %cmp.i5
+
+; CHECK-LABEL: @test2
+; CHECK: rlwinm [[REG1:[0-9]+]]
+; CHECK-NEXT: rlwinm [[REG2:[0-9]+]]
+; CHECK-NEXT: sub [[REG3:[0-9]+]], [[REG2]], [[REG1]]
+; CHECK-NEXT: rldicl [[REG4:[0-9]+]], [[REG3]]
+; CHECK-NEXT: xori 3, [[REG4]], 1
+; CHECK: blr
+
+}
+
+; icmp ugt: expect sub (second masked value first) followed by an rldicl.
+define zeroext i1 @test3(%class.PB2* %s_a, %class.PB2* %s_b) local_unnamed_addr #0 {
+entry:
+  %arrayidx.i6 = bitcast %class.PB2* %s_a to i32*
+  %0 = load i32, i32* %arrayidx.i6, align 8, !tbaa !1
+  %and.i = and i32 %0, 8
+  %arrayidx.i37 = bitcast %class.PB2* %s_b to i32*
+  %1 = load i32, i32* %arrayidx.i37, align 8, !tbaa !1
+  %and.i4 = and i32 %1, 8
+  %cmp.i5 = icmp ugt i32 %and.i, %and.i4
+  ret i1 %cmp.i5
+
+; CHECK-LABEL: @test3
+; CHECK: rlwinm [[REG1:[0-9]+]]
+; CHECK-NEXT: rlwinm [[REG2:[0-9]+]]
+; CHECK-NEXT: sub [[REG3:[0-9]+]], [[REG2]], [[REG1]]
+; CHECK-NEXT: rldicl 3, [[REG3]]
+; CHECK: blr
+
+}
+
+; icmp uge: expect sub of the two masked values, rldicl, then xori by 1.
+define zeroext i1 @test4(%class.PB2* %s_a, %class.PB2* %s_b) local_unnamed_addr #0 {
+entry:
+  %arrayidx.i6 = bitcast %class.PB2* %s_a to i32*
+  %0 = load i32, i32* %arrayidx.i6, align 8, !tbaa !1
+  %and.i = and i32 %0, 8
+  %arrayidx.i37 = bitcast %class.PB2* %s_b to i32*
+  %1 = load i32, i32* %arrayidx.i37, align 8, !tbaa !1
+  %and.i4 = and i32 %1, 8
+  %cmp.i5 = icmp uge i32 %and.i, %and.i4
+  ret i1 %cmp.i5
+
+; CHECK-LABEL: @test4
+; CHECK: rlwinm [[REG1:[0-9]+]]
+; CHECK-NEXT: rlwinm [[REG2:[0-9]+]]
+; CHECK-NEXT: sub [[REG3:[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: rldicl [[REG4:[0-9]+]], [[REG3]]
+; CHECK-NEXT: xori 3, [[REG4]], 1
+; CHECK: blr
+
+}
+
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C++ TBAA"}