Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -415,7 +415,8 @@
     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                               const SDLoc &DL);
     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
-                          const SDLoc &DL, bool foldBooleans = true);
+                          const SDLoc &DL, bool foldBooleans);
+    SDValue rebuildSetCC(SDValue N);
 
     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                            SDValue &CC) const;
@@ -7157,9 +7158,33 @@
 }
 
 SDValue DAGCombiner::visitSETCC(SDNode *N) {
-  return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
-                       cast<CondCodeSDNode>(N->getOperand(2))->get(),
-                       SDLoc(N));
+  // setcc is very commonly used as an argument to brcond. This pattern
+  // also lends itself to numerous combines and, as a result, it is desirable
+  // that we keep the argument to a brcond as a setcc as much as possible.
+  bool PreferSetCC =
+      N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
+
+  SDValue Combined = SimplifySetCC(
+      N->getValueType(0), N->getOperand(0), N->getOperand(1),
+      cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
+
+  if (!Combined)
+    return SDValue();
+
+  // If we prefer to have a setcc, and we don't, we'll try our best to
+  // recreate one using rebuildSetCC.
+  if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
+    SDValue NewSetCC = rebuildSetCC(Combined);
+
+    // We don't have anything interesting to combine to.
+    if (NewSetCC.getNode() == N)
+      return SDValue();
+
+    if (NewSetCC)
+      return NewSetCC;
+  }
+
+  return Combined;
 }
 
 SDValue DAGCombiner::visitSETCCE(SDNode *N) {
@@ -11151,16 +11176,22 @@
                        N1.getOperand(0), N1.getOperand(1), N2);
   }
 
-  if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
-      ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
-       (N1.getOperand(0).hasOneUse() &&
-        N1.getOperand(0).getOpcode() == ISD::SRL))) {
-    SDNode *Trunc = nullptr;
-    if (N1.getOpcode() == ISD::TRUNCATE) {
-      // Look pass the truncate.
-      Trunc = N1.getNode();
-      N1 = N1.getOperand(0);
-    }
+  if (N1.hasOneUse()) {
+    if (SDValue NewN1 = rebuildSetCC(N1))
+      return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::rebuildSetCC(SDValue N) {
+  if (N.getOpcode() == ISD::SRL ||
+      (N.getOpcode() == ISD::TRUNCATE &&
+       (N.getOperand(0).hasOneUse() &&
+        N.getOperand(0).getOpcode() == ISD::SRL))) {
+    // Look past the truncate.
+    if (N.getOpcode() == ISD::TRUNCATE)
+      N = N.getOperand(0);
 
     // Match this pattern so that we can generate simpler code:
     //
@@ -11179,75 +11210,43 @@
     // This applies only when the AND constant value has one bit set and the
     // SRL constant is equal to the log2 of the AND constant. The back-end is
    // smart enough to convert the result into a TEST/JMP sequence.
-    SDValue Op0 = N1.getOperand(0);
-    SDValue Op1 = N1.getOperand(1);
+    SDValue Op0 = N.getOperand(0);
+    SDValue Op1 = N.getOperand(1);
 
-    if (Op0.getOpcode() == ISD::AND &&
-        Op1.getOpcode() == ISD::Constant) {
+    if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
       SDValue AndOp1 = Op0.getOperand(1);
 
       if (AndOp1.getOpcode() == ISD::Constant) {
         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
 
         if (AndConst.isPowerOf2() &&
-            cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
+            cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
           SDLoc DL(N);
-          SDValue SetCC =
-            DAG.getSetCC(DL,
-                         getSetCCResultType(Op0.getValueType()),
-                         Op0, DAG.getConstant(0, DL, Op0.getValueType()),
-                         ISD::SETNE);
-
-          SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
-                                          MVT::Other, Chain, SetCC, N2);
-          // Don't add the new BRCond into the worklist or else SimplifySelectCC
-          // will convert it back to (X & C1) >> C2.
-          CombineTo(N, NewBRCond, false);
-          // Truncate is dead.
-          if (Trunc)
-            deleteAndRecombine(Trunc);
-          // Replace the uses of SRL with SETCC
-          WorklistRemover DeadNodes(*this);
-          DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
-          deleteAndRecombine(N1.getNode());
-          return SDValue(N, 0);  // Return N so it doesn't get rechecked!
+          return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
+                              Op0, DAG.getConstant(0, DL, Op0.getValueType()),
+                              ISD::SETNE);
         }
       }
     }
-
-    if (Trunc)
-      // Restore N1 if the above transformation doesn't match.
-      N1 = N->getOperand(1);
   }
 
   // Transform br(xor(x, y)) -> br(x != y)
   // Transform br(xor(xor(x,y), 1)) -> br (x == y)
-  if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
-    SDNode *TheXor = N1.getNode();
-    SDValue Op0 = TheXor->getOperand(0);
-    SDValue Op1 = TheXor->getOperand(1);
-    if (Op0.getOpcode() == Op1.getOpcode()) {
-      // Avoid missing important xor optimizations.
-      if (SDValue Tmp = visitXOR(TheXor)) {
-        if (Tmp.getNode() != TheXor) {
-          DEBUG(dbgs() << "\nReplacing.8 ";
-                TheXor->dump(&DAG);
-                dbgs() << "\nWith: ";
-                Tmp.getNode()->dump(&DAG);
-                dbgs() << '\n');
-          WorklistRemover DeadNodes(*this);
-          DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
-          deleteAndRecombine(TheXor);
-          return DAG.getNode(ISD::BRCOND, SDLoc(N),
-                             MVT::Other, Chain, Tmp, N2);
-        }
+  if (N.getOpcode() == ISD::XOR) {
+    SDNode *TheXor = N.getNode();
 
-        // visitXOR has changed XOR's operands or replaced the XOR completely,
-        // bail out.
-        return SDValue(N, 0);
-      }
+    // Avoid missing important xor optimizations.
+    while (SDValue Tmp = visitXOR(TheXor)) {
+      // We don't have a XOR anymore, bail.
+      if (Tmp.getOpcode() != ISD::XOR)
+        return Tmp;
+
+      TheXor = Tmp.getNode();
     }
 
+    SDValue Op0 = TheXor->getOperand(0);
+    SDValue Op1 = TheXor->getOperand(1);
+
     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
       bool Equal = false;
       if (isOneConstant(Op0) && Op0.hasOneUse() &&
@@ -11256,19 +11255,12 @@
         Equal = true;
       }
 
-      EVT SetCCVT = N1.getValueType();
+      EVT SetCCVT = N.getValueType();
       if (LegalTypes)
        SetCCVT = getSetCCResultType(SetCCVT);
-      SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
-                                   SetCCVT,
-                                   Op0, Op1,
-                                   Equal ? ISD::SETEQ : ISD::SETNE);
       // Replace the uses of XOR with SETCC
-      WorklistRemover DeadNodes(*this);
-      DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
-      deleteAndRecombine(N1.getNode());
-      return DAG.getNode(ISD::BRCOND, SDLoc(N),
-                         MVT::Other, Chain, SetCC, N2);
+      return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
+                          Equal ? ISD::SETEQ : ISD::SETNE);
     }
   }
Index: llvm/trunk/lib/Target/Hexagon/HexagonPatterns.td
===================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonPatterns.td
+++ llvm/trunk/lib/Target/Hexagon/HexagonPatterns.td
@@ -2800,6 +2800,8 @@
          (J2_jumpf I1:$Pu, bb:$dst)>;
 def: Pat<(brcond (i1 (setne I1:$Pu, -1)), bb:$dst),
          (J2_jumpf I1:$Pu, bb:$dst)>;
+def: Pat<(brcond (i1 (seteq I1:$Pu, 0)), bb:$dst),
+         (J2_jumpf I1:$Pu, bb:$dst)>;
 def: Pat<(brcond (i1 (setne I1:$Pu, 0)), bb:$dst),
          (J2_jumpt I1:$Pu, bb:$dst)>;
Index: llvm/trunk/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll
@@ -7,7 +7,6 @@
 ; GCN-LABEL: {{^}}vcc_shrink_vcc_def:
 ; GCN: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0{{$}}
 ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
-; GCN: v_cndmask_b32_e64 v0, 0, 1, s{{\[[0-9]+:[0-9]+\]}}
 define amdgpu_kernel void @vcc_shrink_vcc_def(float %arg, i32 %arg1, float %arg2, i32 %arg3) {
 bb0:
   %tmp = icmp sgt i32 %arg1, 4
@@ -34,7 +33,6 @@
 ; GCN-LABEL: {{^}}preserve_condition_undef_flag:
 ; GCN-NOT: vcc
 ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
-; GCN: v_cndmask_b32_e64 v0, 0, 1, s{{\[[0-9]+:[0-9]+\]}}
 define amdgpu_kernel void @preserve_condition_undef_flag(float %arg, i32 %arg1, float %arg2) {
 bb0:
   %tmp = icmp sgt i32 %arg1, 4
Index: llvm/trunk/test/CodeGen/AMDGPU/setcc.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/setcc.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/setcc.ll
@@ -397,9 +397,9 @@
 }
 
 ; FUNC-LABEL: setcc-i1-and-xor
-; GCN-DAG: v_cmp_ge_f32_e64 [[A:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
-; GCN-DAG: v_cmp_le_f32_e64 [[B:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 1.0
-; GCN: s_and_b64 s[2:3], [[A]], [[B]]
+; GCN-DAG: v_cmp_nge_f32_e64 [[A:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
+; GCN-DAG: v_cmp_nle_f32_e64 [[B:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 1.0
+; GCN: s_or_b64 s[2:3], [[A]], [[B]]
 define amdgpu_kernel void @setcc-i1-and-xor(i32 addrspace(1)* %out, float %cond) #0 {
 bb0:
   %tmp5 = fcmp oge float %cond, 0.000000e+00
Index: llvm/trunk/test/CodeGen/X86/and-sink.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/and-sink.ll
+++ llvm/trunk/test/CodeGen/X86/and-sink.ll
@@ -14,8 +14,8 @@
 ; CHECK-NEXT:    je .LBB0_3
 ; CHECK-NEXT:  # %bb.1: # %bb0
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movl $0, A
 ; CHECK-NEXT:    testb $4, %al
+; CHECK-NEXT:    movl $0, A
 ; CHECK-NEXT:    jne .LBB0_3
 ; CHECK-NEXT:  # %bb.2: # %bb1
 ; CHECK-NEXT:    movl $1, %eax
@@ -61,8 +61,8 @@
 ; CHECK-NEXT:    je .LBB1_5
 ; CHECK-NEXT:  # %bb.3: # %bb1
 ; CHECK-NEXT:    # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT:    movl $0, C
 ; CHECK-NEXT:    testb $4, %cl
+; CHECK-NEXT:    movl $0, C
 ; CHECK-NEXT:    jne .LBB1_2
 ; CHECK-NEXT:  # %bb.4: # %bb2
 ; CHECK-NEXT:    movl $1, %eax
Index: llvm/trunk/test/CodeGen/X86/fold-rmw-ops.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/fold-rmw-ops.ll
+++ llvm/trunk/test/CodeGen/X86/fold-rmw-ops.ll
@@ -1146,12 +1146,9 @@
 define void @and32_imm_br() nounwind {
 ; CHECK-LABEL: and32_imm_br:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl $-2147483648, %eax # encoding: [0xb8,0x00,0x00,0x00,0x80]
+; CHECK-NEXT:    andl $-2147483648, {{.*}}(%rip) # encoding: [0x81,0x25,A,A,A,A,0x00,0x00,0x00,0x80]
+; CHECK-NEXT:    # fixup A - offset: 2, value: g32-8, kind: reloc_riprel_4byte
 ; CHECK-NEXT:    # imm = 0x80000000
-; CHECK-NEXT:    andl {{.*}}(%rip), %eax # encoding: [0x23,0x05,A,A,A,A]
-; CHECK-NEXT:    # fixup A - offset: 2, value: g32-4, kind: reloc_riprel_4byte
-; CHECK-NEXT:    movl %eax, {{.*}}(%rip) # encoding: [0x89,0x05,A,A,A,A]
-; CHECK-NEXT:    # fixup A - offset: 2, value: g32-4, kind: reloc_riprel_4byte
 ; CHECK-NEXT:    jne .LBB35_2 # encoding: [0x75,A]
 ; CHECK-NEXT:    # fixup A - offset: 1, value: .LBB35_2-1, kind: FK_PCRel_1
 ; CHECK-NEXT:  # %bb.1: # %a
@@ -1244,13 +1241,9 @@
 define void @and16_imm_br() nounwind {
 ; CHECK-LABEL: and16_imm_br:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movzwl {{.*}}(%rip), %eax # encoding: [0x0f,0xb7,0x05,A,A,A,A]
-; CHECK-NEXT:    # fixup A - offset: 3, value: g16-4, kind: reloc_riprel_4byte
-; CHECK-NEXT:    andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00]
+; CHECK-NEXT:    andw $-32768, {{.*}}(%rip) # encoding: [0x66,0x81,0x25,A,A,A,A,0x00,0x80]
+; CHECK-NEXT:    # fixup A - offset: 3, value: g16-6, kind: reloc_riprel_4byte
 ; CHECK-NEXT:    # imm = 0x8000
-; CHECK-NEXT:    movw %ax, {{.*}}(%rip) # encoding: [0x66,0x89,0x05,A,A,A,A]
-; CHECK-NEXT:    # fixup A - offset: 3, value: g16-4, kind: reloc_riprel_4byte
-; CHECK-NEXT:    testw %ax, %ax # encoding: [0x66,0x85,0xc0]
 ; CHECK-NEXT:    jne .LBB38_2 # encoding: [0x75,A]
 ; CHECK-NEXT:    # fixup A - offset: 1, value: .LBB38_2-1, kind: FK_PCRel_1
 ; CHECK-NEXT:  # %bb.1: # %a
Index: llvm/trunk/test/CodeGen/X86/or-branch.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/or-branch.ll
+++ llvm/trunk/test/CodeGen/X86/or-branch.ll
@@ -19,11 +19,10 @@
 ; JUMP1-LABEL: foo:
 ; JUMP1:       # %bb.0: # %entry
 ; JUMP1-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
-; JUMP1-NEXT:    sete %al
-; JUMP1-NEXT:    cmpl $5, {{[0-9]+}}(%esp)
-; JUMP1-NEXT:    setl %cl
-; JUMP1-NEXT:    orb %al, %cl
-; JUMP1-NEXT:    cmpb $1, %cl
+; JUMP1-NEXT:    setne %al
+; JUMP1-NEXT:    cmpl $4, {{[0-9]+}}(%esp)
+; JUMP1-NEXT:    setg %cl
+; JUMP1-NEXT:    testb %al, %cl
 ; JUMP1-NEXT:    jne .LBB0_1
 ; JUMP1-NEXT:  # %bb.2: # %cond_true
 ; JUMP1-NEXT:    jmp bar # TAILCALL
@@ -50,11 +49,10 @@
 ; JUMP2-LABEL: unpredictable:
 ; JUMP2:       # %bb.0: # %entry
 ; JUMP2-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
-; JUMP2-NEXT:    sete %al
-; JUMP2-NEXT:    cmpl $5, {{[0-9]+}}(%esp)
-; JUMP2-NEXT:    setl %cl
-; JUMP2-NEXT:    orb %al, %cl
-; JUMP2-NEXT:    cmpb $1, %cl
+; JUMP2-NEXT:    setne %al
+; JUMP2-NEXT:    cmpl $4, {{[0-9]+}}(%esp)
+; JUMP2-NEXT:    setg %cl
+; JUMP2-NEXT:    testb %al, %cl
 ; JUMP2-NEXT:    jne .LBB1_1
 ; JUMP2-NEXT:  # %bb.2: # %cond_true
 ; JUMP2-NEXT:    jmp bar # TAILCALL
@@ -64,11 +62,10 @@
 ; JUMP1-LABEL: unpredictable:
 ; JUMP1:       # %bb.0: # %entry
 ; JUMP1-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
-; JUMP1-NEXT:    sete %al
-; JUMP1-NEXT:    cmpl $5, {{[0-9]+}}(%esp)
-; JUMP1-NEXT:    setl %cl
-; JUMP1-NEXT:    orb %al, %cl
-; JUMP1-NEXT:    cmpb $1, %cl
+; JUMP1-NEXT:    setne %al
+; JUMP1-NEXT:    cmpl $4, {{[0-9]+}}(%esp)
+; JUMP1-NEXT:    setg %cl
+; JUMP1-NEXT:    testb %al, %cl
 ; JUMP1-NEXT:    jne .LBB1_1
 ; JUMP1-NEXT:  # %bb.2: # %cond_true
 ; JUMP1-NEXT:    jmp bar # TAILCALL
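
Note on the DAGCombiner change above: rebuildSetCC turns the (X & C) >> log2(C) operand of a brcond back into setcc ne (X & C), 0 when C has a single bit set. The standalone C++ sketch below is not part of the patch; the mask constant and input range are arbitrary choices used only to illustrate the scalar identity the combine relies on.

// Standalone sketch (illustration only): for a power-of-two mask C,
// (X & C) >> log2(C) produces exactly the same 0/1 value as ((X & C) != 0),
// which is the setcc form rebuildSetCC recreates for the brcond operand.
#include <cassert>
#include <cstdint>

static bool isPowerOfTwo(uint32_t C) { return C != 0 && (C & (C - 1)) == 0; }

static uint32_t log2u(uint32_t C) {
  uint32_t L = 0;
  while (C >>= 1)
    ++L;
  return L;
}

int main() {
  const uint32_t C = 0x8; // arbitrary single-bit mask for the check
  assert(isPowerOfTwo(C));
  for (uint32_t X = 0; X < 4096; ++X) {
    uint32_t SrlForm = (X & C) >> log2u(C);    // pattern matched by rebuildSetCC
    uint32_t SetCCForm = (X & C) != 0 ? 1 : 0; // equivalent setcc-ne-zero form
    assert(SrlForm == SetCCForm);
  }
  return 0;
}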