Index: llvm/trunk/lib/Target/X86/X86FlagsCopyLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86FlagsCopyLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -127,6 +127,10 @@
                       MachineInstr &JmpI, CondRegArray &CondRegs);
   void rewriteCopy(MachineInstr &MI, MachineOperand &FlagUse,
                    MachineInstr &CopyDefI);
+  void rewriteSetCarryExtended(MachineBasicBlock &TestMBB,
+                               MachineBasicBlock::iterator TestPos,
+                               DebugLoc TestLoc, MachineInstr &SetBI,
+                               MachineOperand &FlagUse, CondRegArray &CondRegs);
   void rewriteSetCC(MachineBasicBlock &TestMBB,
                     MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
                     MachineInstr &SetCCI, MachineOperand &FlagUse,
@@ -512,8 +516,7 @@
       } else if (MI.getOpcode() == TargetOpcode::COPY) {
         rewriteCopy(MI, *FlagUse, CopyDefI);
       } else {
-        // We assume that arithmetic instructions that use flags also def
-        // them.
+        // We assume all other instructions that use flags also def them.
         assert(MI.findRegisterDefOperand(X86::EFLAGS) &&
                "Expected a def of EFLAGS for this instruction!");
 
@@ -525,7 +528,23 @@
         // logic.
         FlagsKilled = true;
 
-        rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
+        switch (MI.getOpcode()) {
+        case X86::SETB_C8r:
+        case X86::SETB_C16r:
+        case X86::SETB_C32r:
+        case X86::SETB_C64r:
+          // Use custom lowering for arithmetic that merely extends the carry
+          // flag. We model such instructions with the SETB_C* pseudos.
+          rewriteSetCarryExtended(TestMBB, TestPos, TestLoc, MI, *FlagUse,
+                                  CondRegs);
+          break;
+
+        default:
+          // Generically handle remaining uses as arithmetic instructions.
+          rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse,
+                            CondRegs);
+          break;
+        }
         break;
       }
 
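For context before the new routine below: the SETB_C* pseudos materialize the carry flag as an all-zeros or all-ones bitmask, which x86 typically computes with `sbb reg, reg` (reg - reg - CF = 0 - CF). A minimal C++ sketch of the value involved; the helper name is hypothetical and not LLVM API:

    #include <cstdint>

    // Hypothetical model of what a SETB_C32r-style pseudo produces: all ones
    // when the carry flag is set, zero otherwise, i.e. 0 - CF.
    uint32_t setbCarryMask32(bool CarryFlag) {
      return uint32_t{0} - static_cast<uint32_t>(CarryFlag); // 0 or 0xFFFFFFFF
    }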
@@ -753,6 +772,126 @@
   MI.eraseFromParent();
 }
 
+void X86FlagsCopyLoweringPass::rewriteSetCarryExtended(
+    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
+    DebugLoc TestLoc, MachineInstr &SetBI, MachineOperand &FlagUse,
+    CondRegArray &CondRegs) {
+  // This routine is only used to handle pseudos for setting a register to zero
+  // or all ones based on CF. This is essentially the sign-extended form of the
+  // 1-bit SETB result and is modeled with the SETB_C* pseudos. They require
+  // special handling as they aren't normal SETcc instructions and are lowered
+  // to an EFLAGS-clobbering operation (typically SBB). One simplifying aspect
+  // is that they are only provided in reg-defining forms. A complicating
+  // factor is that they can define many different register widths.
+  assert(SetBI.getOperand(0).isReg() &&
+         "Cannot have a non-register defined operand to this variant of SETB!");
+
+  // Little helper to do the common final step of replacing the register def'ed
+  // by this SETB instruction with a new register and removing the SETB
+  // instruction.
+  auto RewriteToReg = [&](unsigned Reg) {
+    MRI->replaceRegWith(SetBI.getOperand(0).getReg(), Reg);
+    SetBI.eraseFromParent();
+  };
+
+  // Grab the register class used for this particular instruction.
+  auto &SetBRC = *MRI->getRegClass(SetBI.getOperand(0).getReg());
+
+  MachineBasicBlock &MBB = *SetBI.getParent();
+  auto SetPos = SetBI.getIterator();
+  auto SetLoc = SetBI.getDebugLoc();
+
+  auto AdjustReg = [&](unsigned Reg) {
+    auto &OrigRC = *MRI->getRegClass(Reg);
+    if (&OrigRC == &SetBRC)
+      return Reg;
+
+    unsigned NewReg;
+
+    int OrigRegSize = TRI->getRegSizeInBits(OrigRC) / 8;
+    int TargetRegSize = TRI->getRegSizeInBits(SetBRC) / 8;
+    assert(OrigRegSize <= 8 && "No GPRs larger than 64-bits!");
+    assert(TargetRegSize <= 8 && "No GPRs larger than 64-bits!");
+    int SubRegIdx[] = {X86::NoSubRegister, X86::sub_8bit, X86::sub_16bit,
+                       X86::NoSubRegister, X86::sub_32bit};
+
+    // If the original size is smaller than the target *and* is smaller than 4
+    // bytes, we need to explicitly zero extend it. We always extend to 4-bytes
+    // to maximize the chance of being able to CSE that operation and to avoid
+    // partial dependency stalls extending to 2-bytes.
+    if (OrigRegSize < TargetRegSize && OrigRegSize < 4) {
+      NewReg = MRI->createVirtualRegister(&X86::GR32RegClass);
+      BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOVZX32rr8), NewReg)
+          .addReg(Reg);
+      if (&SetBRC == &X86::GR32RegClass)
+        return NewReg;
+      Reg = NewReg;
+      OrigRegSize = 4;
+    }
+
+    NewReg = MRI->createVirtualRegister(&SetBRC);
+    if (OrigRegSize < TargetRegSize) {
+      BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::SUBREG_TO_REG),
+              NewReg)
+          .addImm(0)
+          .addReg(Reg)
+          .addImm(SubRegIdx[OrigRegSize]);
+    } else if (OrigRegSize > TargetRegSize) {
+      BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::EXTRACT_SUBREG),
+              NewReg)
+          .addReg(Reg)
+          .addImm(SubRegIdx[TargetRegSize]);
+    } else {
+      BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY), NewReg)
+          .addReg(Reg);
+    }
+    return NewReg;
+  };
+
+  unsigned &CondReg = CondRegs[X86::COND_B];
+  if (!CondReg)
+    CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, X86::COND_B);
+
+  // Adjust the condition to have the desired register width by zero-extending
+  // as needed.
+  // FIXME: We should use a better API to avoid the local reference and using a
+  // different variable here.
+  unsigned ExtCondReg = AdjustReg(CondReg);
+
+  // Now we need to turn this into a bitmask. We do this by subtracting it from
+  // zero.
+  unsigned ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass);
+  BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOV32r0), ZeroReg);
+  ZeroReg = AdjustReg(ZeroReg);
+
+  unsigned Sub;
+  switch (SetBI.getOpcode()) {
+  case X86::SETB_C8r:
+    Sub = X86::SUB8rr;
+    break;
+
+  case X86::SETB_C16r:
+    Sub = X86::SUB16rr;
+    break;
+
+  case X86::SETB_C32r:
+    Sub = X86::SUB32rr;
+    break;
+
+  case X86::SETB_C64r:
+    Sub = X86::SUB64rr;
+    break;
+
+  default:
+    llvm_unreachable("Invalid SETB_C* opcode!");
+  }
+  unsigned ResultReg = MRI->createVirtualRegister(&SetBRC);
+  BuildMI(MBB, SetPos, SetLoc, TII->get(Sub), ResultReg)
+      .addReg(ZeroReg)
+      .addReg(ExtCondReg);
+  return RewriteToReg(ResultReg);
+}
+
 void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
                                             MachineBasicBlock::iterator TestPos,
                                             DebugLoc TestLoc,
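The AdjustReg lambda above normalizes the widths of the condition and zero registers before the final subtraction. A rough standalone model of that decision tree over plain byte sizes; the names and structure are illustrative assumptions, not LLVM API:

    #include <cassert>
    #include <cstdint>

    // Sketch of the width-adjustment strategy: an 8-bit value headed for a
    // wider register is first zero-extended to 32 bits (MOVZX32rr8, cheap and
    // CSE-friendly); a too-wide value is truncated (EXTRACT_SUBREG); widening
    // 32 -> 64 changes no bits (SUBREG_TO_REG implicitly zeros the top half).
    uint64_t adjustWidth(uint64_t Value, int OrigBytes, int TargetBytes) {
      assert(OrigBytes <= 8 && TargetBytes <= 8 && "No GPRs wider than 64 bits");
      if (OrigBytes < TargetBytes && OrigBytes < 4) {
        Value &= 0xFF; // MOVZX32rr8: zero-extend the 8-bit source.
        OrigBytes = 4;
      }
      if (OrigBytes > TargetBytes) // EXTRACT_SUBREG: keep only the low bits.
        Value &= ~uint64_t{0} >> (64 - 8 * TargetBytes);
      return Value; // SUBREG_TO_REG / COPY leave the bits unchanged.
    }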
Index: llvm/trunk/test/CodeGen/X86/copy-eflags.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/copy-eflags.ll
+++ llvm/trunk/test/CodeGen/X86/copy-eflags.ll
@@ -304,3 +304,62 @@
   %tmp12 = trunc i32 %tmp11 to i16
   br label %bb1
 }
+
+; Use a particular instruction pattern that lowers to the post-RA pseudo used
+; to expand SETB into an SBB, in order to make sure that kind of use of a
+; copied EFLAGS continues to work.
+define void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3) {
+; X32-LABEL: PR37431:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    pushl %esi
+; X32-NEXT:    .cfi_def_cfa_offset 8
+; X32-NEXT:    .cfi_offset %esi, -8
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl (%eax), %eax
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    sarl $31, %ecx
+; X32-NEXT:    cmpl %eax, %eax
+; X32-NEXT:    sbbl %ecx, %eax
+; X32-NEXT:    setb %al
+; X32-NEXT:    sbbb %cl, %cl
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    movb %cl, (%edx)
+; X32-NEXT:    movzbl %al, %eax
+; X32-NEXT:    xorl %ecx, %ecx
+; X32-NEXT:    subl %eax, %ecx
+; X32-NEXT:    xorl %eax, %eax
+; X32-NEXT:    xorl %edx, %edx
+; X32-NEXT:    idivl %ecx
+; X32-NEXT:    movb %dl, (%esi)
+; X32-NEXT:    popl %esi
+; X32-NEXT:    .cfi_def_cfa_offset 4
+; X32-NEXT:    retl
+;
+; X64-LABEL: PR37431:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movslq (%rdi), %rax
+; X64-NEXT:    cmpq %rax, %rax
+; X64-NEXT:    sbbb %dl, %dl
+; X64-NEXT:    cmpq %rax, %rax
+; X64-NEXT:    movb %dl, (%rsi)
+; X64-NEXT:    sbbl %esi, %esi
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    xorl %edx, %edx
+; X64-NEXT:    idivl %esi
+; X64-NEXT:    movb %dl, (%rcx)
+; X64-NEXT:    retq
+entry:
+  %tmp = load i32, i32* %arg1
+  %tmp1 = sext i32 %tmp to i64
+  %tmp2 = icmp ugt i64 %tmp1, undef
+  %tmp3 = zext i1 %tmp2 to i8
+  %tmp4 = sub i8 0, %tmp3
+  store i8 %tmp4, i8* %arg2
+  %tmp5 = sext i8 %tmp4 to i32
+  %tmp6 = srem i32 0, %tmp5
+  %tmp7 = trunc i32 %tmp6 to i8
+  store i8 %tmp7, i8* %arg3
+  ret void
+}
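The IR in @PR37431 distills the miscompile: a zero-extended unsigned comparison is subtracted from zero, so instruction selection materializes the carry flag as a byte mask (the SETB_C8r pseudo, visible as `sbbb %cl, %cl` in the checks) while the flags live across a copy. A plain C++ rendering of that core computation; the function name is made up for illustration:

    #include <cstdint>

    // 0 - zext(A >u B) yields 0 or 0xFF: exactly the 8-bit carry mask that
    // the SETB_C8r pseudo models.
    uint8_t carryMaskFromUGT(uint64_t A, uint64_t B) {
      return static_cast<uint8_t>(0) - static_cast<uint8_t>(A > B);
    }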
Index: llvm/trunk/test/CodeGen/X86/flags-copy-lowering.mir
===================================================================
--- llvm/trunk/test/CodeGen/X86/flags-copy-lowering.mir
+++ llvm/trunk/test/CodeGen/X86/flags-copy-lowering.mir
@@ -66,6 +66,12 @@
     call void @foo()
     ret void
   }
+
+  define void @test_setb_c(i64 %a, i64 %b) {
+  entry:
+    call void @foo()
+    ret void
+  }
 ...
 ---
 name: test_branch
@@ -482,3 +488,68 @@
   RET 0
 
 ...
+---
+name: test_setb_c
+# CHECK-LABEL: name: test_setb_c
+liveins:
+  - { reg: '$rdi', virtual-reg: '%0' }
+  - { reg: '$rsi', virtual-reg: '%1' }
+body: |
+  bb.0:
+    liveins: $rdi, $rsi
+
+    %0:gr64 = COPY $rdi
+    %1:gr64 = COPY $rsi
+    %2:gr64 = ADD64rr %0, %1, implicit-def $eflags
+    %3:gr64 = COPY $eflags
+    ; CHECK-NOT: COPY{{( killed)?}} $eflags
+    ; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit $eflags
+    ; CHECK-NOT: COPY{{( killed)?}} $eflags
+
+    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
+    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+
+    $eflags = COPY %3
+    %4:gr8 = SETB_C8r implicit-def $eflags, implicit $eflags
+    MOV8mr $rsp, 1, $noreg, -16, $noreg, killed %4
+    ; CHECK-NOT: $eflags =
+    ; CHECK: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def $eflags
+    ; CHECK-NEXT: %[[ZERO_SUBREG:[^:]*]]:gr8 = EXTRACT_SUBREG %[[ZERO]], %subreg.sub_8bit
+    ; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr8 = SUB8rr %[[ZERO_SUBREG]], %[[CF_REG]]
+    ; CHECK-NEXT: MOV8mr $rsp, 1, $noreg, -16, $noreg, killed %[[REPLACEMENT]]
+
+    $eflags = COPY %3
+    %5:gr16 = SETB_C16r implicit-def $eflags, implicit $eflags
+    MOV16mr $rsp, 1, $noreg, -16, $noreg, killed %5
+    ; CHECK-NOT: $eflags =
+    ; CHECK: %[[CF_EXT:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
+    ; CHECK-NEXT: %[[CF_TRUNC:[^:]*]]:gr16 = EXTRACT_SUBREG %[[CF_EXT]], %subreg.sub_16bit
+    ; CHECK-NEXT: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def $eflags
+    ; CHECK-NEXT: %[[ZERO_SUBREG:[^:]*]]:gr16 = EXTRACT_SUBREG %[[ZERO]], %subreg.sub_16bit
+    ; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr16 = SUB16rr %[[ZERO_SUBREG]], %[[CF_TRUNC]]
+    ; CHECK-NEXT: MOV16mr $rsp, 1, $noreg, -16, $noreg, killed %[[REPLACEMENT]]
+
+    $eflags = COPY %3
+    %6:gr32 = SETB_C32r implicit-def $eflags, implicit $eflags
+    MOV32mr $rsp, 1, $noreg, -16, $noreg, killed %6
+    ; CHECK-NOT: $eflags =
+    ; CHECK: %[[CF_EXT:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
+    ; CHECK-NEXT: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def $eflags
+    ; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr32 = SUB32rr %[[ZERO]], %[[CF_EXT]]
+    ; CHECK-NEXT: MOV32mr $rsp, 1, $noreg, -16, $noreg, killed %[[REPLACEMENT]]
+
+    $eflags = COPY %3
+    %7:gr64 = SETB_C64r implicit-def $eflags, implicit $eflags
+    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %7
+    ; CHECK-NOT: $eflags =
+    ; CHECK: %[[CF_EXT1:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
+    ; CHECK-NEXT: %[[CF_EXT2:[^:]*]]:gr64 = SUBREG_TO_REG 0, %[[CF_EXT1]], %subreg.sub_32bit
+    ; CHECK-NEXT: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def $eflags
+    ; CHECK-NEXT: %[[ZERO_EXT:[^:]*]]:gr64 = SUBREG_TO_REG 0, %[[ZERO]], %subreg.sub_32bit
+    ; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr64 = SUB64rr %[[ZERO_EXT]], %[[CF_EXT2]]
+    ; CHECK-NEXT: MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %[[REPLACEMENT]]
+
+    RET 0
+
+...
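To run the updated test locally, the invocation should be close to the following; the authoritative RUN line sits at the top of the .mir file, outside this hunk, so treat the exact pass name and flags here as assumptions:

    llc -run-pass x86-flags-copy-lowering -o - \
        llvm/test/CodeGen/X86/flags-copy-lowering.mir \
      | FileCheck llvm/test/CodeGen/X86/flags-copy-lowering.mir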