diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1481,6 +1481,9 @@
 
     /// Reassociate floating point divisions into multiply by reciprocal.
     unsigned combineRepeatedFPDivisors() const override;
+
+    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+                          SmallVectorImpl<SDNode *> &Created) const override;
   };
 
   namespace X86 {
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -20096,6 +20096,50 @@
   return 2;
 }
 
+/// Expand (sdiv N0, +/-2^Lg2) as select(N0 < 0, N0 + 2^Lg2 - 1, N0) >> Lg2,
+/// negated for a negative divisor. Returns N itself if division is cheap, or
+/// an empty SDValue if not applicable; intermediate nodes go in \p Created.
+SDValue
+X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+                                 SelectionDAG &DAG,
+                                 SmallVectorImpl<SDNode *> &Created) const {
+  EVT VT = N->getValueType(0);
+  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+  if (isIntDivCheap(VT, Attr))
+    return SDValue(N, 0); // Lower SDIV as SDIV
+
+  // Bail out without CMOV, for types other than i32/i64, or non-pow2 divisors.
+  if (!Subtarget.hasCMov() ||
+      (VT != MVT::i32 && !(Subtarget.is64Bit() && VT == MVT::i64)) ||
+      !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()) ||
+      Divisor.isMinSignedValue())
+    return SDValue();
+
+  SDLoc DL(N);
+  SDValue N0 = N->getOperand(0);
+  unsigned Lg2 = Divisor.countTrailingZeros();
+  SDValue Zero = DAG.getConstant(0, DL, VT);
+  APInt LowMask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
+  SDValue Pow2MinusOne = DAG.getConstant(LowMask, DL, VT);
+
+  // Add (N0 < 0) ? Pow2 - 1 : 0, so the arithmetic shift rounds toward zero.
+  SDValue Cmp = DAG.getSetCC(DL, MVT::i8, N0, Zero, ISD::SETLT);
+  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
+  SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
+  Created.push_back(Cmp.getNode());
+  Created.push_back(Add.getNode());
+  Created.push_back(CMov.getNode());
+
+  // Divide by pow2; use X86's i8 shift-amount type (i64 is illegal on i386).
+  SDValue ShAmt = DAG.getConstant(Lg2, DL, MVT::i8);
+  SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, CMov, ShAmt);
+  if (Divisor.isNonNegative())
+    return SRA;
+
+  // A negative divisor requires negating the quotient.
+  Created.push_back(SRA.getNode());
+  return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
+}
+
 /// Result of 'and' is compared against zero. Change to a BT node if possible.
 /// Returns the BT node and the condition code needed to use it.
 static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
diff --git a/llvm/test/CodeGen/X86/pr32588.ll b/llvm/test/CodeGen/X86/pr32588.ll
--- a/llvm/test/CodeGen/X86/pr32588.ll
+++ b/llvm/test/CodeGen/X86/pr32588.ll
@@ -8,9 +8,17 @@
 define void @fn1() {
 ; CHECK-LABEL: fn1:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    cmpl $0, {{.*}}(%rip)
+; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    cmpl $0, {{.*}}(%rip)
+; CHECK-NEXT:    setne %cl
+; CHECK-NEXT:    andb %al, %cl
+; CHECK-NEXT:    movzbl %cl, %eax
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    negl %eax
+; CHECK-NEXT:    cmovsl %ecx, %eax
 ; CHECK-NEXT:    cmpl $1, {{.*}}(%rip)
-; CHECK-NEXT:    sbbl %eax, %eax
-; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    adcl $0, %eax
 ; CHECK-NEXT:    movl %eax, {{.*}}(%rip)
 ; CHECK-NEXT:    retq
   %t0 = load i32, i32* @c, align 4
diff --git a/llvm/test/CodeGen/X86/rem.ll b/llvm/test/CodeGen/X86/rem.ll
--- a/llvm/test/CodeGen/X86/rem.ll
+++ b/llvm/test/CodeGen/X86/rem.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i386-unknown-unknown | FileCheck %s
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=cmov | FileCheck %s
 
 define i32 @test1(i32 %X) {
 ; CHECK-LABEL: test1:
@@ -27,10 +27,9 @@
 ; CHECK-LABEL: test2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    sarl $31, %ecx
-; CHECK-NEXT:    shrl $24, %ecx
-; CHECK-NEXT:    addl %eax, %ecx
+; CHECK-NEXT:    leal 255(%eax), %ecx
+; CHECK-NEXT:    testl %eax, %eax
+; CHECK-NEXT:    cmovnsl %eax, %ecx
 ; CHECK-NEXT:    andl $-256, %ecx
 ; CHECK-NEXT:    subl %ecx, %eax
 ; CHECK-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/srem-seteq.ll b/llvm/test/CodeGen/X86/srem-seteq.ll
--- a/llvm/test/CodeGen/X86/srem-seteq.ll
+++ b/llvm/test/CodeGen/X86/srem-seteq.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=i686-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=cmov < %s | FileCheck %s --check-prefixes=CHECK,X86
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,X64
 
 ;------------------------------------------------------------------------------;
@@ -318,10 +318,9 @@
 ; X86-LABEL: test_srem_pow2:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %ecx, %edx
-; X86-NEXT:    sarl $31, %edx
-; X86-NEXT:    shrl $28, %edx
-; X86-NEXT:    addl %ecx, %edx
+; X86-NEXT:    leal 15(%ecx), %edx
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    cmovnsl %ecx, %edx
 ; X86-NEXT:    andl $-16, %edx
 ; X86-NEXT:    xorl %eax, %eax
 ; X86-NEXT:    cmpl %edx, %ecx
@@ -330,10 +329,10 @@
 ;
 ; X64-LABEL: test_srem_pow2:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %ecx
-; X64-NEXT:    sarl $31, %ecx
-; X64-NEXT:    shrl $28, %ecx
-; X64-NEXT:    addl %edi, %ecx
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal 15(%rdi), %ecx
+; X64-NEXT:    testl %edi, %edi
+; X64-NEXT:    cmovnsl %edi, %ecx
 ; X64-NEXT:    andl $-16, %ecx
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    cmpl %ecx, %edi
diff --git a/llvm/test/CodeGen/X86/twoaddr-coalesce-3.ll b/llvm/test/CodeGen/X86/twoaddr-coalesce-3.ll
--- a/llvm/test/CodeGen/X86/twoaddr-coalesce-3.ll
+++ b/llvm/test/CodeGen/X86/twoaddr-coalesce-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-- -relocation-model=pic | FileCheck %s
+; RUN: llc < %s -mtriple=i386-- -relocation-model=pic | FileCheck %s
 ; This test is to ensure the TwoAddrInstruction pass chooses the proper operands to
 ; merge and generates fewer mov insns.