diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5638,6 +5638,43 @@
     SDValue N0 = Node->getOperand(0);
     SDValue N1 = Node->getOperand(1);
 
+    EVT OpVT = N0.getValueType();
+    // This is a fixup if we converted (cmp Op8/Op16, i8) -> (cmp Op32, i32)
+    // during lowering.
+    if (OpVT.isScalarInteger() && OpVT.getScalarSizeInBits() > 8 &&
+        isa<ConstantSDNode>(N1) && !isNullConstant(N1) &&
+        hasNoSignFlagUses(SDValue(Node, 0))) {
+      const APInt &C = cast<ConstantSDNode>(N1)->getAPIntValue();
+      // Only do replacement if the constant can get imm8 encoding. Imm16 values
+      // cause LCP stalls in the frontend.
+      // TODO: Enable imm16 transform as well if -Os is set?
+      if (C.getSignificantBits() <= 8) {
+        MVT NewVT;
+        if (CurDAG->MaskedValueIsZero(
+                N0, APInt::getBitsSetFrom(OpVT.getScalarSizeInBits(), 8)))
+          NewVT = MVT::i8;
+        else if (CurDAG->MaskedValueIsZero(
+                     N0, APInt::getBitsSetFrom(OpVT.getScalarSizeInBits(), 16)))
+          NewVT = MVT::i16;
+        else
+          break;
+
+        SDValue TruncN0 = CurDAG->getZExtOrTrunc(N0, dl, NewVT);
+        insertDAGNode(*CurDAG, SDValue(Node, 0), TruncN0);
+        SDValue TruncN1 = CurDAG->getConstant(
+            C.truncSSat(NewVT.getScalarSizeInBits()).getZExtValue(), dl,
+            NewVT);
+        insertDAGNode(*CurDAG, SDValue(Node, 0), TruncN1);
+        SDValue NewCmp =
+            CurDAG->getNode(X86ISD::CMP, dl, MVT::i32, TruncN0, TruncN1);
+        ReplaceNode(Node, NewCmp.getNode());
+        if (N1.getNode()->use_empty())
+          CurDAG->RemoveDeadNode(N1.getNode());
+        SelectCode(NewCmp.getNode());
+        return;
+      }
+    }
+
     // Optimizations for TEST compares.
     if (!isNullConstant(N1))
       break;
diff --git a/llvm/test/CodeGen/X86/combine-movmsk.ll b/llvm/test/CodeGen/X86/combine-movmsk.ll
--- a/llvm/test/CodeGen/X86/combine-movmsk.ll
+++ b/llvm/test/CodeGen/X86/combine-movmsk.ll
@@ -41,7 +41,7 @@
 ; SSE-NEXT:    xorpd %xmm1, %xmm1
 ; SSE-NEXT:    cmpeqpd %xmm0, %xmm1
 ; SSE-NEXT:    movmskpd %xmm1, %eax
-; SSE-NEXT:    cmpl $3, %eax
+; SSE-NEXT:    cmpb $3, %al
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;
@@ -95,14 +95,14 @@
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; SSE2-NEXT:    movmskps %xmm0, %eax
-; SSE2-NEXT:    cmpl $15, %eax
+; SSE2-NEXT:    cmpb $15, %al
 ; SSE2-NEXT:    sete %al
 ; SSE2-NEXT:    retq
 ;
 ; SSE42-LABEL: pmovmskb_allof_bitcast_v2i64:
 ; SSE42:       # %bb.0:
 ; SSE42-NEXT:    movmskpd %xmm0, %eax
-; SSE42-NEXT:    cmpl $3, %eax
+; SSE42-NEXT:    cmpb $3, %al
 ; SSE42-NEXT:    sete %al
 ; SSE42-NEXT:    retq
 ;
@@ -151,7 +151,7 @@
 ; SSE-NEXT:    xorps %xmm1, %xmm1
 ; SSE-NEXT:    cmpeqps %xmm0, %xmm1
 ; SSE-NEXT:    movmskps %xmm1, %eax
-; SSE-NEXT:    cmpl $15, %eax
+; SSE-NEXT:    cmpb $15, %al
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-cmp.ll b/llvm/test/CodeGen/X86/vector-reduce-and-cmp.ll
--- a/llvm/test/CodeGen/X86/vector-reduce-and-cmp.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-cmp.ll
@@ -681,14 +681,14 @@
 ; SSE-LABEL: test_v2i8:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movd %xmm0, %eax
-; SSE-NEXT:    cmpw $-1, %ax
+; SSE-NEXT:    cmpw $255, %ax
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test_v2i8:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vmovd %xmm0, %eax
-; AVX-NEXT:    cmpw $-1, %ax
+; AVX-NEXT:    cmpw $255, %ax
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
   %1 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> %a0)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-scalar.ll b/llvm/test/CodeGen/X86/vector-reduce-and-scalar.ll
--- a/llvm/test/CodeGen/X86/vector-reduce-and-scalar.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-scalar.ll
@@ -697,13 +697,13 @@
 define i1 @test_v2i8(ptr %ptr) nounwind {
 ; SSE-LABEL: test_v2i8:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    cmpw $-1, (%rdi)
+; SSE-NEXT:    cmpw $255, (%rdi)
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test_v2i8:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    cmpw $-1, (%rdi)
+; AVX-NEXT:    cmpw $255, (%rdi)
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
   %vload = load <2 x i8>, ptr %ptr