Index: llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2917,6 +2917,30 @@
     return new ICmpInst(Pred, BCSrcOp, Op1);
   }
 
+  const APInt *C;
+  if (!match(Cmp.getOperand(1), m_APInt(C)) ||
+      !Bitcast->getType()->isIntegerTy() ||
+      !Bitcast->getSrcTy()->isIntOrIntVectorTy())
+    return nullptr;
+
+  // If this is checking whether all elements of a vector compare are set (or
+  // not), invert the predicate of the vector compare feeding the bitcast and
+  // test whether all elements are clear (or not) instead. A compare against
+  // zero is generally easier for analysis and codegen.
+  // icmp eq/ne (bitcast (icmp Pred1 <N x iM> X, Y) to iN), -1 -->
+  //     icmp eq/ne (bitcast (icmp InvPred1 <N x iM> X, Y) to iN), 0
+  // Example: are all elements equal? --> are zero elements not equal?
+  ICmpInst::Predicate Pred1;
+  Value *X, *Y;
+  if (Cmp.isEquality() && C->isAllOnesValue() && Bitcast->hasOneUse() &&
+      match(BCSrcOp, m_OneUse(m_ICmp(Pred1, m_Value(X), m_Value(Y))))) {
+    Value *FirstCmp =
+        Builder.CreateICmp(ICmpInst::getInversePredicate(Pred1), X, Y);
+    Value *Cast = Builder.CreateBitCast(FirstCmp, Bitcast->getType());
+    return new ICmpInst(Pred, Cast,
+                        ConstantInt::getNullValue(Bitcast->getType()));
+  }
+
   // Folding: icmp <pred> iN X, C
   //  where X = bitcast <M x iK> (shufflevector <M x iK> %vec, undef, SC)) to iN
   //    and C is a splat of a K-bit pattern
@@ -2924,12 +2948,6 @@
   // Into:
   //   %E = extractelement <M x iK> %vec, i32 C'
   //   icmp <pred> iK %E, trunc(C)
-  const APInt *C;
-  if (!match(Cmp.getOperand(1), m_APInt(C)) ||
-      !Bitcast->getType()->isIntegerTy() ||
-      !Bitcast->getSrcTy()->isIntOrIntVectorTy())
-    return nullptr;
-
   Value *Vec;
   ArrayRef<int> Mask;
   if (match(BCSrcOp, m_Shuffle(m_Value(Vec), m_Undef(), m_Mask(Mask)))) {
Index: llvm/test/Transforms/InstCombine/icmp-vec.ll
===================================================================
--- llvm/test/Transforms/InstCombine/icmp-vec.ll
+++ llvm/test/Transforms/InstCombine/icmp-vec.ll
@@ -402,9 +402,9 @@
 
 define i1 @eq_cast_eq-1(<2 x i4> %x, <2 x i4> %y) {
 ; CHECK-LABEL: @eq_cast_eq-1(
-; CHECK-NEXT:    [[IC:%.*]] = icmp eq <2 x i4> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[B:%.*]] = bitcast <2 x i1> [[IC]] to i2
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i2 [[B]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <2 x i4> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i1> [[TMP1]] to i2
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i2 [[TMP2]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %ic = icmp eq <2 x i4> %x, %y
@@ -415,9 +415,9 @@
 
 define i1 @ne_cast_eq-1(<3 x i7> %x, <3 x i7> %y) {
 ; CHECK-LABEL: @ne_cast_eq-1(
-; CHECK-NEXT:    [[IC:%.*]] = icmp ne <3 x i7> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[B:%.*]] = bitcast <3 x i1> [[IC]] to i3
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i3 [[B]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <3 x i7> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <3 x i1> [[TMP1]] to i3
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i3 [[TMP2]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %ic = icmp ne <3 x i7> %x, %y
@@ -428,9 +428,9 @@
 
 define i1 @eq_cast_ne-1(<2 x i7> %x, <2 x i7> %y) {
 ; CHECK-LABEL: @eq_cast_ne-1(
-; CHECK-NEXT:    [[IC:%.*]] = icmp eq <2 x i7> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[B:%.*]] = bitcast <2 x i1> [[IC]] to i2
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i2 [[B]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <2 x i7> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i1> [[TMP1]] to i2
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i2 [[TMP2]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %ic = icmp eq <2 x i7> %x, %y
@@ -441,9 +441,9 @@
 
 define i1 @ne_cast_ne-1(<3 x i5> %x, <3 x i5> %y) {
 ; CHECK-LABEL: @ne_cast_ne-1(
-; CHECK-NEXT:    [[IC:%.*]] = icmp ne <3 x i5> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[B:%.*]] = bitcast <3 x i1> [[IC]] to i3
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i3 [[B]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <3 x i5> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <3 x i1> [[TMP1]] to i3
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i3 [[TMP2]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %ic = icmp ne <3 x i5> %x, %y
@@ -454,9 +454,9 @@
 
 define i1 @ugt_cast_eq-1(<2 x i4> %x, <2 x i4> %y) {
 ; CHECK-LABEL: @ugt_cast_eq-1(
-; CHECK-NEXT:    [[IC:%.*]] = icmp ugt <2 x i4> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[B:%.*]] = bitcast <2 x i1> [[IC]] to i2
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i2 [[B]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <2 x i4> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i1> [[TMP1]] to i2
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i2 [[TMP2]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %ic = icmp ugt <2 x i4> %x, %y
@@ -467,9 +467,9 @@
 
 define i1 @slt_cast_ne-1(<2 x i4> %x, <2 x i4> %y) {
 ; CHECK-LABEL: @slt_cast_ne-1(
-; CHECK-NEXT:    [[IC:%.*]] = icmp slt <2 x i4> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[B:%.*]] = bitcast <2 x i1> [[IC]] to i2
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i2 [[B]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sge <2 x i4> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i1> [[TMP1]] to i2
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i2 [[TMP2]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %ic = icmp slt <2 x i4> %x, %y
@@ -478,6 +478,8 @@
   ret i1 %r
 }
 
+; negative test - need equality pred on 2nd cmp
+
 define i1 @eq_cast_sgt-1(<3 x i4> %x, <3 x i4> %y) {
 ; CHECK-LABEL: @eq_cast_sgt-1(
 ; CHECK-NEXT:    [[IC:%.*]] = icmp eq <3 x i4> [[X:%.*]], [[Y:%.*]]
@@ -491,6 +493,8 @@
   ret i1 %r
 }
 
+; negative test - need all-ones constant on 2nd cmp
+
 define i1 @eq_cast_eq1(<2 x i4> %x, <2 x i4> %y) {
 ; CHECK-LABEL: @eq_cast_eq1(
 ; CHECK-NEXT:    [[IC:%.*]] = icmp eq <2 x i4> [[X:%.*]], [[Y:%.*]]
@@ -504,6 +508,8 @@
   ret i1 %r
 }
 
+; negative test - extra use
+
 define i1 @eq_cast_eq-1_use1(<2 x i4> %x, <2 x i4> %y, <2 x i1>* %p) {
 ; CHECK-LABEL: @eq_cast_eq-1_use1(
 ; CHECK-NEXT:    [[IC:%.*]] = icmp sgt <2 x i4> [[X:%.*]], [[Y:%.*]]
@@ -519,6 +525,8 @@
   ret i1 %r
 }
 
+; negative test - extra use
+
 define i1 @eq_cast_eq-1_use2(<2 x i4> %x, <2 x i4> %y, i2* %p) {
 ; CHECK-LABEL: @eq_cast_eq-1_use2(
 ; CHECK-NEXT:    [[IC:%.*]] = icmp sgt <2 x i4> [[X:%.*]], [[Y:%.*]]
Index: llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll
===================================================================
--- llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll
+++ llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll
@@ -350,18 +350,18 @@
 ; CHECK-LABEL: @test_merge_allof_v4si(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[T_FR:%.*]] = freeze <4 x i32> [[T:%.*]]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt <4 x i32> [[T_FR]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt <4 x i32> [[T_FR]], zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i4 [[TMP1]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i4 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[RETURN:%.*]], label [[LOR_LHS_FALSE:%.*]]
 ; CHECK:       lor.lhs.false:
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt <4 x i32> [[T_FR]], <i32 255, i32 255, i32 255, i32 255>
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt <4 x i32> [[T_FR]], <i32 256, i32 256, i32 256, i32 256>
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i4 [[TMP4]], -1
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i4 [[TMP4]], 0
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[RETURN]], label [[IF_END:%.*]]
 ; CHECK:       if.end:
 ; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[SHIFT]], [[T_FR]]
+; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[T_FR]], [[SHIFT]]
 ; CHECK-NEXT:    [[ADD:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
 ; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[ADD]] to float
 ; CHECK-NEXT:    br label [[RETURN]]
@@ -507,16 +507,16 @@
 ; CHECK-LABEL: @test_separate_allof_v4si(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[T_FR:%.*]] = freeze <4 x i32> [[T:%.*]]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt <4 x i32> [[T_FR]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt <4 x i32> [[T_FR]], zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i4 [[TMP1]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i4 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[RETURN:%.*]], label [[IF_END:%.*]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt <4 x i32> [[T_FR]], <i32 255, i32 255, i32 255, i32 255>
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt <4 x i32> [[T_FR]], <i32 256, i32 256, i32 256, i32 256>
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i4 [[TMP4]], -1
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i4 [[TMP4]], 0
 ; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[SHIFT]], [[T_FR]]
+; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[T_FR]], [[SHIFT]]
 ; CHECK-NEXT:    [[ADD:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
 ; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[TMP5]], i32 0, i32 [[ADD]]
 ; CHECK-NEXT:    br label [[RETURN]]