Index: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -437,173 +437,6 @@
   return Optional<std::pair<unsigned, unsigned>>(std::make_pair(LeftType, RightType));
 }
 
-/// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) into a single
-/// (icmp(A & X) ==/!= Y), where the left-hand side is of type Mask_NotAllZeros
-/// and the right hand side is of type BMask_Mixed. For example,
-/// (icmp (A & 12) != 0) & (icmp (A & 15) == 8) -> (icmp (A & 15) == 8).
-static Value * foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
-    ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
-    Value *A, Value *B, Value *C, Value *D, Value *E,
-    ICmpInst::Predicate PredL, ICmpInst::Predicate PredR,
-    llvm::InstCombiner::BuilderTy &Builder) {
-  // We are given the canonical form:
-  //   (icmp ne (A & B), 0) & (icmp eq (A & D), E).
-  // where D & E == E.
-  //
-  // If IsAnd is false, we get it in negated form:
-  //   (icmp eq (A & B), 0) | (icmp ne (A & D), E) ->
-  //   !((icmp ne (A & B), 0) & (icmp eq (A & D), E)).
-  //
-  // We currently handle the case of B, C, D, E are constant.
-  //
-  ConstantInt *BCst = dyn_cast<ConstantInt>(B);
-  if (!BCst)
-    return nullptr;
-  ConstantInt *CCst = dyn_cast<ConstantInt>(C);
-  if (!CCst)
-    return nullptr;
-  ConstantInt *DCst = dyn_cast<ConstantInt>(D);
-  if (!DCst)
-    return nullptr;
-  ConstantInt *ECst = dyn_cast<ConstantInt>(E);
-  if (!ECst)
-    return nullptr;
-
-  ICmpInst::Predicate NewCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
-
-  // Update E to the canonical form when D is a power of two and RHS is
-  // canonicalized as,
-  // (icmp ne (A & D), 0) -> (icmp eq (A & D), D) or
-  // (icmp ne (A & D), D) -> (icmp eq (A & D), 0).
-  if (PredR != NewCC)
-    ECst = cast<ConstantInt>(ConstantExpr::getXor(DCst, ECst));
-
-  // If B or D is zero, skip because if LHS or RHS can be trivially folded by
-  // other folding rules and this pattern won't apply any more.
- if (BCst->getValue() == 0 || DCst->getValue() == 0) - return nullptr; - - // If B and D don't intersect, ie. (B & D) == 0, no folding because we can't - // deduce anything from it. - // For example, - // (icmp ne (A & 12), 0) & (icmp eq (A & 3), 1) -> no folding. - if ((BCst->getValue() & DCst->getValue()) == 0) - return nullptr; - - // If the following two conditions are met: - // - // 1. mask B covers only a single bit that's not covered by mask D, that is, - // (B & (B ^ D)) is a power of 2 (in other words, B minus the intersection of - // B and D has only one bit set) and, - // - // 2. RHS (and E) indicates that the rest of B's bits are zero (in other - // words, the intersection of B and D is zero), that is, ((B & D) & E) == 0 - // - // then that single bit in B must be one and thus the whole expression can be - // folded to - // (A & (B | D)) == (B & (B ^ D)) | E. - // - // For example, - // (icmp ne (A & 12), 0) & (icmp eq (A & 7), 1) -> (icmp eq (A & 15), 9) - // (icmp ne (A & 15), 0) & (icmp eq (A & 7), 0) -> (icmp eq (A & 15), 8) - if ((((BCst->getValue() & DCst->getValue()) & ECst->getValue()) == 0) && - (BCst->getValue() & (BCst->getValue() ^ DCst->getValue())).isPowerOf2()) { - APInt BorD = BCst->getValue() | DCst->getValue(); - APInt BandBxorDorE = (BCst->getValue() & (BCst->getValue() ^ DCst->getValue())) | - ECst->getValue(); - Value *NewMask = ConstantInt::get(BCst->getType(), BorD); - Value *NewMaskedValue = ConstantInt::get(BCst->getType(), BandBxorDorE); - Value *NewAnd = Builder.CreateAnd(A, NewMask); - return Builder.CreateICmp(NewCC, NewAnd, NewMaskedValue); - } - - auto IsSubSetOrEqual = [](ConstantInt *C1, ConstantInt *C2) { - return (C1->getValue() & C2->getValue()) == C1->getValue(); - }; - auto IsSuperSetOrEqual = [](ConstantInt *C1, ConstantInt *C2) { - return (C1->getValue() & C2->getValue()) == C2->getValue(); - }; - - // In the following, we consider only the cases where B is a superset of D, B - // is a subset of D, or B == D 
because otherwise there's at least one bit - // covered by B but not D, in which case we can't deduce much from it, so - // no folding (aside from the single must-be-one bit case right above.) - // For example, - // (icmp ne (A & 14), 0) & (icmp eq (A & 3), 1) -> no folding. - if (!IsSubSetOrEqual(BCst, DCst) && !IsSuperSetOrEqual(BCst, DCst)) - return nullptr; - - // At this point, either B is a superset of D, B is a subset of D or B == D. - - // If E is zero, if B is a subset of (or equal to) D, LHS and RHS contradict - // and the whole expression becomes false (or true if negated), otherwise, no - // folding. - // For example, - // (icmp ne (A & 3), 0) & (icmp eq (A & 7), 0) -> false. - // (icmp ne (A & 15), 0) & (icmp eq (A & 3), 0) -> no folding. - if (ECst->isZero()) { - if (IsSubSetOrEqual(BCst, DCst)) - return ConstantInt::get(LHS->getType(), !IsAnd); - return nullptr; - } - - // At this point, B, D, E aren't zero and (B & D) == B, (B & D) == D or B == - // D. If B is a superset of (or equal to) D, since E is not zero, LHS is - // subsumed by RHS (RHS implies LHS.) So the whole expression becomes - // RHS. For example, - // (icmp ne (A & 255), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8). - // (icmp ne (A & 15), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8). - if (IsSuperSetOrEqual(BCst, DCst)) - return RHS; - // Otherwise, B is a subset of D. If B and E have a common bit set, - // ie. (B & E) != 0, then LHS is subsumed by RHS. For example. - // (icmp ne (A & 12), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8). - assert(IsSubSetOrEqual(BCst, DCst) && "Precondition due to above code"); - if ((BCst->getValue() & ECst->getValue()) != 0) - return RHS; - // Otherwise, LHS and RHS contradict and the whole expression becomes false - // (or true if negated.) For example, - // (icmp ne (A & 7), 0) & (icmp eq (A & 15), 8) -> false. - // (icmp ne (A & 6), 0) & (icmp eq (A & 15), 8) -> false. 
- return ConstantInt::get(LHS->getType(), !IsAnd); -} - -/// Try to fold (icmp(A & B) ==/!= 0) &/| (icmp(A & D) ==/!= E) into a single -/// (icmp(A & X) ==/!= Y), where the left-hand side and the right hand side -/// aren't of the common mask pattern type. -static Value *foldLogOpOfMaskedICmpsAsymmetric( - ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, - Value *A, Value *B, Value *C, Value *D, Value *E, - ICmpInst::Predicate PredL, ICmpInst::Predicate PredR, - unsigned LHSMask, unsigned RHSMask, - llvm::InstCombiner::BuilderTy &Builder) { - assert(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) && - "Expected equality predicates for masked type of icmps."); - // Handle Mask_NotAllZeros-BMask_Mixed cases. - // (icmp ne/eq (A & B), C) &/| (icmp eq/ne (A & D), E), or - // (icmp eq/ne (A & B), C) &/| (icmp ne/eq (A & D), E) - // which gets swapped to - // (icmp ne/eq (A & D), E) &/| (icmp eq/ne (A & B), C). - if (!IsAnd) { - LHSMask = conjugateICmpMask(LHSMask); - RHSMask = conjugateICmpMask(RHSMask); - } - if ((LHSMask & Mask_NotAllZeros) && (RHSMask & BMask_Mixed)) { - if (Value *V = foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed( - LHS, RHS, IsAnd, A, B, C, D, E, - PredL, PredR, Builder)) { - return V; - } - } else if ((LHSMask & BMask_Mixed) && (RHSMask & Mask_NotAllZeros)) { - if (Value *V = foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed( - RHS, LHS, IsAnd, A, D, E, B, C, - PredR, PredL, Builder)) { - return V; - } - } - return nullptr; -} - /// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) /// into a single (icmp(A & X) ==/!= Y). static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, @@ -619,15 +452,6 @@ unsigned LHSMask = MaskPair->first; unsigned RHSMask = MaskPair->second; unsigned Mask = LHSMask & RHSMask; - if (Mask == 0) { - // Even if the two sides don't share a common pattern, check if folding can - // still happen. 
- if (Value *V = foldLogOpOfMaskedICmpsAsymmetric( - LHS, RHS, IsAnd, A, B, C, D, E, PredL, PredR, LHSMask, RHSMask, - Builder)) - return V; - return nullptr; - } // In full generality: // (icmp (A & B) Op C) | (icmp (A & D) Op E) @@ -711,41 +535,47 @@ return RHS; } - if (Mask & BMask_Mixed) { - // (icmp eq (A & B), C) & (icmp eq (A & D), E) - // We already know that B & C == C && D & E == E. - // If we can prove that (B & D) & (C ^ E) == 0, that is, the bits of - // C and E, which are shared by both the mask B and the mask D, don't - // contradict, then we can transform to - // -> (icmp eq (A & (B|D)), (C|E)) - // Currently, we only handle the case of B, C, D, and E being constant. - // We can't simply use C and E because we might actually handle - // (icmp ne (A & B), B) & (icmp eq (A & D), D) - // with B and D, having a single bit set. - ConstantInt *CCst = dyn_cast(C); - if (!CCst) - return nullptr; - ConstantInt *ECst = dyn_cast(E); - if (!ECst) - return nullptr; - if (PredL != NewCC) - CCst = cast(ConstantExpr::getXor(BCst, CCst)); - if (PredR != NewCC) - ECst = cast(ConstantExpr::getXor(DCst, ECst)); - - // If there is a conflict, we should actually return a false for the - // whole construct. - if (((BCst->getValue() & DCst->getValue()) & - (CCst->getValue() ^ ECst->getValue())).getBoolValue()) - return ConstantInt::get(LHS->getType(), !IsAnd); + // B, C, D, and E must be constants for this optimization + ConstantInt *CCst = dyn_cast(C); + if (!CCst) + return nullptr; + ConstantInt *ECst = dyn_cast(E); + if (!ECst) + return nullptr; - Value *NewOr1 = Builder.CreateOr(B, D); - Value *NewOr2 = ConstantExpr::getOr(CCst, ECst); - Value *NewAnd = Builder.CreateAnd(A, NewOr1); - return Builder.CreateICmp(NewCC, NewAnd, NewOr2); - } + // If B and D intersect, ie. (B & D) != 0, test if they contradict each other + // and if so simplify to false (or true if this is an Or). + // For example, + // (icmp (A & 14), 2) and (icmp (A & 3), 1) -> false. 
+ APInt Intersection = BCst->getValue() & DCst->getValue(); + if (Intersection != 0 && + ((Intersection & CCst->getValue()) != (Intersection & ECst->getValue()))) + return ConstantInt::get(LHS->getType(), !IsAnd); - return nullptr; + // Use an Xor mask to compare full range against zero + // (icmp(A & 4) == 4) && (icmp(A & 3) == 2) + // => (icmp((xor A, 6), 7) == 0) + // icmp(A & 4) == 4) || (icmp(A & 3) == 2) + // => (icmp((xor A, 1), 7) != 0) + APInt BAndD = BCst->getValue() & DCst->getValue(); + Value *X = ConstantExpr::getOr(BCst, DCst); + APInt ZI = APInt(BCst->getBitWidth(), 0), + Zero = APInt(BCst->getBitWidth(), 0), + BI = BCst->getValue(), CI = CCst->getValue(), + DI = DCst->getValue(), EI = ECst->getValue(); + bool LIsEq = (PredL == ICmpInst::ICMP_EQ); + bool RIsEq = (PredR == ICmpInst::ICMP_EQ); + + ZI |= LIsEq ? ~CI & BI : CI & BI; + ZI |= RIsEq ? ~EI & DI : EI & DI; + + if (!IsAnd) + ZI ^= BAndD; + + Value *NewXor = Builder.CreateXor(A, ConstantInt::get(A->getContext(), ZI)); + Value *NewAnd = Builder.CreateAnd(X, NewXor); + return Builder.CreateICmp(IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, + ConstantInt::get(A->getContext(), Zero), NewAnd); } /// Try to fold a signed range checked with lower bound 0 to an unsigned icmp. 
Index: test/Transforms/InstSimplify/AndOrXor.ll =================================================================== --- test/Transforms/InstSimplify/AndOrXor.ll +++ test/Transforms/InstSimplify/AndOrXor.ll @@ -64,7 +64,7 @@ define i64 @pow2(i32 %x) { ; CHECK-LABEL: @pow2( ; CHECK-NEXT: [[NEGX:%.*]] = sub i32 0, [[X:%.*]] -; CHECK-NEXT: [[X2:%.*]] = and i32 [[X]], [[NEGX]] +; CHECK-NEXT: [[X2:%.*]] = and i32 [[NEGX]], [[X]] ; CHECK-NEXT: [[E:%.*]] = zext i32 [[X2]] to i64 ; CHECK-NEXT: ret i64 [[E]] ; @@ -528,15 +528,12 @@ define i16 @and_of_different_cast_icmps(i8 %i) { ; CHECK-LABEL: @and_of_different_cast_icmps( ; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i8 [[I:%.*]], 0 -; CHECK-NEXT: [[CONV0:%.*]] = zext i1 [[CMP0]] to i16 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[I]], 1 -; CHECK-NEXT: [[CONV1:%.*]] = sext i1 [[CMP1]] to i16 -; CHECK-NEXT: [[AND:%.*]] = and i16 [[CONV0]], [[CONV1]] +; CHECK-NEXT: [[AND:%.*]] = zext i1 [[CMP0]] to i16 ; CHECK-NEXT: ret i16 [[AND]] ; %cmp0 = icmp eq i8 %i, 0 %conv0 = zext i1 %cmp0 to i16 - %cmp1 = icmp eq i8 %i, 1 + %cmp1 = icmp ne i8 %i, 1 %conv1 = sext i1 %cmp1 to i16 %and = and i16 %conv0, %conv1 ret i16 %and @@ -545,10 +542,9 @@ define <2 x i3> @and_of_different_cast_icmps_vec(<2 x i8> %i, <2 x i16> %j) { ; CHECK-LABEL: @and_of_different_cast_icmps_vec( ; CHECK-NEXT: [[CMP0:%.*]] = icmp eq <2 x i8> [[I:%.*]], zeroinitializer -; CHECK-NEXT: [[CONV0:%.*]] = zext <2 x i1> [[CMP0]] to <2 x i3> ; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i16> [[J:%.*]], -; CHECK-NEXT: [[CONV1:%.*]] = zext <2 x i1> [[CMP1]] to <2 x i3> -; CHECK-NEXT: [[AND:%.*]] = and <2 x i3> [[CONV0]], [[CONV1]] +; CHECK-NEXT: [[AND1:%.*]] = and <2 x i1> [[CMP0]], [[CMP1]] +; CHECK-NEXT: [[AND:%.*]] = zext <2 x i1> [[AND1]] to <2 x i3> ; CHECK-NEXT: ret <2 x i3> [[AND]] ; %cmp0 = icmp eq <2 x i8> %i, zeroinitializer @@ -575,7 +571,7 @@ define i3 @or_of_bitcast_icmps_vec(<3 x i65> %i) { ; CHECK-LABEL: @or_of_bitcast_icmps_vec( -; CHECK-NEXT: ret i3 bitcast (<3 x i1> to 
i3) +; CHECK-NEXT: ret i3 -1 ; %cmp0 = icmp sge <3 x i65> %i, zeroinitializer %conv0 = bitcast <3 x i1> %cmp0 to i3 @@ -591,9 +587,8 @@ ; CHECK-LABEL: @or_of_different_cast_icmps( ; CHECK-NEXT: [[CMP0:%.*]] = icmp ne i8 [[I:%.*]], 0 ; CHECK-NEXT: [[CONV0:%.*]] = zext i1 [[CMP0]] to i16 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i8 [[I]], 1 -; CHECK-NEXT: [[CONV1:%.*]] = sext i1 [[CMP1]] to i16 -; CHECK-NEXT: [[OR:%.*]] = or i16 [[CONV0]], [[CONV1]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[I]], 1 +; CHECK-NEXT: [[OR:%.*]] = select i1 [[CMP1]], i16 [[CONV0]], i16 -1 ; CHECK-NEXT: ret i16 [[OR]] ; %cmp0 = icmp ne i8 %i, 0 @@ -662,7 +657,7 @@ define i32 @test45(i32 %a, i32 %b) { ; CHECK-LABEL: @test45( ; CHECK-NEXT: [[NEGB:%.*]] = xor i32 [[B:%.*]], -1 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A:%.*]], [[NEGB]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[NEGB]], [[A:%.*]] ; CHECK-NEXT: ret i32 [[XOR]] ; %nega = xor i32 %a, -1 @@ -676,7 +671,7 @@ define i32 @test45_commuted_and(i32 %a, i32 %b) { ; CHECK-LABEL: @test45_commuted_and( ; CHECK-NEXT: [[NEGB:%.*]] = xor i32 [[B:%.*]], -1 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A:%.*]], [[NEGB]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[NEGB]], [[A:%.*]] ; CHECK-NEXT: ret i32 [[XOR]] ; %nega = xor i32 %a, -1 @@ -693,7 +688,7 @@ define i32 @test46(i32 %a, i32 %b) { ; CHECK-LABEL: @test46( ; CHECK-NEXT: [[NEGB:%.*]] = xor i32 [[B:%.*]], -1 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A:%.*]], [[NEGB]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[NEGB]], [[A:%.*]] ; CHECK-NEXT: ret i32 [[XOR]] ; %nega = xor i32 %a, -1 @@ -709,7 +704,7 @@ define i32 @test46_commuted_and(i32 %a, i32 %b) { ; CHECK-LABEL: @test46_commuted_and( ; CHECK-NEXT: [[NEGB:%.*]] = xor i32 [[B:%.*]], -1 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A:%.*]], [[NEGB]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[NEGB]], [[A:%.*]] ; CHECK-NEXT: ret i32 [[XOR]] ; %nega = xor i32 %a, -1 @@ -738,7 +733,7 @@ define i32 @test48(i32 %a, i32 %b) { ; CHECK-LABEL: @test48( ; CHECK-NEXT: [[NEGA:%.*]] = xor i32 
[[A:%.*]], -1 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[B:%.*]], [[NEGA]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[NEGA]], [[B:%.*]] ; CHECK-NEXT: ret i32 [[XOR]] ; %nega = xor i32 %a, -1 @@ -751,7 +746,7 @@ define i32 @test49(i32 %a, i32 %b) { ; CHECK-LABEL: @test49( ; CHECK-NEXT: [[NEGA:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[B:%.*]], [[NEGA]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[NEGA]], [[B:%.*]] ; CHECK-NEXT: ret i32 [[XOR]] ; %nega = xor i32 %a, -1 @@ -790,7 +785,7 @@ define i32 @test52(i32 %a, i32 %b) { ; CHECK-LABEL: @test52( ; CHECK-NEXT: [[NEGA:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[B:%.*]], [[NEGA]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[NEGA]], [[B:%.*]] ; CHECK-NEXT: ret i32 [[XOR]] ; %nega = xor i32 %a, -1 @@ -803,7 +798,7 @@ define i32 @test53(i32 %a, i32 %b) { ; CHECK-LABEL: @test53( ; CHECK-NEXT: [[NEGA:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[B:%.*]], [[NEGA]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[NEGA]], [[B:%.*]] ; CHECK-NEXT: ret i32 [[XOR]] ; %nega = xor i32 %a, -1 @@ -850,7 +845,7 @@ ; CHECK-NEXT: [[AND:%.*]] = and i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A]], [[B]] ; CHECK-NEXT: [[XNOR:%.*]] = xor i32 [[XOR]], -1 -; CHECK-NEXT: [[OR:%.*]] = or i32 [[XNOR]], [[AND]] +; CHECK-NEXT: [[OR:%.*]] = or i32 [[AND]], [[XNOR]] ; CHECK-NEXT: ret i32 [[OR]] ; %and = and i32 %a, %b @@ -884,7 +879,7 @@ ; CHECK-NEXT: [[AND:%.*]] = and i32 [[B:%.*]], [[A:%.*]] ; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A]], [[B]] ; CHECK-NEXT: [[XNOR:%.*]] = xor i32 [[XOR]], -1 -; CHECK-NEXT: [[OR:%.*]] = or i32 [[XNOR]], [[AND]] +; CHECK-NEXT: [[OR:%.*]] = or i32 [[AND]], [[XNOR]] ; CHECK-NEXT: ret i32 [[OR]] ; %and = and i32 %b, %a @@ -917,7 +912,7 @@ define i8 @lshr_undersized_mask(i8 %x) { ; CHECK-LABEL: @lshr_undersized_mask( ; CHECK-NEXT: [[SH:%.*]] = lshr i8 [[X:%.*]], 5 -; CHECK-NEXT: [[MASK:%.*]] = and i8 [[SH]], -2 +; CHECK-NEXT: [[MASK:%.*]] = and i8 [[SH]], 6 ; 
CHECK-NEXT: ret i8 [[MASK]] ; %sh = lshr i8 %x, 5 @@ -948,7 +943,7 @@ define <2 x i8> @shl_undersized_mask_splat(<2 x i8> %x) { ; CHECK-LABEL: @shl_undersized_mask_splat( ; CHECK-NEXT: [[SH:%.*]] = shl <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[MASK:%.*]] = and <2 x i8> [[SH]], +; CHECK-NEXT: [[MASK:%.*]] = and <2 x i8> [[SH]], ; CHECK-NEXT: ret <2 x i8> [[MASK]] ; %sh = shl <2 x i8> %x, @@ -981,7 +976,7 @@ define i64 @shl_or_and2(i32 %a, i1 %b) { ; CHECK-LABEL: @shl_or_and2( ; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[B:%.*]] to i64 -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 32 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP1]], 32 ; CHECK-NEXT: ret i64 [[TMP3]] ; %tmp1 = zext i1 %b to i64 @@ -1038,10 +1033,10 @@ define i32 @shl_or_and6(i16 %a, i16 %b) { ; CHECK-LABEL: @shl_or_and6( ; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32 ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP1]], 16 -; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], -65535 +; CHECK-NEXT: [[TMP1:%.*]] = and i16 [[B:%.*]], 1 +; CHECK-NEXT: [[TMP2_MASKED:%.*]] = zext i16 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP3]], [[TMP2_MASKED]] ; CHECK-NEXT: ret i32 [[TMP5]] ; %tmp1 = zext i16 %a to i32 @@ -1056,10 +1051,8 @@ define i32 @shl_or_and7(i16 %a, i16 %b) { ; CHECK-LABEL: @shl_or_and7( ; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32 ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP1]], 16 -; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], -131072 +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP3]], -131072 ; CHECK-NEXT: ret i32 [[TMP5]] ; %tmp1 = zext i16 %a to i32 @@ -1076,8 +1069,8 @@ ; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32 ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP1]], 16 -; CHECK-NEXT: 
[[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 131071 +; CHECK-NEXT: [[TMP3_MASKED:%.*]] = and i32 [[TMP3]], 65536 +; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP3_MASKED]], [[TMP2]] ; CHECK-NEXT: ret i32 [[TMP5]] ; %tmp1 = zext i16 %a to i32 @@ -1104,7 +1097,7 @@ define <2 x i64> @shl_or_and2v(<2 x i32> %a, <2 x i1> %b) { ; CHECK-LABEL: @shl_or_and2v( ; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i64> -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw <2 x i64> [[TMP1]], ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; %tmp1 = zext <2 x i1> %b to <2 x i64> @@ -1119,10 +1112,10 @@ ; A variation of above test case, but fails due to the mask value ; CHECK-LABEL: @shl_or_and3v( ; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i16> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i16> [[B:%.*]] to <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[B:%.*]], +; CHECK-NEXT: [[TMP2_MASKED:%.*]] = zext <2 x i16> [[TMP1]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i32> [[TMP3]], [[TMP2_MASKED]] ; CHECK-NEXT: ret <2 x i32> [[TMP5]] ; %tmp1 = zext <2 x i16> %a to <2 x i32> @@ -1132,3 +1125,34 @@ %tmp5 = and <2 x i32> %tmp4, ; mask with 0xFFFF0001 ret <2 x i32> %tmp5 } + +define i1 @icmp_set_and_unset_bits(i32 %a) { +; CHECK-LABEL: @icmp_set_and_unset_bits( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 7 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %tmp1 = and i32 %a, 1 + %tmp2 = icmp eq i32 %tmp1, 0 + %tmp3 = and i32 %a, 6 + %tmp4 = icmp ne i32 %tmp3, 0 + %tmp5 = and i1 %tmp2, %tmp4 + ret i1 %tmp5 +} + +define i1 @icmp_set_and_unset_bits_recursive(i32 %a) { +; CHECK-LABEL: @icmp_set_and_unset_bits_recursive( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 
[[A:%.*]], 199 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 134 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %tmp1 = and i32 %a, 1 + %tmp2 = icmp eq i32 %tmp1, 0 + %tmp3 = and i32 %a, 6 + %tmp4 = icmp ne i32 %tmp3, 0 + %tmp5 = and i32 %a, 192 + %tmp6 = icmp ne i32 %tmp5, 128 + %tmp7 = and i1 %tmp2, %tmp4 + %tmp8 = and i1 %tmp6, %tmp7 + ret i1 %tmp8 +}