Index: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -212,6 +212,10 @@
 /// "Not" means that in above descriptions "==" should be replaced by "!=".
 /// Example: (icmp ne (A & 3), 3) -> AMask_NotAllOnes
 ///
+/// "OnesAdjacent" means that (A & B) == C and the ones in B are adjacent,
+/// possibly considering the least and most-significant bits adjacent,
+/// e.g. 0b00111100.
+///
 /// If the mask A contains a single bit, then the following is equivalent:
 ///    (icmp eq (A & B), A) equals (icmp ne (A & B), 0)
 ///    (icmp ne (A & B), A) equals (icmp eq (A & B), 0)
@@ -225,7 +229,8 @@
   AMask_Mixed = 64,
   AMask_NotMixed = 128,
   BMask_Mixed = 256,
-  BMask_NotMixed = 512
+  BMask_NotMixed = 512,
+  BMask_OnesAdjacent = 1024,
 };
 
 /// Return the set of patterns (from MaskedICmpType) that (icmp SCC (A & B), C)
@@ -249,7 +254,6 @@
     if (IsBPow2)
       MaskVal |= (IsEq ? (BMask_NotAllOnes | BMask_NotMixed)
                        : (BMask_AllOnes | BMask_Mixed));
-    return MaskVal;
   }
 
   if (A == C) {
@@ -272,6 +276,19 @@
     MaskVal |= (IsEq ? BMask_Mixed : BMask_NotMixed);
   }
 
+  if (BCst) {
+    APInt BI = BCst->getValue();
+    unsigned POP = BI.countPopulation(),
+             LZ = BI.countLeadingZeros(), TZ = BI.countTrailingZeros(),
+             LO = BI.countLeadingOnes(), TO = BI.countTrailingOnes(),
+             BW = BI.getBitWidth();
+
+    if (POP == BW - (LZ + TZ))
+      MaskVal |= BMask_OnesAdjacent;
+    if (POP == LO + TO)
+      MaskVal |= BMask_OnesAdjacent;
+  }
+
   return MaskVal;
 }
 
@@ -289,6 +306,11 @@
                       AMask_NotMixed | BMask_NotMixed))
              >> 1;
 
+  // ZerosAdjacent is identical to OnesAdjacent (as wrapping is handled),
+  // so the conjugate is identical.
+  if (Mask & BMask_OnesAdjacent)
+    NewMask |= BMask_OnesAdjacent;
+
   return NewMask;
 }
 
@@ -619,15 +641,6 @@
   unsigned LHSMask = MaskPair->first;
   unsigned RHSMask = MaskPair->second;
   unsigned Mask = LHSMask & RHSMask;
-  if (Mask == 0) {
-    // Even if the two sides don't share a common pattern, check if folding can
-    // still happen.
-    if (Value *V = foldLogOpOfMaskedICmpsAsymmetric(
-            LHS, RHS, IsAnd, A, B, C, D, E, PredL, PredR, LHSMask, RHSMask,
-            Builder))
-      return V;
-    return nullptr;
-  }
 
   // In full generality:
   //     (icmp (A & B) Op C) | (icmp (A & D) Op E)
@@ -711,6 +724,13 @@
     return RHS;
   }
 
+  ConstantInt *CCst = dyn_cast<ConstantInt>(C);
+  if (!CCst)
+    return nullptr;
+  ConstantInt *ECst = dyn_cast<ConstantInt>(E);
+  if (!ECst)
+    return nullptr;
+
   if (Mask & BMask_Mixed) {
     // (icmp eq (A & B), C) & (icmp eq (A & D), E)
     // We already know that B & C == C && D & E == E.
@@ -722,12 +742,6 @@
     // We can't simply use C and E because we might actually handle
     //   (icmp ne (A & B), B) & (icmp eq (A & D), D)
     // with B and D, having a single bit set.
-    ConstantInt *CCst = dyn_cast<ConstantInt>(C);
-    if (!CCst)
-      return nullptr;
-    ConstantInt *ECst = dyn_cast<ConstantInt>(E);
-    if (!ECst)
-      return nullptr;
     if (PredL != NewCC)
       CCst = cast<ConstantInt>(ConstantExpr::getXor(BCst, CCst));
     if (PredR != NewCC)
@@ -745,6 +759,100 @@
     return Builder.CreateICmp(NewCC, NewAnd, NewOr2);
   }
 
+  if (Value *V = foldLogOpOfMaskedICmpsAsymmetric(
+          LHS, RHS, IsAnd, A, B, C, D, E, PredL, PredR, LHSMask, RHSMask,
+          Builder))
+    return V;
+
+  // (icmp eq (A & 3), 0) && (icmp ne (A & 4), 0)
+  //   -> (icmp eq (cttz A), 2)
+  // Our job is to remove the extra branch. Let the other optimizers clean up.
+  APInt BI = BCst->getValue(), CI = CCst->getValue(),
+        DI = DCst->getValue(), EI = ECst->getValue();
+  unsigned BW = BCst->getBitWidth(),
+           BPOP = BI.countPopulation(),
+           BLO = BI.countLeadingOnes(), BLZ = BI.countLeadingZeros(),
+           BTO = BI.countTrailingOnes(),
+           DPOP = DI.countPopulation(),
+           DLO = DI.countLeadingOnes(), DLZ = DI.countLeadingZeros(),
+           DTO = DI.countTrailingOnes();
+  Function *Fshl = Intrinsic::getDeclaration(LHS->getModule(), Intrinsic::fshl, A->getType());
+  Function *Cttz = Intrinsic::getDeclaration(LHS->getModule(), Intrinsic::cttz, A->getType());
+  bool LIsEq = (PredL == ICmpInst::ICMP_EQ);
+  bool RIsEq = (PredR == ICmpInst::ICMP_EQ);
+  if (Mask & BMask_OnesAdjacent) {
+    Value *I1Zero = ConstantInt::get(IntegerType::get(A->getContext(), 1), 0);
+    Value *One = ConstantInt::get(A->getType(), 1);
+    if (IsAnd && DPOP == 1 && LIsEq) {
+      unsigned Rotl = BTO ? BLO : BLZ + BPOP;
+      Value *NewShift1 = Builder.Insert(CallInst::Create(Fshl,
+          {A, A, ConstantInt::get(A->getType(), Rotl)}));
+      Value *NewShift2 = Builder.Insert(CallInst::Create(Fshl,
+          {A, A, ConstantInt::get(A->getType(), DLZ + 1 + BPOP)}));
+      Value *NewAnd1 = Builder.CreateAnd(NewShift1, NewShift2);
+      APInt XorMask = CI.rotl(Rotl);
+      assert(EI == 0 && "Single bit comparison not normalized to comparison against zero");
+      RIsEq ? XorMask.clearBit(BPOP) : XorMask.setBit(BPOP);
+      Value *NewXor1 = Builder.CreateXor(
+          NewAnd1, ConstantInt::get(A->getContext(), XorMask));
+      Value *NewCttz1 = Builder.Insert(CallInst::Create(Cttz, {NewXor1, I1Zero}));
+      return Builder.CreateICmp(ICmpInst::ICMP_EQ,
+                                NewCttz1, ConstantInt::get(A->getType(), BPOP));
+    } else if (IsAnd && BPOP == 1 && RIsEq) {
+      unsigned Rotl = DTO ? DLO : DLZ + DPOP;
+      Value *NewShift1 = Builder.Insert(CallInst::Create(Fshl,
+          {A, A, ConstantInt::get(A->getType(), Rotl)}));
+      Value *NewShift2 = Builder.Insert(CallInst::Create(Fshl,
+          {A, A, ConstantInt::get(A->getType(), BLZ + 1 + DPOP)}));
+      Value *NewAnd1 = Builder.CreateAnd(NewShift1, NewShift2);
+      APInt XorMask = EI.rotl(Rotl);
+      assert(CI == 0 && "Single bit comparison not normalized to comparison against zero");
+      LIsEq ? XorMask.clearBit(DPOP) : XorMask.setBit(DPOP);
+      Value *NewXor1 = Builder.CreateXor(
+          NewAnd1, ConstantInt::get(A->getContext(), XorMask));
+      Value *NewCttz1 = Builder.Insert(CallInst::Create(Cttz, {NewXor1, I1Zero}));
+      return Builder.CreateICmp(ICmpInst::ICMP_EQ,
+                                NewCttz1, ConstantInt::get(A->getType(), DPOP));
+    } else if (((IsAnd && !LIsEq) || !IsAnd) && DPOP == 1) {
+      unsigned Rotl = BTO ? BLO : BLZ + BPOP;
+      Value *NewShift1 = Builder.Insert(CallInst::Create(Fshl,
+          {A, A, ConstantInt::get(A->getType(), Rotl + 1)}));
+      Value *NewShift2 = Builder.Insert(CallInst::Create(Fshl,
+          {A, A, ConstantInt::get(A->getType(), DLZ + 1)}));
+      Value *NewAnd1 = Builder.CreateAnd(NewShift1, NewShift2);
+      APInt XorMask = CI.rotl(Rotl + 1).lshr(BW - (BPOP - 1)).zextOrTrunc(BPOP + 1);
+      XorMask.flipAllBits();
+      assert(EI == 0 && "Single bit comparison not normalized to comparison against zero");
+      RIsEq ? XorMask.clearBit(0) : XorMask.setBit(0);
+      Value *NewXor1 = Builder.CreateXor(
+          NewAnd1, ConstantInt::get(A->getContext(),
+                                    XorMask.zextOrTrunc(BW).shl(BW - (DPOP - 1))));
+      Value *NewCttz1 = Builder.Insert(CallInst::Create(Cttz, {NewXor1, I1Zero}));
+      // Subtract 1 so that R not matching becomes uintmax.
+      Value *NewSub1 = Builder.CreateSub(NewCttz1, One);
+      return Builder.CreateICmp(LIsEq ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT,
+                                NewSub1, ConstantInt::get(A->getType(), BPOP));
+    } else if (((IsAnd && !RIsEq) || !IsAnd) && BPOP == 1) {
+      unsigned Rotl = DTO ? DLO : DLZ + DPOP;
+      Value *NewShift1 = Builder.Insert(CallInst::Create(Fshl,
+          {A, A, ConstantInt::get(A->getType(), Rotl + 1)}));
+      Value *NewShift2 = Builder.Insert(CallInst::Create(Fshl,
+          {A, A, ConstantInt::get(A->getType(), BLO + 1)}));
+      Value *NewAnd1 = Builder.CreateAnd(NewShift1, NewShift2);
+      APInt XorMask = EI.rotl(Rotl + 1).lshr(BW - (DPOP - 1)).zextOrTrunc(DPOP + 1);
+      assert(CI == 0 && "Single bit comparison not normalized to comparison against zero");
+      LIsEq ? XorMask.clearBit(DPOP) : XorMask.setBit(DPOP);
+      XorMask.flipAllBits();
+      Value *NewXor1 = Builder.CreateXor(
+          NewAnd1, ConstantInt::get(A->getContext(),
+                                    XorMask.zextOrTrunc(BW).shl(BW - (DPOP - 1))));
+      Value *NewCttz1 = Builder.Insert(CallInst::Create(Cttz, {NewXor1, I1Zero}));
+      Value *NewSub1 = Builder.CreateSub(NewCttz1, One);
+      return Builder.CreateICmp(RIsEq ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT,
+                                NewSub1, ConstantInt::get(A->getType(), DPOP));
+    }
+  }
+
   return nullptr;
 }
 
Index: test/Transforms/InstCombine/and-or-icmps.ll
===================================================================
--- test/Transforms/InstCombine/and-or-icmps.ll
+++ test/Transforms/InstCombine/and-or-icmps.ll
@@ -3,7 +3,7 @@
 
 define i1 @PR1817_1(i32 %X) {
 ; CHECK-LABEL: @PR1817_1(
-; CHECK-NEXT:    [[B:%.*]] = icmp ult i32 %X, 10
+; CHECK-NEXT:    [[B:%.*]] = icmp ult i32 [[X:%.*]], 10
 ; CHECK-NEXT:    ret i1 [[B]]
 ;
   %A = icmp slt i32 %X, 10
@@ -14,7 +14,7 @@
 
 define i1 @PR1817_2(i32 %X) {
 ; CHECK-LABEL: @PR1817_2(
-; CHECK-NEXT:    [[A:%.*]] = icmp slt i32 %X, 10
+; CHECK-NEXT:    [[A:%.*]] = icmp slt i32 [[X:%.*]], 10
 ; CHECK-NEXT:    ret i1 [[A]]
 ;
   %A = icmp slt i32 %X, 10
@@ -25,7 +25,7 @@
 
 define i1 @PR2330(i32 %a, i32 %b) {
 ; CHECK-LABEL: @PR2330(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i32 %b, %a
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[B:%.*]], [[A:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 8
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -41,7 +41,7 @@
 
 define i1 @or_eq_with_one_bit_diff_constants1(i32 %x) {
 ; CHECK-LABEL: @or_eq_with_one_bit_diff_constants1(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i32 %x, 1
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[X:%.*]], 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 51
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -55,7 +55,7 @@
 
 define i1 @and_ne_with_one_bit_diff_constants1(i32 %x) {
 ; CHECK-LABEL: @and_ne_with_one_bit_diff_constants1(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i32 %x, 1
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[X:%.*]], 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 51
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -69,7 +69,7 @@
 
 define i1 @or_eq_with_one_bit_diff_constants2(i32 %x) {
 ; CHECK-LABEL: @or_eq_with_one_bit_diff_constants2(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i32 %x, 32
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[X:%.*]], 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 97
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -81,7 +81,7 @@
 
 define i1 @and_ne_with_one_bit_diff_constants2(i19 %x) {
 ; CHECK-LABEL: @and_ne_with_one_bit_diff_constants2(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i19 %x, 128
+; CHECK-NEXT:    [[TMP1:%.*]] = or i19 [[X:%.*]], 128
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i19 [[TMP1]], 193
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -95,7 +95,7 @@
 
 define i1 @or_eq_with_one_bit_diff_constants3(i8 %x) {
 ; CHECK-LABEL: @or_eq_with_one_bit_diff_constants3(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i8 %x, -128
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 [[X:%.*]], -128
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], -2
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -107,7 +107,7 @@
 
 define i1 @and_ne_with_one_bit_diff_constants3(i8 %x) {
 ; CHECK-LABEL: @and_ne_with_one_bit_diff_constants3(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i8 %x, -128
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 [[X:%.*]], -128
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i8 [[TMP1]], -63
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -122,7 +122,7 @@
 
 define i1 @or_eq_with_diff_one(i8 %x) {
 ; CHECK-LABEL: @or_eq_with_diff_one(
-; CHECK-NEXT:    [[TMP1:%.*]] = add i8 %x, -13
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X:%.*]], -13
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 2
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -136,7 +136,7 @@
 
 define i1 @and_ne_with_diff_one(i32 %x) {
 ; CHECK-LABEL: @and_ne_with_diff_one(
-; CHECK-NEXT:    [[TMP1:%.*]] = add i32 %x, -39
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X:%.*]], -39
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt i32 [[TMP1]], 1
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -151,7 +151,7 @@
 
 define i1 @or_eq_with_diff_one_signed(i32 %x) {
 ; CHECK-LABEL: @or_eq_with_diff_one_signed(
-; CHECK-NEXT:    [[TMP1:%.*]] = add i32 %x, 1
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X:%.*]], 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 2
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -163,7 +163,7 @@
 
 define i1 @and_ne_with_diff_one_signed(i64 %x) {
 ; CHECK-LABEL: @and_ne_with_diff_one_signed(
-; CHECK-NEXT:    [[TMP1:%.*]] = add i64 %x, 1
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[X:%.*]], 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt i64 [[TMP1]], 1
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -177,7 +177,7 @@
 
 define <2 x i1> @or_eq_with_one_bit_diff_constants2_splatvec(<2 x i32> %x) {
 ; CHECK-LABEL: @or_eq_with_one_bit_diff_constants2_splatvec(
-; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> %x, <i32 32, i32 32>
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[X:%.*]], <i32 32, i32 32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], <i32 97, i32 97>
 ; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
 ;
@@ -189,7 +189,7 @@
 
 define <2 x i1> @and_ne_with_diff_one_splatvec(<2 x i32> %x) {
 ; CHECK-LABEL: @and_ne_with_diff_one_splatvec(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> %x, <i32 -39, i32 -39>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 -39, i32 -39>
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <2 x i32> [[TMP1]], <i32 1, i32 1>
 ; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
 ;
@@ -208,14 +208,17 @@
 ; CHECK-LABEL: @simplify_before_foldAndOfICmps(
 ; CHECK-NEXT:    [[A8:%.*]] = alloca i16, align 2
 ; CHECK-NEXT:    [[L7:%.*]] = load i16, i16* [[A8]], align 2
-; CHECK-NEXT:    [[C10:%.*]] = icmp ult i16 [[L7]], 2
-; CHECK-NEXT:    [[C7:%.*]] = icmp slt i16 [[L7]], 0
-; CHECK-NEXT:    [[C18:%.*]] = or i1 [[C7]], [[C10]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.fshl.i16(i16 [[L7]], i16 [[L7]], i16 2)
+; CHECK-NEXT:    [[TMP2:%.*]] = and i16 [[L7]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i16 [[TMP2]], -4
+; CHECK-NEXT:    [[TMP4:%.*]] = call i16 @llvm.cttz.i16(i16 [[TMP3]], i1 false), !range !0
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw i16 [[TMP4]], -1
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i16 [[TMP5]], 16
 ; CHECK-NEXT:    [[L7_LOBIT:%.*]] = ashr i16 [[L7]], 15
-; CHECK-NEXT:    [[TMP1:%.*]] = sext i16 [[L7_LOBIT]] to i64
-; CHECK-NEXT:    [[G26:%.*]] = getelementptr i1, i1* null, i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP7:%.*]] = sext i16 [[L7_LOBIT]] to i64
+; CHECK-NEXT:    [[G26:%.*]] = getelementptr i1, i1* null, i64 [[TMP7]]
 ; CHECK-NEXT:    store i16 [[L7]], i16* undef, align 2
-; CHECK-NEXT:    store i1 [[C18]], i1* undef, align 1
+; CHECK-NEXT:    store i1 [[TMP6]], i1* undef, align 1
 ; CHECK-NEXT:    store i1* [[G26]], i1** undef, align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -253,3 +256,88 @@
   ret void
 }
 
+define i1 @icmp_clz(i32 %a) {
+; CHECK-LABEL: @icmp_clz(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[A]], i32 1)
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.fshl.i32(i32 [[A]], i32 [[A]], i32 30)
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP3]], i1 false), !range !1
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw i32 [[TMP4]], -1
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ugt i32 [[TMP5]], 2
+; CHECK-NEXT:    ret i1 [[TMP6]]
+;
+  %tmp1 = and i32 %a, 3
+  %tmp2 = icmp ne i32 %tmp1, 0
+  %tmp3 = and i32 %a, 4
+  %tmp4 = icmp ne i32 %tmp3, 0
+  %tmp5 = and i1 %tmp2, %tmp4
+  ret i1 %tmp5
+}
+define i1 @icmp_clz2(i32 %a) {
+; CHECK-LABEL: @icmp_clz2(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[A]], i32 29)
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[A]]
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP2]], i1 false), !range !1
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw i32 [[TMP3]], -1
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ugt i32 [[TMP4]], 2
+; CHECK-NEXT:    ret i1 [[TMP5]]
+;
+  %tmp1 = and i32 %a, 6
+  %tmp2 = icmp ne i32 %tmp1, 0
+  %tmp3 = and i32 %a, 8
+  %tmp4 = icmp ne i32 %tmp3, 0
+  %tmp5 = and i1 %tmp2, %tmp4
+  ret i1 %tmp5
+}
+define i1 @icmp_clz3(i8 %a) {
+; CHECK-LABEL: @icmp_clz3(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.fshl.i8(i8 [[A:%.*]], i8 [[A]], i8 3)
+; CHECK-NEXT:    [[TMP2:%.*]] = call i8 @llvm.fshl.i8(i8 [[A]], i8 [[A]], i8 3)
+; CHECK-NEXT:    [[TMP3:%.*]] = and i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call i8 @llvm.cttz.i8(i8 [[TMP3]], i1 false), !range !2
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw i8 [[TMP4]], -1
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ugt i8 [[TMP5]], 2
+; CHECK-NEXT:    ret i1 [[TMP6]]
+;
+  %tmp1 = and i8 %a, 192
+  %tmp2 = icmp ne i8 %tmp1, 128
+  %tmp3 = and i8 %a, 32
+  %tmp4 = icmp ne i8 %tmp3, 0
+  %tmp5 = and i1 %tmp2, %tmp4
+  ret i1 %tmp5
+}
+define i1 @icmp_clz5(i8 %a) {
+; CHECK-LABEL: @icmp_clz5(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.fshl.i8(i8 [[A:%.*]], i8 [[A]], i8 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = call i8 @llvm.fshl.i8(i8 [[A]], i8 [[A]], i8 5)
+; CHECK-NEXT:    [[TMP3:%.*]] = and i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call i8 @llvm.cttz.i8(i8 [[TMP3]], i1 false), !range !2
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw i8 [[TMP4]], -1
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i8 [[TMP5]], 5
+; CHECK-NEXT:    ret i1 [[TMP6]]
+;
+  %tmp1 = and i8 %a, 240 ; 0b11110000
+  %tmp2 = icmp eq i8 %tmp1, 144 ; 0b10010000
+  %tmp3 = and i8 %a, 8
+  %tmp4 = icmp ne i8 %tmp3, 0
+  %tmp5 = or i1 %tmp2, %tmp4
+  ret i1 %tmp5
+}
+define i1 @icmp_clz6(i8 %a) {
+; CHECK-LABEL: @icmp_clz6(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.fshl.i8(i8 [[A:%.*]], i8 [[A]], i8 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = call i8 @llvm.fshl.i8(i8 [[A]], i8 [[A]], i8 1)
+; CHECK-NEXT:    [[TMP3:%.*]] = and i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i8 [[TMP3]], -32
+; CHECK-NEXT:    [[TMP5:%.*]] = call i8 @llvm.cttz.i8(i8 [[TMP4]], i1 false), !range !2
+; CHECK-NEXT:    [[TMP6:%.*]] = add nsw i8 [[TMP5]], -1
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ult i8 [[TMP6]], 5
+; CHECK-NEXT:    ret i1 [[TMP7]]
+;
+  %tmp1 = and i8 %a, 4
+  %tmp2 = icmp ne i8 %tmp1, 0
+  %tmp3 = and i8 %a, 240
+  %tmp4 = icmp eq i8 %tmp3, 144
+  %tmp5 = or i1 %tmp2, %tmp4
+  ret i1 %tmp5
+}
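
Aside (not part of the patch): a minimal C++ sanity check of the equivalence quoted in the new comment, (icmp eq (A & 3), 0) && (icmp ne (A & 4), 0) -> (icmp eq (cttz A), 2), assuming GCC/Clang's __builtin_ctz; the helper name cttz32 is invented here, and cttz(0) is defined as the bit width to match llvm.cttz(..., i1 false).

#include <cassert>
#include <cstdint>

// cttz with cttz(0) defined as the bit width (32), matching llvm.cttz(x, i1 false).
static unsigned cttz32(uint32_t A) { return A ? __builtin_ctz(A) : 32; }

int main() {
  // Only bits 0..2 of A influence either side of the equivalence, so sweeping
  // the low 16 bits is already exhaustive for the property being checked.
  for (uint32_t A = 0; A <= 0xFFFF; ++A) {
    bool Lhs = (A & 3) == 0 && (A & 4) != 0; // the original two-compare form
    bool Rhs = cttz32(A) == 2;               // the single cttz compare
    assert(Lhs == Rhs);
  }
  return 0;
}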