Index: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -212,6 +212,10 @@
 /// "Not" means that in above descriptions "==" should be replaced by "!=".
 /// Example: (icmp ne (A & 3), 3) -> AMask_NotAllOnes
 ///
+/// "OnesAdjacent" means that (A & B) == C and the ones in B are adjacent,
+/// possibly considering the least and most-significant bits adjacent,
+/// e.g. 0b00111100.
+///
 /// If the mask A contains a single bit, then the following is equivalent:
 ///    (icmp eq (A & B), A) equals (icmp ne (A & B), 0)
 ///    (icmp ne (A & B), A) equals (icmp eq (A & B), 0)
@@ -225,7 +229,8 @@
   AMask_Mixed = 64,
   AMask_NotMixed = 128,
   BMask_Mixed = 256,
-  BMask_NotMixed = 512
+  BMask_NotMixed = 512,
+  BMask_OnesAdjacent = 1024,
 };
 
 /// Return the set of patterns (from MaskedICmpType) that (icmp SCC (A & B), C)
@@ -249,7 +254,6 @@
     if (IsBPow2)
       MaskVal |= (IsEq ? (BMask_NotAllOnes | BMask_NotMixed)
                        : (BMask_AllOnes | BMask_Mixed));
-    return MaskVal;
   }
 
   if (A == C) {
@@ -272,6 +276,19 @@
     MaskVal |= (IsEq ? BMask_Mixed : BMask_NotMixed);
   }
 
+  if (BCst) {
+    APInt BI = BCst->getValue();
+    unsigned POP = BI.countPopulation(),
+             LZ = BI.countLeadingZeros(), TZ = BI.countTrailingZeros(),
+             LO = BI.countLeadingOnes(), TO = BI.countTrailingOnes(),
+             BW = BI.getBitWidth();
+
+    if (POP == BW - (LZ + TZ))
+      MaskVal |= BMask_OnesAdjacent;
+    if (POP == LO + TO)
+      MaskVal |= BMask_OnesAdjacent;
+  }
+
   return MaskVal;
 }
 
@@ -289,6 +306,11 @@
                       AMask_NotMixed | BMask_NotMixed))
              >> 1;
 
+  // ZerosAdjacent is identical to OnesAdjacent (as wrapping is handled),
+  // so the conjugate is identical.
+  if (Mask & BMask_OnesAdjacent)
+    NewMask |= BMask_OnesAdjacent;
+
   return NewMask;
 }
 
@@ -619,15 +641,6 @@
   unsigned LHSMask = MaskPair->first;
   unsigned RHSMask = MaskPair->second;
   unsigned Mask = LHSMask & RHSMask;
-  if (Mask == 0) {
-    // Even if the two sides don't share a common pattern, check if folding can
-    // still happen.
-    if (Value *V = foldLogOpOfMaskedICmpsAsymmetric(
-            LHS, RHS, IsAnd, A, B, C, D, E, PredL, PredR, LHSMask, RHSMask,
-            Builder))
-      return V;
-    return nullptr;
-  }
 
   // In full generality:
   //     (icmp (A & B) Op C) | (icmp (A & D) Op E)
@@ -711,6 +724,13 @@
     return RHS;
   }
 
+  ConstantInt *CCst = dyn_cast<ConstantInt>(C);
+  if (!CCst)
+    return nullptr;
+  ConstantInt *ECst = dyn_cast<ConstantInt>(E);
+  if (!ECst)
+    return nullptr;
+
   if (Mask & BMask_Mixed) {
     // (icmp eq (A & B), C) & (icmp eq (A & D), E)
     // We already know that B & C == C && D & E == E.
@@ -722,12 +742,6 @@
     // We can't simply use C and E because we might actually handle
     //   (icmp ne (A & B), B) & (icmp eq (A & D), D)
     // with B and D, having a single bit set.
-    ConstantInt *CCst = dyn_cast<ConstantInt>(C);
-    if (!CCst)
-      return nullptr;
-    ConstantInt *ECst = dyn_cast<ConstantInt>(E);
-    if (!ECst)
-      return nullptr;
     if (PredL != NewCC)
       CCst = cast<ConstantInt>(ConstantExpr::getXor(BCst, CCst));
     if (PredR != NewCC)
@@ -745,6 +759,100 @@
     return Builder.CreateICmp(NewCC, NewAnd, NewOr2);
   }
 
+  if (Value *V = foldLogOpOfMaskedICmpsAsymmetric(
+          LHS, RHS, IsAnd, A, B, C, D, E, PredL, PredR, LHSMask, RHSMask,
+          Builder))
+    return V;
+
+  // (icmp eq (A & 3), 0) && (icmp ne (A & 4), 0)
+  //   -> (icmp eq (cttz A), 2)
+  // Our job is to remove the extra branch. Let the other optimizers clean up.
+  APInt BI = BCst->getValue(), CI = CCst->getValue(),
+        DI = DCst->getValue(), EI = ECst->getValue();
+  unsigned BW = BCst->getBitWidth(),
+           BPOP = BI.countPopulation(),
+           BLO = BI.countLeadingOnes(), BLZ = BI.countLeadingZeros(),
+           BTO = BI.countTrailingOnes(),
+           DPOP = DI.countPopulation(),
+           DLO = DI.countLeadingOnes(), DLZ = DI.countLeadingZeros(),
+           DTO = DI.countTrailingOnes();
+  Function *Fshl = Intrinsic::getDeclaration(LHS->getModule(), Intrinsic::fshl, A->getType());
+  Function *Cttz = Intrinsic::getDeclaration(LHS->getModule(), Intrinsic::cttz, A->getType());
+  bool LIsEq = (PredL == ICmpInst::ICMP_EQ);
+  bool RIsEq = (PredR == ICmpInst::ICMP_EQ);
+  if (Mask & BMask_OnesAdjacent) {
+    Value *I1Zero = ConstantInt::get(IntegerType::get(A->getContext(), 1), 0);
+    Value *One = ConstantInt::get(A->getType(), 1);
+    if (IsAnd && DPOP == 1 && LIsEq) {
+      unsigned Rotl = BTO ? BLO : BLZ + BPOP;
+      Value *NewShift1 = Builder.Insert(CallInst::Create(Fshl,
+          {A, A, ConstantInt::get(A->getType(), Rotl)}));
+      Value *NewShift2 = Builder.Insert(CallInst::Create(Fshl,
+          {A, A, ConstantInt::get(A->getType(), DLZ + 1 + BPOP)}));
+      Value *NewAnd1 = Builder.CreateAnd(NewShift1, NewShift2);
+      APInt XorMask = CI.rotl(Rotl);
+      assert(EI == 0 && "Single bit comparison not normalized to comparison against zero");
+      RIsEq ? XorMask.clearBit(BPOP) : XorMask.setBit(BPOP);
+      Value *NewXor1 = Builder.CreateXor(
+          NewAnd1, ConstantInt::get(A->getContext(), XorMask));
+      Value *NewCttz1 = Builder.Insert(CallInst::Create(Cttz, {NewXor1, I1Zero}));
+      return Builder.CreateICmp(ICmpInst::ICMP_EQ,
+                                NewCttz1, ConstantInt::get(A->getType(), BPOP));
+    } else if (IsAnd && BPOP == 1 && RIsEq) {
+      unsigned Rotl = DTO ? DLO : DLZ + DPOP;
+      Value *NewShift1 = Builder.Insert(CallInst::Create(Fshl,
+          {A, A, ConstantInt::get(A->getType(), Rotl)}));
+      Value *NewShift2 = Builder.Insert(CallInst::Create(Fshl,
+          {A, A, ConstantInt::get(A->getType(), BLZ + 1 + DPOP)}));
+      Value *NewAnd1 = Builder.CreateAnd(NewShift1, NewShift2);
+      APInt XorMask = EI.rotl(Rotl);
+      assert(CI == 0 && "Single bit comparison not normalized to comparison against zero");
+      LIsEq ? XorMask.clearBit(DPOP) : XorMask.setBit(DPOP);
+      Value *NewXor1 = Builder.CreateXor(
+          NewAnd1, ConstantInt::get(A->getContext(), XorMask));
+      Value *NewCttz1 = Builder.Insert(CallInst::Create(Cttz, {NewXor1, I1Zero}));
+      return Builder.CreateICmp(ICmpInst::ICMP_EQ,
+                                NewCttz1, ConstantInt::get(A->getType(), DPOP));
+    } else if (((IsAnd && !LIsEq) || !IsAnd) && DPOP == 1) {
+      unsigned Rotl = BTO ? BLO : BLZ + BPOP;
+      Value *NewShift1 = Builder.Insert(CallInst::Create(Fshl,
+          {A, A, ConstantInt::get(A->getType(), Rotl + 1)}));
+      Value *NewShift2 = Builder.Insert(CallInst::Create(Fshl,
+          {A, A, ConstantInt::get(A->getType(), DLZ + 1)}));
+      Value *NewAnd1 = Builder.CreateAnd(NewShift1, NewShift2);
+      APInt XorMask = CI.rotl(Rotl + 1).lshr(BW - (BPOP - 1)).zextOrTrunc(BPOP + 1);
+      XorMask.flipAllBits();
+      assert(EI == 0 && "Single bit comparison not normalized to comparison against zero");
+      RIsEq ? XorMask.clearBit(0) : XorMask.setBit(0);
+      Value *NewXor1 = Builder.CreateXor(
+          NewAnd1, ConstantInt::get(A->getContext(),
+                                    XorMask.zextOrTrunc(BW).shl(BW - (DPOP - 1))));
+      Value *NewCttz1 = Builder.Insert(CallInst::Create(Cttz, {NewXor1, I1Zero}));
+      // Subtract 1 so that R not matching becomes uintmax.
+      Value *NewSub1 = Builder.CreateSub(NewCttz1, One);
+      return Builder.CreateICmp(LIsEq ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT,
+                                NewSub1, ConstantInt::get(A->getType(), BPOP));
+    } else if (((IsAnd && !RIsEq) || !IsAnd) && BPOP == 1) {
+      unsigned Rotl = DTO ? DLO : DLZ + DPOP;
+      Value *NewShift1 = Builder.Insert(CallInst::Create(Fshl,
+          {A, A, ConstantInt::get(A->getType(), Rotl + 1)}));
+      Value *NewShift2 = Builder.Insert(CallInst::Create(Fshl,
+          {A, A, ConstantInt::get(A->getType(), BLO + 1)}));
+      Value *NewAnd1 = Builder.CreateAnd(NewShift1, NewShift2);
+      APInt XorMask = EI.rotl(Rotl + 1).lshr(BW - (DPOP - 1)).zextOrTrunc(DPOP + 1);
+      assert(CI == 0 && "Single bit comparison not normalized to comparison against zero");
+      LIsEq ? XorMask.clearBit(DPOP) : XorMask.setBit(DPOP);
+      XorMask.flipAllBits();
+      Value *NewXor1 = Builder.CreateXor(
+          NewAnd1, ConstantInt::get(A->getContext(),
+                                    XorMask.zextOrTrunc(BW).shl(BW - (DPOP - 1))));
+      Value *NewCttz1 = Builder.Insert(CallInst::Create(Cttz, {NewXor1, I1Zero}));
+      Value *NewSub1 = Builder.CreateSub(NewCttz1, One);
+      return Builder.CreateICmp(RIsEq ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT,
+                                NewSub1, ConstantInt::get(A->getType(), DPOP));
+    }
+  }
+
   return nullptr;
 }
 
Index: test/Transforms/InstCombine/and-or-icmps.ll
===================================================================
--- test/Transforms/InstCombine/and-or-icmps.ll
+++ test/Transforms/InstCombine/and-or-icmps.ll
@@ -3,7 +3,7 @@
 
 define i1 @PR1817_1(i32 %X) {
 ; CHECK-LABEL: @PR1817_1(
-; CHECK-NEXT:    [[B:%.*]] = icmp ult i32 %X, 10
+; CHECK-NEXT:    [[B:%.*]] = icmp ult i32 [[X:%.*]], 10
 ; CHECK-NEXT:    ret i1 [[B]]
 ;
   %A = icmp slt i32 %X, 10
@@ -14,7 +14,7 @@
 
 define i1 @PR1817_2(i32 %X) {
 ; CHECK-LABEL: @PR1817_2(
-; CHECK-NEXT:    [[A:%.*]] = icmp slt i32 %X, 10
+; CHECK-NEXT:    [[A:%.*]] = icmp slt i32 [[X:%.*]], 10
 ; CHECK-NEXT:    ret i1 [[A]]
 ;
   %A = icmp slt i32 %X, 10
@@ -25,7 +25,7 @@
 
 define i1 @PR2330(i32 %a, i32 %b) {
 ; CHECK-LABEL: @PR2330(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i32 %b, %a
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[B:%.*]], [[A:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 8
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -41,7 +41,7 @@
 
 define i1 @or_eq_with_one_bit_diff_constants1(i32 %x) {
 ; CHECK-LABEL: @or_eq_with_one_bit_diff_constants1(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i32 %x, 1
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[X:%.*]], 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 51
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -55,7 +55,7 @@
 
 define i1 @and_ne_with_one_bit_diff_constants1(i32 %x) {
 ; CHECK-LABEL: @and_ne_with_one_bit_diff_constants1(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i32 %x, 1
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[X:%.*]], 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 51
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -69,7 +69,7 @@
 
 define i1 @or_eq_with_one_bit_diff_constants2(i32 %x) {
 ; CHECK-LABEL: @or_eq_with_one_bit_diff_constants2(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i32 %x, 32
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[X:%.*]], 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 97
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -81,7 +81,7 @@
 
 define i1 @and_ne_with_one_bit_diff_constants2(i19 %x) {
 ; CHECK-LABEL: @and_ne_with_one_bit_diff_constants2(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i19 %x, 128
+; CHECK-NEXT:    [[TMP1:%.*]] = or i19 [[X:%.*]], 128
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i19 [[TMP1]], 193
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -95,7 +95,7 @@
 
 define i1 @or_eq_with_one_bit_diff_constants3(i8 %x) {
 ; CHECK-LABEL: @or_eq_with_one_bit_diff_constants3(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i8 %x, -128
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 [[X:%.*]], -128
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], -2
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -107,7 +107,7 @@
 
 define i1 @and_ne_with_one_bit_diff_constants3(i8 %x) {
 ; CHECK-LABEL: @and_ne_with_one_bit_diff_constants3(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i8 %x, -128
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 [[X:%.*]], -128
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i8 [[TMP1]], -63
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -122,7 +122,7 @@
 
 define i1 @or_eq_with_diff_one(i8 %x) {
 ; CHECK-LABEL: @or_eq_with_diff_one(
-; CHECK-NEXT:    [[TMP1:%.*]] = add i8 %x, -13
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X:%.*]], -13
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 2
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -136,7 +136,7 @@
 
 define i1 @and_ne_with_diff_one(i32 %x) {
 ; CHECK-LABEL: @and_ne_with_diff_one(
-; CHECK-NEXT:    [[TMP1:%.*]] = add i32 %x, -39
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X:%.*]], -39
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt i32 [[TMP1]], 1
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -151,7 +151,7 @@
 
 define i1 @or_eq_with_diff_one_signed(i32 %x) {
 ; CHECK-LABEL: @or_eq_with_diff_one_signed(
-; CHECK-NEXT:    [[TMP1:%.*]] = add i32 %x, 1
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X:%.*]], 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 2
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -163,7 +163,7 @@
 
 define i1 @and_ne_with_diff_one_signed(i64 %x) {
 ; CHECK-LABEL: @and_ne_with_diff_one_signed(
-; CHECK-NEXT:    [[TMP1:%.*]] = add i64 %x, 1
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[X:%.*]], 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt i64 [[TMP1]], 1
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
@@ -177,7 +177,7 @@
 
 define <2 x i1> @or_eq_with_one_bit_diff_constants2_splatvec(<2 x i32> %x) {
 ; CHECK-LABEL: @or_eq_with_one_bit_diff_constants2_splatvec(
-; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> %x, <i32 32, i32 32>
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[X:%.*]], <i32 32, i32 32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], <i32 97, i32 97>
 ; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
 ;
@@ -189,7 +189,7 @@
 
 define <2 x i1> @and_ne_with_diff_one_splatvec(<2 x i32> %x) {
 ; CHECK-LABEL: @and_ne_with_diff_one_splatvec(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> %x, <i32 -39, i32 -39>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 -39, i32 -39>
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <2 x i32> [[TMP1]], <i32 1, i32 1>
 ; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
 ;
@@ -208,14 +208,17 @@
 ; CHECK-LABEL: @simplify_before_foldAndOfICmps(
 ; CHECK-NEXT:    [[A8:%.*]] = alloca i16, align 2
 ; CHECK-NEXT:    [[L7:%.*]] = load i16, i16* [[A8]], align 2
-; CHECK-NEXT:    [[C10:%.*]] = icmp ult i16 [[L7]], 2
-; CHECK-NEXT:    [[C7:%.*]] = icmp slt i16 [[L7]], 0
-; CHECK-NEXT:    [[C18:%.*]] = or i1 [[C7]], [[C10]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.fshl.i16(i16 [[L7]], i16 [[L7]], i16 2)
+; CHECK-NEXT:    [[TMP2:%.*]] = and i16 [[L7]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i16 [[TMP2]], -4
+; CHECK-NEXT:    [[TMP4:%.*]] = call i16 @llvm.cttz.i16(i16 [[TMP3]], i1 false), !range !0
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw i16 [[TMP4]], -1
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i16 [[TMP5]], 16
 ; CHECK-NEXT:    [[L7_LOBIT:%.*]] = ashr i16 [[L7]], 15
-; CHECK-NEXT:    [[TMP1:%.*]] = sext i16 [[L7_LOBIT]] to i64
-; CHECK-NEXT:    [[G26:%.*]] = getelementptr i1, i1* null, i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP7:%.*]] = sext i16 [[L7_LOBIT]] to i64
+; CHECK-NEXT:    [[G26:%.*]] = getelementptr i1, i1* null, i64 [[TMP7]]
 ; CHECK-NEXT:    store i16 [[L7]], i16* undef, align 2
-; CHECK-NEXT:    store i1 [[C18]], i1* undef, align 1
+; CHECK-NEXT:    store i1 [[TMP6]], i1* undef, align 1
 ; CHECK-NEXT:    store i1* [[G26]], i1** undef, align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -253,3 +256,88 @@
   ret void
 }
 
+define i1 @icmp_clz(i32 %a) {
+; CHECK-LABEL: @icmp_clz(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[A]], i32 1)
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.fshl.i32(i32 [[A]], i32 [[A]], i32 30)
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP3]], i1 false), !range !1
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw i32 [[TMP4]], -1
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ugt i32 [[TMP5]], 2
+; CHECK-NEXT:    ret i1 [[TMP6]]
+;
+  %tmp1 = and i32 %a, 3
+  %tmp2 = icmp ne i32 %tmp1, 0
+  %tmp3 = and i32 %a, 4
+  %tmp4 = icmp ne i32 %tmp3, 0
+  %tmp5 = and i1 %tmp2, %tmp4
+  ret i1 %tmp5
+}
+define i1 @icmp_clz2(i32 %a) {
+; CHECK-LABEL: @icmp_clz2(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[A]], i32 29)
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[A]]
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP2]], i1 false), !range !1
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw i32 [[TMP3]], -1
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ugt i32 [[TMP4]], 2
+; CHECK-NEXT:    ret i1 [[TMP5]]
+;
+  %tmp1 = and i32 %a, 6
+  %tmp2 = icmp ne i32 %tmp1, 0
+  %tmp3 = and i32 %a, 8
+  %tmp4 = icmp ne i32 %tmp3, 0
+  %tmp5 = and i1 %tmp2, %tmp4
+  ret i1 %tmp5
+}
+define i1 @icmp_clz3(i8 %a) {
+; CHECK-LABEL: @icmp_clz3(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.fshl.i8(i8 [[A:%.*]], i8 [[A]], i8 3)
+; CHECK-NEXT:    [[TMP2:%.*]] = call i8 @llvm.fshl.i8(i8 [[A]], i8 [[A]], i8 3)
+; CHECK-NEXT:    [[TMP3:%.*]] = and i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call i8 @llvm.cttz.i8(i8 [[TMP3]], i1 false), !range !2
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw i8 [[TMP4]], -1
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ugt i8 [[TMP5]], 2
+; CHECK-NEXT:    ret i1 [[TMP6]]
+;
+  %tmp1 = and i8 %a, 192
+  %tmp2 = icmp ne i8 %tmp1, 128
+  %tmp3 = and i8 %a, 32
+  %tmp4 = icmp ne i8 %tmp3, 0
+  %tmp5 = and i1 %tmp2, %tmp4
+  ret i1 %tmp5
+}
+define i1 @icmp_clz5(i8 %a) {
+; CHECK-LABEL: @icmp_clz5(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.fshl.i8(i8 [[A:%.*]], i8 [[A]], i8 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = call i8 @llvm.fshl.i8(i8 [[A]], i8 [[A]], i8 5)
+; CHECK-NEXT:    [[TMP3:%.*]] = and i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call i8 @llvm.cttz.i8(i8 [[TMP3]], i1 false), !range !2
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw i8 [[TMP4]], -1
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i8 [[TMP5]], 5
+; CHECK-NEXT:    ret i1 [[TMP6]]
+;
+  %tmp1 = and i8 %a, 240 ; 0b11110000
+  %tmp2 = icmp eq i8 %tmp1, 144 ; 0b10010000
+  %tmp3 = and i8 %a, 8
+  %tmp4 = icmp ne i8 %tmp3, 0
+  %tmp5 = or i1 %tmp2, %tmp4
+  ret i1 %tmp5
+}
+define i1 @icmp_clz6(i8 %a) {
+; CHECK-LABEL: @icmp_clz6(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.fshl.i8(i8 [[A:%.*]], i8 [[A]], i8 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = call i8 @llvm.fshl.i8(i8 [[A]], i8 [[A]], i8 1)
+; CHECK-NEXT:    [[TMP3:%.*]] = and i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i8 [[TMP3]], -32
+; CHECK-NEXT:    [[TMP5:%.*]] = call i8 @llvm.cttz.i8(i8 [[TMP4]], i1 false), !range !2
+; CHECK-NEXT:    [[TMP6:%.*]] = add nsw i8 [[TMP5]], -1
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ult i8 [[TMP6]], 5
+; CHECK-NEXT:    ret i1 [[TMP7]]
+;
+  %tmp1 = and i8 %a, 4
+  %tmp2 = icmp ne i8 %tmp1, 0
+  %tmp3 = and i8 %a, 240
+  %tmp4 = icmp eq i8 %tmp3, 144
+  %tmp5 = or i1 %tmp2, %tmp4
+  ret i1 %tmp5
+}
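
Aside (not part of the patch): a minimal C++ sanity check of the equivalence quoted in the new comment, (icmp eq (A & 3), 0) && (icmp ne (A & 4), 0) -> (icmp eq (cttz A), 2), assuming GCC/Clang's __builtin_ctz; the helper name cttz32 is invented here, and cttz(0) is defined as the bit width to match llvm.cttz(..., i1 false).

#include <cassert>
#include <cstdint>

// cttz with cttz(0) defined as the bit width (32), matching llvm.cttz(x, i1 false).
static unsigned cttz32(uint32_t A) { return A ? __builtin_ctz(A) : 32; }

int main() {
  // Only bits 0..2 of A influence either side of the equivalence, so sweeping
  // the low 16 bits is already exhaustive for the property being checked.
  for (uint32_t A = 0; A <= 0xFFFF; ++A) {
    bool Lhs = (A & 3) == 0 && (A & 4) != 0; // the original two-compare form
    bool Rhs = cttz32(A) == 2;               // the single cttz compare
    assert(Lhs == Rhs);
  }
  return 0;
}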