diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -79,6 +79,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" @@ -106,6 +107,9 @@ STATISTIC(NumMemSet, "Number of memset's formed from loop stores"); STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores"); +STATISTIC( + NumShiftUntilBitTest, + "Number of uncountable loops recognized as 'shift until bitttest' idiom"); bool DisableLIRP::All; static cl::opt @@ -227,6 +231,8 @@ const DebugLoc &DL, bool ZeroCheck, bool IsCntPhiUsedOutsideLoop); + bool recognizeShiftUntilBitTest(); + /// @} }; @@ -1228,7 +1234,8 @@ << "] Noncountable Loop %" << CurLoop->getHeader()->getName() << "\n"); - return recognizePopcount() || recognizeAndInsertFFS(); + return recognizePopcount() || recognizeAndInsertFFS() || + recognizeShiftUntilBitTest(); } /// Check if the given conditional branch is based on the comparison between @@ -1903,3 +1910,345 @@ // loop. The loop would otherwise not be deleted even if it becomes empty. SE->forgetLoop(CurLoop); } + +/// Match loop-invariant value. +template struct match_LoopInvariant { + SubPattern_t SubPattern; + const Loop *L; + + match_LoopInvariant(const SubPattern_t &SP, const Loop *L) + : SubPattern(SP), L(L) {} + + template bool match(ITy *V) { + return L->isLoopInvariant(V) && SubPattern.match(V); + } +}; + +/// Matches if the value is loop-invariant. +template +inline match_LoopInvariant m_LoopInvariant(const Ty &M, const Loop *L) { + return match_LoopInvariant(M, L); +} + +/// Match PHI incoming block/value pair +template +struct match_Incoming { + BlockSubPattern_t BlockSubPattern; + ValueSubPattern_t ValueSubPattern; + + match_Incoming(BlockSubPattern_t BSP, ValueSubPattern_t VSP) + : BlockSubPattern(BSP), ValueSubPattern(VSP) {} + + template bool match(ITy *V, int IncomingIdx) { + auto *PHI = dyn_cast(V); + if (!PHI) + return false; + return BlockSubPattern.match(PHI->getIncomingBlock(IncomingIdx)) && + ValueSubPattern.match(PHI->getIncomingValue(IncomingIdx)); + } +}; + +/// Matches if the PHI node has the following incoming block/value pair. +template +inline match_Incoming m_Incoming(const BTy &BM, const VTy &VM) { + return match_Incoming(BM, VM); +} + +/// Match PHI node. +/// FIXME: generalize to the variadic number of incomings. +template +struct match_PHI { + FirstIncomingTy FirstSubPattern; + SecondIncomingTy SecondSubPattern; + + match_PHI(FirstIncomingTy FirstSubPattern, SecondIncomingTy SecondSubPattern) + : FirstSubPattern(FirstSubPattern), SecondSubPattern(SecondSubPattern) {} + + template bool match(ITy *V) { + auto *PHI = dyn_cast(V); + if (!PHI || PHI->getNumOperands() != 2) + return false; + return (FirstSubPattern.match(PHI, 0) && SecondSubPattern.match(PHI, 1)) || + (FirstSubPattern.match(PHI, 1) && SecondSubPattern.match(PHI, 0)); + } +}; + +/// Matches if the value is a PHI. +template +inline match_PHI +m_PHI(FirstIncomingTy FirstSubPattern, SecondIncomingTy SecondSubPattern) { + return match_PHI(FirstSubPattern, + SecondSubPattern); +} + +/// Return true if the idiom is detected in the loop. +/// +/// The core idiom we are trying to detect is: +/// \code +/// entry: +/// <...> +/// %bitmask = shl i32 1, %bitpos +/// br label %loop +/// +/// loop: +/// %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ] +/// %x.curr.bitmasked = and i32 %x.curr, %bitmask +/// %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0 +/// %x.next = shl i32 %x.curr, 1 +/// br i1 %x.curr.isbitunset, label %loop, label %end +/// +/// end: +/// %x.res = phi i32 [ %x.curr, %loop ] <...> +/// <...> +/// \endcode +static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX, + Value *&BitMask, Value *&BitPos, + Value *&CurrX, Value *&NextX) { + LLVM_DEBUG(dbgs() << DEBUG_TYPE + " Performing shift-until-bittest idiom detection.\n"); + + // Give up if the loop has multiple blocks or multiple backedges. + if (CurLoop->getNumBlocks() != 1 || CurLoop->getNumBackEdges() != 1) { + LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad block/backedge count.\n"); + return false; + } + + BasicBlock *LoopHeaderBB = CurLoop->getHeader(); + BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader(); + assert(LoopPreheaderBB && "There is always a loop preheader."); + + using namespace PatternMatch; + + // Step 1: Check if the loop backedge is in desirable form. + + ICmpInst::Predicate Pred; + Value *CmpLHS, *CmpRHS; + BasicBlock *TrueBB, *FalseBB; + if (!match(LoopHeaderBB->getTerminator(), + m_Br(m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)), + m_BasicBlock(TrueBB), m_BasicBlock(FalseBB)))) { + LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge structure.\n"); + return false; + } + + // Step 2: Check if the backedge's condition is in desirable form. + + auto MatchVariableBitMask = [&]() { + return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) && + match(CmpLHS, + m_c_And(m_Value(CurrX), + m_CombineAnd( + m_Value(BitMask), + m_LoopInvariant(m_Shl(m_One(), m_Value(BitPos)), + CurLoop)))); + }; + + if (!MatchVariableBitMask()) { + // FIXME: support constant bit mask. + // FIXME: support sign bit test (use llvm::decomposeBitTestICmp()). + LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge comparison.\n"); + return false; + } + + // Step 3: Check if the recurrence is in desirable form. + if (!match( + CurrX, + m_PHI(m_Incoming(m_SpecificBB(LoopPreheaderBB), m_Value(BaseX)), + m_Incoming(m_SpecificBB(LoopHeaderBB), + m_CombineAnd(m_Value(NextX), + m_Shl(m_Deferred(CurrX), m_One())))))) { + // FIXME: support right-shift? + LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad recurrence.\n"); + return false; + } + + // Step 4: Check if the backedge's destinations are in desirable form. + + assert(ICmpInst::isEquality(Pred) && + "Should only get equality predicates here."); + + // cmp-br is commutative, so canonicalize to a single variant. + if (Pred != ICmpInst::Predicate::ICMP_EQ) { + Pred = ICmpInst::getInversePredicate(Pred); + std::swap(TrueBB, FalseBB); + } + + // We expect to exit loop when comparison yields false, + // so when it yields true we should branch back to loop header. + if (TrueBB != LoopHeaderBB) { + LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge flow.\n"); + return false; + } + + // Okay, idiom checks out. + return true; +} + +/// Look for the following loop: +/// \code +/// entry: +/// <...> +/// %bitmask = shl i32 1, %bitpos +/// br label %loop +/// +/// loop: +/// %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ] +/// %x.curr.bitmasked = and i32 %x.curr, %bitmask +/// %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0 +/// %x.next = shl i32 %x.curr, 1 +/// br i1 %x.curr.isbitunset, label %loop, label %end +/// +/// end: +/// %x.res = phi i32 [ %x.curr, %loop ] <...> +/// <...> +/// \endcode +/// +/// And transform it into: +/// \code +/// entry: +/// %bitmask = shl i32 1, %bitpos +/// %lowbitmask = add i32 %bitmask, -1 +/// %mask = or i32 %lowbitmask, %bitmask +/// %x.masked = and i32 %x, %mask +/// %x.masked.numleadingzeros = call i32 @llvm.ctlz.i32(i32 %x.masked, +/// i1 true) +/// %x.masked.numactivebits = sub i32 32, %x.masked.numleadingzeros +/// %x.masked.leadingonepos = sub i32 %x.masked.numactivebits, 1 +/// %backedgetakencount = sub i32 %bitpos, %x.masked.leadingonepos +/// %tripcount = add i32 %backedgetakencount, 1 +/// %x.curr = shl i32 %x, %backedgetakencount +/// %x.next = shl i32 %x, %tripcount +/// br label %end +/// +/// end: +/// %x.res0 = phi i32 [ %x.curr, %loop ] <...> +/// %x.res1 = phi i32 [ %x.next, %loop ] <...> +/// <...> +/// \endcode +bool LoopIdiomRecognize::recognizeShiftUntilBitTest() { + bool MadeChange = false; + + Value *X, *BitMask, *BitPos, *XCurr, *XNext; + if (!detectShiftUntilBitTestIdiom(CurLoop, X, BitMask, BitPos, XCurr, + XNext)) { + LLVM_DEBUG(dbgs() << DEBUG_TYPE + " shift-until-bittest idiom detection failed.\n"); + return MadeChange; + } + LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom detected!\n"); + + // Ok, it is the idiom we were looking for, we *could* transform this loop, + // but is it profitable to transform? + + BasicBlock *LoopHeaderBB = CurLoop->getHeader(); + BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader(); + assert(LoopPreheaderBB && "There is always a loop preheader."); + + BasicBlock *SuccessorBB = CurLoop->getExitBlock(); + assert(LoopPreheaderBB && "There is only a single successor."); + + // The loop must not have any other instructions other than the idiom itself. + // FIXME: we could just rewrite the loop with countable trip count. + size_t HeaderSize = LoopHeaderBB->sizeWithoutDebug(); + constexpr size_t CanonicalHeaderSize = 5; + assert(HeaderSize >= CanonicalHeaderSize); + if (HeaderSize > CanonicalHeaderSize) { + LLVM_DEBUG(dbgs() << DEBUG_TYPE " Won't be able to delete loop!\n"); + return MadeChange; + } + + // Only the recurrence itself is allowed to have uses outside of the loop. + if (any_of(SuccessorBB->phis(), [&](PHINode &PN) { + Value *IV = PN.getIncomingValueForBlock(LoopHeaderBB); + return IV != XCurr && IV != XNext && !CurLoop->isLoopInvariant(IV); + })) { + LLVM_DEBUG(dbgs() << DEBUG_TYPE " In-loop value is live-out!\n"); + return MadeChange; + } + // FIXME: we *could* allow this. + + IRBuilder<> Builder(LoopPreheaderBB->getTerminator()); + Builder.SetCurrentDebugLocation(cast(XCurr)->getDebugLoc()); + + Intrinsic::ID IntrID = Intrinsic::ctlz; + Type *Ty = X->getType(); + + TargetTransformInfo::TargetCostKind CostKind = + TargetTransformInfo::TCK_SizeAndLatency; + + // Also, the intrinsic and shift we'll use must be cheap. + IntrinsicCostAttributes Attrs( + IntrID, Ty, {UndefValue::get(Ty), /*is_zero_undef=*/Builder.getTrue()}); + int Cost = TTI->getIntrinsicInstrCost(Attrs, CostKind); + if (Cost > TargetTransformInfo::TCC_Basic) { + LLVM_DEBUG(dbgs() << DEBUG_TYPE + " Intrinsic is too costly, not beneficial\n"); + return MadeChange; + } + if (TTI->getArithmeticInstrCost(Instruction::Shl, Ty, CostKind) > + TargetTransformInfo::TCC_Basic) { + LLVM_DEBUG(dbgs() << DEBUG_TYPE " Shift is too costly, not beneficial\n"); + return MadeChange; + } + + // Ok, transform appears worthwhile. + MadeChange = true; + + // Step 1: Compute the loop trip count. + + Value *LowBitMask = + Builder.CreateAdd(BitMask, Constant::getAllOnesValue(BitMask->getType()), + BitPos->getName() + ".lowbitmask"); + Value *Mask = + Builder.CreateOr(LowBitMask, BitMask, BitPos->getName() + ".mask"); + Value *XMasked = Builder.CreateAnd(X, Mask, X->getName() + ".masked"); + CallInst *XMaskedNumLeadingZeros = Builder.CreateIntrinsic( + IntrID, Ty, {XMasked, /*is_zero_undef=*/Builder.getTrue()}, + /*FMFSource=*/nullptr, XMasked->getName() + ".numleadingzeros"); + Value *XMaskedNumActiveBits = Builder.CreateSub( + ConstantInt::get(X->getType(), X->getType()->getScalarSizeInBits()), + XMaskedNumLeadingZeros, XMasked->getName() + ".numactivebits"); + Value *XMaskedLeadingOnePos = Builder.CreateAdd( + XMaskedNumActiveBits, Constant::getAllOnesValue(BitMask->getType()), + XMasked->getName() + ".numleadingonepos"); + + Value *LoopBackedgeTakenCount = Builder.CreateSub( + BitPos, XMaskedLeadingOnePos, CurLoop->getName() + ".backedgetakencount"); + // We know loop's backedge-taken count, but what's loop's trip count? + // Note that while NUW is always safe, while NSW is only for bitwidths != 2. + Value *LoopTripCount = + Builder.CreateNUWAdd(LoopBackedgeTakenCount, ConstantInt::get(Ty, 1), + CurLoop->getName() + ".tripcount"); + + // Step 2: Compute the recurrence's final value without a loop. + + Value *NewX = Builder.CreateShl(X, LoopBackedgeTakenCount); + NewX->takeName(XCurr); + + Value *NewXNext = Builder.CreateShl(X, LoopTripCount); + NewXNext->takeName(XNext); + + // Step 3: Replace all references to the recurrence with + // computed recurrence's final value. + + XCurr->replaceAllUsesWith(NewX); + XNext->replaceAllUsesWith(NewXNext); + + // Step 4: Fix the loop back-edge to always exit upon first iteration. + + Builder.SetInsertPoint(LoopHeaderBB->getTerminator()); + Builder.CreateCondBr(Builder.getTrue(), SuccessorBB, LoopHeaderBB); + LoopHeaderBB->getTerminator()->eraseFromParent(); + + // Step 5: Forget the "non-computable" trip-count SCEV associated with the + // loop. The loop would otherwise not be deleted even if it becomes empty. + + SE->forgetLoop(CurLoop); + + // Other passes will take care of actually deleting the loop. + + LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom optimized!\n"); + + ++NumShiftUntilBitTest; + return MadeChange; +} diff --git a/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll b/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll --- a/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll +++ b/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll @@ -15,25 +15,55 @@ ; Most basic example. define i32 @p0_i32(i32 %x, i32 %bit) { -; ALL-LABEL: @p0_i32( -; ALL-NEXT: entry: -; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], [[DBG16:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], [[META9:metadata !.*]], metadata !DIExpression()), [[DBG16]] -; ALL-NEXT: br label [[LOOP:%.*]], [[DBG17:!dbg !.*]] -; ALL: loop: -; ALL-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG18:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META11:metadata !.*]], metadata !DIExpression()), [[DBG18]] -; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], [[DBG19:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META12:metadata !.*]], metadata !DIExpression()), [[DBG19]] -; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG20:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META13:metadata !.*]], metadata !DIExpression()), [[DBG20]] -; ALL-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG21:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META15:metadata !.*]], metadata !DIExpression()), [[DBG21]] -; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], [[DBG22:!dbg !.*]] -; ALL: end: -; ALL-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG18]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_LCSSA]], [[META11]], metadata !DIExpression()), [[DBG18]] -; ALL-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG23:!dbg !.*]] +; LZCNT-LABEL: @p0_i32( +; LZCNT-NEXT: entry: +; LZCNT-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], [[DBG16:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], [[META9:metadata !.*]], metadata !DIExpression()), [[DBG16]] +; LZCNT-NEXT: [[BIT_LOWBITMASK:%.*]] = add i32 [[BITMASK]], -1, [[DBG17:!dbg !.*]] +; LZCNT-NEXT: [[BIT_MASK:%.*]] = or i32 [[BIT_LOWBITMASK]], [[BITMASK]], [[DBG17]] +; LZCNT-NEXT: [[X_MASKED:%.*]] = and i32 [[X:%.*]], [[BIT_MASK]], [[DBG17]] +; LZCNT-NEXT: [[X_MASKED_NUMLEADINGZEROS:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X_MASKED]], i1 true), [[DBG17]] +; LZCNT-NEXT: [[X_MASKED_NUMACTIVEBITS:%.*]] = sub i32 32, [[X_MASKED_NUMLEADINGZEROS]], [[DBG17]] +; LZCNT-NEXT: [[X_MASKED_NUMLEADINGONEPOS:%.*]] = add i32 [[X_MASKED_NUMACTIVEBITS]], -1, [[DBG17]] +; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_NUMLEADINGONEPOS]], [[DBG17]] +; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG17]] +; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG17]] +; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG17]] +; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG18:!dbg !.*]] +; LZCNT: loop: +; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[X_NEXT]], [[LOOP]] ], [[DBG17]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META11:metadata !.*]], metadata !DIExpression()), [[DBG17]] +; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], [[DBG19:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META12:metadata !.*]], metadata !DIExpression()), [[DBG19]] +; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG20:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META13:metadata !.*]], metadata !DIExpression()), [[DBG20]] +; LZCNT-NEXT: [[TMP1:%.*]] = shl i32 [[X_CURR]], 1, [[DBG21:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META15:metadata !.*]], metadata !DIExpression()), [[DBG21]] +; LZCNT-NEXT: br i1 true, label [[END:%.*]], label [[LOOP]], [[DBG22:!dbg !.*]] +; LZCNT: end: +; LZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG17]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_LCSSA]], [[META11]], metadata !DIExpression()), [[DBG17]] +; LZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG23:!dbg !.*]] +; +; NOLZCNT-LABEL: @p0_i32( +; NOLZCNT-NEXT: entry: +; NOLZCNT-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], [[DBG16:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], [[META9:metadata !.*]], metadata !DIExpression()), [[DBG16]] +; NOLZCNT-NEXT: br label [[LOOP:%.*]], [[DBG17:!dbg !.*]] +; NOLZCNT: loop: +; NOLZCNT-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG18:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META11:metadata !.*]], metadata !DIExpression()), [[DBG18]] +; NOLZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], [[DBG19:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META12:metadata !.*]], metadata !DIExpression()), [[DBG19]] +; NOLZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG20:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META13:metadata !.*]], metadata !DIExpression()), [[DBG20]] +; NOLZCNT-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG21:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META15:metadata !.*]], metadata !DIExpression()), [[DBG21]] +; NOLZCNT-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], [[DBG22:!dbg !.*]] +; NOLZCNT: end: +; NOLZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG18]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_LCSSA]], [[META11]], metadata !DIExpression()), [[DBG18]] +; NOLZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG23:!dbg !.*]] ; entry: %bitmask = shl i32 1, %bit @@ -52,25 +82,55 @@ ; Same, but in some other bit width. define i16 @p1_i16(i16 %x, i16 %bit) { -; ALL-LABEL: @p1_i16( -; ALL-NEXT: entry: -; ALL-NEXT: [[BITMASK:%.*]] = shl i16 1, [[BIT:%.*]], [[DBG32:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i16 [[BITMASK]], [[META26:metadata !.*]], metadata !DIExpression()), [[DBG32]] -; ALL-NEXT: br label [[LOOP:%.*]], [[DBG33:!dbg !.*]] -; ALL: loop: -; ALL-NEXT: [[X_CURR:%.*]] = phi i16 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG34:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i16 [[X_CURR]], [[META28:metadata !.*]], metadata !DIExpression()), [[DBG34]] -; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i16 [[X_CURR]], [[BITMASK]], [[DBG35:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i16 [[X_CURR_BITMASKED]], [[META29:metadata !.*]], metadata !DIExpression()), [[DBG35]] -; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i16 [[X_CURR_BITMASKED]], 0, [[DBG36:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META30:metadata !.*]], metadata !DIExpression()), [[DBG36]] -; ALL-NEXT: [[X_NEXT]] = shl i16 [[X_CURR]], 1, [[DBG37:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i16 [[X_NEXT]], [[META31:metadata !.*]], metadata !DIExpression()), [[DBG37]] -; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], [[DBG38:!dbg !.*]] -; ALL: end: -; ALL-NEXT: [[X_CURR_LCSSA:%.*]] = phi i16 [ [[X_CURR]], [[LOOP]] ], [[DBG34]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i16 [[X_CURR_LCSSA]], [[META28]], metadata !DIExpression()), [[DBG34]] -; ALL-NEXT: ret i16 [[X_CURR_LCSSA]], [[DBG39:!dbg !.*]] +; LZCNT-LABEL: @p1_i16( +; LZCNT-NEXT: entry: +; LZCNT-NEXT: [[BITMASK:%.*]] = shl i16 1, [[BIT:%.*]], [[DBG32:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i16 [[BITMASK]], [[META26:metadata !.*]], metadata !DIExpression()), [[DBG32]] +; LZCNT-NEXT: [[BIT_LOWBITMASK:%.*]] = add i16 [[BITMASK]], -1, [[DBG33:!dbg !.*]] +; LZCNT-NEXT: [[BIT_MASK:%.*]] = or i16 [[BIT_LOWBITMASK]], [[BITMASK]], [[DBG33]] +; LZCNT-NEXT: [[X_MASKED:%.*]] = and i16 [[X:%.*]], [[BIT_MASK]], [[DBG33]] +; LZCNT-NEXT: [[X_MASKED_NUMLEADINGZEROS:%.*]] = call i16 @llvm.ctlz.i16(i16 [[X_MASKED]], i1 true), [[DBG33]] +; LZCNT-NEXT: [[X_MASKED_NUMACTIVEBITS:%.*]] = sub i16 16, [[X_MASKED_NUMLEADINGZEROS]], [[DBG33]] +; LZCNT-NEXT: [[X_MASKED_NUMLEADINGONEPOS:%.*]] = add i16 [[X_MASKED_NUMACTIVEBITS]], -1, [[DBG33]] +; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i16 [[BIT]], [[X_MASKED_NUMLEADINGONEPOS]], [[DBG33]] +; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i16 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG33]] +; LZCNT-NEXT: [[X_CURR:%.*]] = shl i16 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG33]] +; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i16 [[X]], [[LOOP_TRIPCOUNT]], [[DBG33]] +; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG34:!dbg !.*]] +; LZCNT: loop: +; LZCNT-NEXT: [[TMP0:%.*]] = phi i16 [ [[X]], [[ENTRY:%.*]] ], [ [[X_NEXT]], [[LOOP]] ], [[DBG33]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i16 [[X_CURR]], [[META28:metadata !.*]], metadata !DIExpression()), [[DBG33]] +; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i16 [[X_CURR]], [[BITMASK]], [[DBG35:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i16 [[X_CURR_BITMASKED]], [[META29:metadata !.*]], metadata !DIExpression()), [[DBG35]] +; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i16 [[X_CURR_BITMASKED]], 0, [[DBG36:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META30:metadata !.*]], metadata !DIExpression()), [[DBG36]] +; LZCNT-NEXT: [[TMP1:%.*]] = shl i16 [[X_CURR]], 1, [[DBG37:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i16 [[X_NEXT]], [[META31:metadata !.*]], metadata !DIExpression()), [[DBG37]] +; LZCNT-NEXT: br i1 true, label [[END:%.*]], label [[LOOP]], [[DBG38:!dbg !.*]] +; LZCNT: end: +; LZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i16 [ [[X_CURR]], [[LOOP]] ], [[DBG33]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i16 [[X_CURR_LCSSA]], [[META28]], metadata !DIExpression()), [[DBG33]] +; LZCNT-NEXT: ret i16 [[X_CURR_LCSSA]], [[DBG39:!dbg !.*]] +; +; NOLZCNT-LABEL: @p1_i16( +; NOLZCNT-NEXT: entry: +; NOLZCNT-NEXT: [[BITMASK:%.*]] = shl i16 1, [[BIT:%.*]], [[DBG32:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i16 [[BITMASK]], [[META26:metadata !.*]], metadata !DIExpression()), [[DBG32]] +; NOLZCNT-NEXT: br label [[LOOP:%.*]], [[DBG33:!dbg !.*]] +; NOLZCNT: loop: +; NOLZCNT-NEXT: [[X_CURR:%.*]] = phi i16 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG34:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i16 [[X_CURR]], [[META28:metadata !.*]], metadata !DIExpression()), [[DBG34]] +; NOLZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i16 [[X_CURR]], [[BITMASK]], [[DBG35:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i16 [[X_CURR_BITMASKED]], [[META29:metadata !.*]], metadata !DIExpression()), [[DBG35]] +; NOLZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i16 [[X_CURR_BITMASKED]], 0, [[DBG36:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META30:metadata !.*]], metadata !DIExpression()), [[DBG36]] +; NOLZCNT-NEXT: [[X_NEXT]] = shl i16 [[X_CURR]], 1, [[DBG37:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i16 [[X_NEXT]], [[META31:metadata !.*]], metadata !DIExpression()), [[DBG37]] +; NOLZCNT-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], [[DBG38:!dbg !.*]] +; NOLZCNT: end: +; NOLZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i16 [ [[X_CURR]], [[LOOP]] ], [[DBG34]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i16 [[X_CURR_LCSSA]], [[META28]], metadata !DIExpression()), [[DBG34]] +; NOLZCNT-NEXT: ret i16 [[X_CURR_LCSSA]], [[DBG39:!dbg !.*]] ; entry: %bitmask = shl i16 1, %bit @@ -155,24 +215,53 @@ ; We don't particularly care whether %x.curr or %x.curr will live-out. define i32 @p4_different_liveout(i32 %x, i32 %bit) { -; ALL-LABEL: @p4_different_liveout( -; ALL-NEXT: entry: -; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], [[DBG73:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], [[META68:metadata !.*]], metadata !DIExpression()), [[DBG73]] -; ALL-NEXT: br label [[LOOP:%.*]], [[DBG74:!dbg !.*]] -; ALL: loop: -; ALL-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG75:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META69:metadata !.*]], metadata !DIExpression()), [[DBG75]] -; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], [[DBG76:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META70:metadata !.*]], metadata !DIExpression()), [[DBG76]] -; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG77:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META71:metadata !.*]], metadata !DIExpression()), [[DBG77]] -; ALL-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG78:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META72:metadata !.*]], metadata !DIExpression()), [[DBG78]] -; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], [[DBG79:!dbg !.*]] -; ALL: end: -; ALL-NEXT: [[X_NEXT_LCSSA:%.*]] = phi i32 [ [[X_NEXT]], [[LOOP]] ], [[DBG78]] -; ALL-NEXT: ret i32 [[X_NEXT_LCSSA]], [[DBG80:!dbg !.*]] +; LZCNT-LABEL: @p4_different_liveout( +; LZCNT-NEXT: entry: +; LZCNT-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], [[DBG73:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], [[META68:metadata !.*]], metadata !DIExpression()), [[DBG73]] +; LZCNT-NEXT: [[BIT_LOWBITMASK:%.*]] = add i32 [[BITMASK]], -1, [[DBG74:!dbg !.*]] +; LZCNT-NEXT: [[BIT_MASK:%.*]] = or i32 [[BIT_LOWBITMASK]], [[BITMASK]], [[DBG74]] +; LZCNT-NEXT: [[X_MASKED:%.*]] = and i32 [[X:%.*]], [[BIT_MASK]], [[DBG74]] +; LZCNT-NEXT: [[X_MASKED_NUMLEADINGZEROS:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X_MASKED]], i1 true), [[DBG74]] +; LZCNT-NEXT: [[X_MASKED_NUMACTIVEBITS:%.*]] = sub i32 32, [[X_MASKED_NUMLEADINGZEROS]], [[DBG74]] +; LZCNT-NEXT: [[X_MASKED_NUMLEADINGONEPOS:%.*]] = add i32 [[X_MASKED_NUMACTIVEBITS]], -1, [[DBG74]] +; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_NUMLEADINGONEPOS]], [[DBG74]] +; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG74]] +; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG74]] +; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG74]] +; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG75:!dbg !.*]] +; LZCNT: loop: +; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[X_NEXT]], [[LOOP]] ], [[DBG74]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META69:metadata !.*]], metadata !DIExpression()), [[DBG74]] +; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], [[DBG76:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META70:metadata !.*]], metadata !DIExpression()), [[DBG76]] +; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG77:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META71:metadata !.*]], metadata !DIExpression()), [[DBG77]] +; LZCNT-NEXT: [[TMP1:%.*]] = shl i32 [[X_CURR]], 1, [[DBG78:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META72:metadata !.*]], metadata !DIExpression()), [[DBG78]] +; LZCNT-NEXT: br i1 true, label [[END:%.*]], label [[LOOP]], [[DBG79:!dbg !.*]] +; LZCNT: end: +; LZCNT-NEXT: [[X_NEXT_LCSSA:%.*]] = phi i32 [ [[X_NEXT]], [[LOOP]] ], [[DBG78]] +; LZCNT-NEXT: ret i32 [[X_NEXT_LCSSA]], [[DBG80:!dbg !.*]] +; +; NOLZCNT-LABEL: @p4_different_liveout( +; NOLZCNT-NEXT: entry: +; NOLZCNT-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], [[DBG73:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], [[META68:metadata !.*]], metadata !DIExpression()), [[DBG73]] +; NOLZCNT-NEXT: br label [[LOOP:%.*]], [[DBG74:!dbg !.*]] +; NOLZCNT: loop: +; NOLZCNT-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG75:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META69:metadata !.*]], metadata !DIExpression()), [[DBG75]] +; NOLZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], [[DBG76:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META70:metadata !.*]], metadata !DIExpression()), [[DBG76]] +; NOLZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG77:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META71:metadata !.*]], metadata !DIExpression()), [[DBG77]] +; NOLZCNT-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG78:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META72:metadata !.*]], metadata !DIExpression()), [[DBG78]] +; NOLZCNT-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], [[DBG79:!dbg !.*]] +; NOLZCNT: end: +; NOLZCNT-NEXT: [[X_NEXT_LCSSA:%.*]] = phi i32 [ [[X_NEXT]], [[LOOP]] ], [[DBG78]] +; NOLZCNT-NEXT: ret i32 [[X_NEXT_LCSSA]], [[DBG80:!dbg !.*]] ; entry: %bitmask = shl i32 1, %bit @@ -190,28 +279,61 @@ } ; Even both of them being live-out is fine. define void @p5_both_liveout(i32 %x, i32 %bit, i32* %p0, i32* %p1) { -; ALL-LABEL: @p5_both_liveout( -; ALL-NEXT: entry: -; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], [[DBG88:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], [[META83:metadata !.*]], metadata !DIExpression()), [[DBG88]] -; ALL-NEXT: br label [[LOOP:%.*]], [[DBG89:!dbg !.*]] -; ALL: loop: -; ALL-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG90:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META84:metadata !.*]], metadata !DIExpression()), [[DBG90]] -; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], [[DBG91:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META85:metadata !.*]], metadata !DIExpression()), [[DBG91]] -; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG92:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META86:metadata !.*]], metadata !DIExpression()), [[DBG92]] -; ALL-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG93:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META87:metadata !.*]], metadata !DIExpression()), [[DBG93]] -; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], [[DBG94:!dbg !.*]] -; ALL: end: -; ALL-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG90]] -; ALL-NEXT: [[X_NEXT_LCSSA:%.*]] = phi i32 [ [[X_NEXT]], [[LOOP]] ], [[DBG93]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_LCSSA]], [[META84]], metadata !DIExpression()), [[DBG90]] -; ALL-NEXT: store i32 [[X_CURR_LCSSA]], i32* [[P0:%.*]], align 4, [[DBG95:!dbg !.*]] -; ALL-NEXT: store i32 [[X_NEXT_LCSSA]], i32* [[P1:%.*]], align 4, [[DBG96:!dbg !.*]] -; ALL-NEXT: ret void, [[DBG97:!dbg !.*]] +; LZCNT-LABEL: @p5_both_liveout( +; LZCNT-NEXT: entry: +; LZCNT-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], [[DBG88:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], [[META83:metadata !.*]], metadata !DIExpression()), [[DBG88]] +; LZCNT-NEXT: [[BIT_LOWBITMASK:%.*]] = add i32 [[BITMASK]], -1, [[DBG89:!dbg !.*]] +; LZCNT-NEXT: [[BIT_MASK:%.*]] = or i32 [[BIT_LOWBITMASK]], [[BITMASK]], [[DBG89]] +; LZCNT-NEXT: [[X_MASKED:%.*]] = and i32 [[X:%.*]], [[BIT_MASK]], [[DBG89]] +; LZCNT-NEXT: [[X_MASKED_NUMLEADINGZEROS:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X_MASKED]], i1 true), [[DBG89]] +; LZCNT-NEXT: [[X_MASKED_NUMACTIVEBITS:%.*]] = sub i32 32, [[X_MASKED_NUMLEADINGZEROS]], [[DBG89]] +; LZCNT-NEXT: [[X_MASKED_NUMLEADINGONEPOS:%.*]] = add i32 [[X_MASKED_NUMACTIVEBITS]], -1, [[DBG89]] +; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_NUMLEADINGONEPOS]], [[DBG89]] +; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG89]] +; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG89]] +; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG89]] +; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG90:!dbg !.*]] +; LZCNT: loop: +; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[X_NEXT]], [[LOOP]] ], [[DBG89]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META84:metadata !.*]], metadata !DIExpression()), [[DBG89]] +; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], [[DBG91:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META85:metadata !.*]], metadata !DIExpression()), [[DBG91]] +; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG92:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META86:metadata !.*]], metadata !DIExpression()), [[DBG92]] +; LZCNT-NEXT: [[TMP1:%.*]] = shl i32 [[X_CURR]], 1, [[DBG93:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META87:metadata !.*]], metadata !DIExpression()), [[DBG93]] +; LZCNT-NEXT: br i1 true, label [[END:%.*]], label [[LOOP]], [[DBG94:!dbg !.*]] +; LZCNT: end: +; LZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG89]] +; LZCNT-NEXT: [[X_NEXT_LCSSA:%.*]] = phi i32 [ [[X_NEXT]], [[LOOP]] ], [[DBG93]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_LCSSA]], [[META84]], metadata !DIExpression()), [[DBG89]] +; LZCNT-NEXT: store i32 [[X_CURR_LCSSA]], i32* [[P0:%.*]], align 4, [[DBG95:!dbg !.*]] +; LZCNT-NEXT: store i32 [[X_NEXT_LCSSA]], i32* [[P1:%.*]], align 4, [[DBG96:!dbg !.*]] +; LZCNT-NEXT: ret void, [[DBG97:!dbg !.*]] +; +; NOLZCNT-LABEL: @p5_both_liveout( +; NOLZCNT-NEXT: entry: +; NOLZCNT-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], [[DBG88:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], [[META83:metadata !.*]], metadata !DIExpression()), [[DBG88]] +; NOLZCNT-NEXT: br label [[LOOP:%.*]], [[DBG89:!dbg !.*]] +; NOLZCNT: loop: +; NOLZCNT-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG90:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META84:metadata !.*]], metadata !DIExpression()), [[DBG90]] +; NOLZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], [[DBG91:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META85:metadata !.*]], metadata !DIExpression()), [[DBG91]] +; NOLZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG92:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META86:metadata !.*]], metadata !DIExpression()), [[DBG92]] +; NOLZCNT-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG93:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META87:metadata !.*]], metadata !DIExpression()), [[DBG93]] +; NOLZCNT-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], [[DBG94:!dbg !.*]] +; NOLZCNT: end: +; NOLZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG90]] +; NOLZCNT-NEXT: [[X_NEXT_LCSSA:%.*]] = phi i32 [ [[X_NEXT]], [[LOOP]] ], [[DBG93]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_LCSSA]], [[META84]], metadata !DIExpression()), [[DBG90]] +; NOLZCNT-NEXT: store i32 [[X_CURR_LCSSA]], i32* [[P0:%.*]], align 4, [[DBG95:!dbg !.*]] +; NOLZCNT-NEXT: store i32 [[X_NEXT_LCSSA]], i32* [[P1:%.*]], align 4, [[DBG96:!dbg !.*]] +; NOLZCNT-NEXT: ret void, [[DBG97:!dbg !.*]] ; entry: %bitmask = shl i32 1, %bit @@ -295,25 +417,55 @@ ; Check that loop backedge's cmp-br order is correctly handled define i32 @p8(i32 %x, i32 %bit) { -; ALL-LABEL: @p8( -; ALL-NEXT: entry: -; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], [[DBG129:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], [[META124:metadata !.*]], metadata !DIExpression()), [[DBG129]] -; ALL-NEXT: br label [[LOOP:%.*]], [[DBG130:!dbg !.*]] -; ALL: loop: -; ALL-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG131:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META125:metadata !.*]], metadata !DIExpression()), [[DBG131]] -; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], [[DBG132:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META126:metadata !.*]], metadata !DIExpression()), [[DBG132]] -; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp ne i32 [[X_CURR_BITMASKED]], 0, [[DBG133:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META127:metadata !.*]], metadata !DIExpression()), [[DBG133]] -; ALL-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG134:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META128:metadata !.*]], metadata !DIExpression()), [[DBG134]] -; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[END:%.*]], label [[LOOP]], [[DBG135:!dbg !.*]] -; ALL: end: -; ALL-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG131]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_LCSSA]], [[META125]], metadata !DIExpression()), [[DBG131]] -; ALL-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG136:!dbg !.*]] +; LZCNT-LABEL: @p8( +; LZCNT-NEXT: entry: +; LZCNT-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], [[DBG129:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], [[META124:metadata !.*]], metadata !DIExpression()), [[DBG129]] +; LZCNT-NEXT: [[BIT_LOWBITMASK:%.*]] = add i32 [[BITMASK]], -1, [[DBG130:!dbg !.*]] +; LZCNT-NEXT: [[BIT_MASK:%.*]] = or i32 [[BIT_LOWBITMASK]], [[BITMASK]], [[DBG130]] +; LZCNT-NEXT: [[X_MASKED:%.*]] = and i32 [[X:%.*]], [[BIT_MASK]], [[DBG130]] +; LZCNT-NEXT: [[X_MASKED_NUMLEADINGZEROS:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X_MASKED]], i1 true), [[DBG130]] +; LZCNT-NEXT: [[X_MASKED_NUMACTIVEBITS:%.*]] = sub i32 32, [[X_MASKED_NUMLEADINGZEROS]], [[DBG130]] +; LZCNT-NEXT: [[X_MASKED_NUMLEADINGONEPOS:%.*]] = add i32 [[X_MASKED_NUMACTIVEBITS]], -1, [[DBG130]] +; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_NUMLEADINGONEPOS]], [[DBG130]] +; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG130]] +; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG130]] +; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG130]] +; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG131:!dbg !.*]] +; LZCNT: loop: +; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[X_NEXT]], [[LOOP]] ], [[DBG130]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META125:metadata !.*]], metadata !DIExpression()), [[DBG130]] +; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], [[DBG132:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META126:metadata !.*]], metadata !DIExpression()), [[DBG132]] +; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp ne i32 [[X_CURR_BITMASKED]], 0, [[DBG133:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META127:metadata !.*]], metadata !DIExpression()), [[DBG133]] +; LZCNT-NEXT: [[TMP1:%.*]] = shl i32 [[X_CURR]], 1, [[DBG134:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META128:metadata !.*]], metadata !DIExpression()), [[DBG134]] +; LZCNT-NEXT: br i1 true, label [[END:%.*]], label [[LOOP]], [[DBG135:!dbg !.*]] +; LZCNT: end: +; LZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG130]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_LCSSA]], [[META125]], metadata !DIExpression()), [[DBG130]] +; LZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG136:!dbg !.*]] +; +; NOLZCNT-LABEL: @p8( +; NOLZCNT-NEXT: entry: +; NOLZCNT-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], [[DBG129:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], [[META124:metadata !.*]], metadata !DIExpression()), [[DBG129]] +; NOLZCNT-NEXT: br label [[LOOP:%.*]], [[DBG130:!dbg !.*]] +; NOLZCNT: loop: +; NOLZCNT-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG131:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META125:metadata !.*]], metadata !DIExpression()), [[DBG131]] +; NOLZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], [[DBG132:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META126:metadata !.*]], metadata !DIExpression()), [[DBG132]] +; NOLZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp ne i32 [[X_CURR_BITMASKED]], 0, [[DBG133:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META127:metadata !.*]], metadata !DIExpression()), [[DBG133]] +; NOLZCNT-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG134:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META128:metadata !.*]], metadata !DIExpression()), [[DBG134]] +; NOLZCNT-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[END:%.*]], label [[LOOP]], [[DBG135:!dbg !.*]] +; NOLZCNT: end: +; NOLZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG131]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_LCSSA]], [[META125]], metadata !DIExpression()), [[DBG131]] +; NOLZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG136:!dbg !.*]] ; entry: %bitmask = shl i32 1, %bit @@ -332,25 +484,55 @@ ; `and` is commutative, so ensure that order is irrelevant define i32 @p9(i32 %x, i32 %bit) { -; ALL-LABEL: @p9( -; ALL-NEXT: entry: -; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], [[DBG144:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], [[META139:metadata !.*]], metadata !DIExpression()), [[DBG144]] -; ALL-NEXT: br label [[LOOP:%.*]], [[DBG145:!dbg !.*]] -; ALL: loop: -; ALL-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG146:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META140:metadata !.*]], metadata !DIExpression()), [[DBG146]] -; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[BITMASK]], [[X_CURR]], [[DBG147:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META141:metadata !.*]], metadata !DIExpression()), [[DBG147]] -; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG148:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META142:metadata !.*]], metadata !DIExpression()), [[DBG148]] -; ALL-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG149:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META143:metadata !.*]], metadata !DIExpression()), [[DBG149]] -; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], [[DBG150:!dbg !.*]] -; ALL: end: -; ALL-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG146]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_LCSSA]], [[META140]], metadata !DIExpression()), [[DBG146]] -; ALL-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG151:!dbg !.*]] +; LZCNT-LABEL: @p9( +; LZCNT-NEXT: entry: +; LZCNT-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], [[DBG144:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], [[META139:metadata !.*]], metadata !DIExpression()), [[DBG144]] +; LZCNT-NEXT: [[BIT_LOWBITMASK:%.*]] = add i32 [[BITMASK]], -1, [[DBG145:!dbg !.*]] +; LZCNT-NEXT: [[BIT_MASK:%.*]] = or i32 [[BIT_LOWBITMASK]], [[BITMASK]], [[DBG145]] +; LZCNT-NEXT: [[X_MASKED:%.*]] = and i32 [[X:%.*]], [[BIT_MASK]], [[DBG145]] +; LZCNT-NEXT: [[X_MASKED_NUMLEADINGZEROS:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X_MASKED]], i1 true), [[DBG145]] +; LZCNT-NEXT: [[X_MASKED_NUMACTIVEBITS:%.*]] = sub i32 32, [[X_MASKED_NUMLEADINGZEROS]], [[DBG145]] +; LZCNT-NEXT: [[X_MASKED_NUMLEADINGONEPOS:%.*]] = add i32 [[X_MASKED_NUMACTIVEBITS]], -1, [[DBG145]] +; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_NUMLEADINGONEPOS]], [[DBG145]] +; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG145]] +; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG145]] +; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG145]] +; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG146:!dbg !.*]] +; LZCNT: loop: +; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[X_NEXT]], [[LOOP]] ], [[DBG145]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META140:metadata !.*]], metadata !DIExpression()), [[DBG145]] +; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[BITMASK]], [[X_CURR]], [[DBG147:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META141:metadata !.*]], metadata !DIExpression()), [[DBG147]] +; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG148:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META142:metadata !.*]], metadata !DIExpression()), [[DBG148]] +; LZCNT-NEXT: [[TMP1:%.*]] = shl i32 [[X_CURR]], 1, [[DBG149:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META143:metadata !.*]], metadata !DIExpression()), [[DBG149]] +; LZCNT-NEXT: br i1 true, label [[END:%.*]], label [[LOOP]], [[DBG150:!dbg !.*]] +; LZCNT: end: +; LZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG145]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_LCSSA]], [[META140]], metadata !DIExpression()), [[DBG145]] +; LZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG151:!dbg !.*]] +; +; NOLZCNT-LABEL: @p9( +; NOLZCNT-NEXT: entry: +; NOLZCNT-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], [[DBG144:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], [[META139:metadata !.*]], metadata !DIExpression()), [[DBG144]] +; NOLZCNT-NEXT: br label [[LOOP:%.*]], [[DBG145:!dbg !.*]] +; NOLZCNT: loop: +; NOLZCNT-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG146:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META140:metadata !.*]], metadata !DIExpression()), [[DBG146]] +; NOLZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[BITMASK]], [[X_CURR]], [[DBG147:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META141:metadata !.*]], metadata !DIExpression()), [[DBG147]] +; NOLZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG148:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META142:metadata !.*]], metadata !DIExpression()), [[DBG148]] +; NOLZCNT-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG149:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META143:metadata !.*]], metadata !DIExpression()), [[DBG149]] +; NOLZCNT-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], [[DBG150:!dbg !.*]] +; NOLZCNT: end: +; NOLZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG146]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_LCSSA]], [[META140]], metadata !DIExpression()), [[DBG146]] +; NOLZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG151:!dbg !.*]] ; entry: %bitmask = shl i32 1, %bit @@ -370,25 +552,55 @@ ; PHI node does not have any particular order for it's incomings, ; but check that the other order still works. define i32 @p10(i32 %x, i32 %bit) { -; ALL-LABEL: @p10( -; ALL-NEXT: entry: -; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], [[DBG159:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], [[META154:metadata !.*]], metadata !DIExpression()), [[DBG159]] -; ALL-NEXT: br label [[LOOP:%.*]], [[DBG160:!dbg !.*]] -; ALL: loop: -; ALL-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X_NEXT:%.*]], [[LOOP]] ], [ [[X:%.*]], [[ENTRY:%.*]] ], [[DBG161:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META155:metadata !.*]], metadata !DIExpression()), [[DBG161]] -; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], [[DBG162:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META156:metadata !.*]], metadata !DIExpression()), [[DBG162]] -; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG163:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META157:metadata !.*]], metadata !DIExpression()), [[DBG163]] -; ALL-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG164:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META158:metadata !.*]], metadata !DIExpression()), [[DBG164]] -; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], [[DBG165:!dbg !.*]] -; ALL: end: -; ALL-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG161]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_LCSSA]], [[META155]], metadata !DIExpression()), [[DBG161]] -; ALL-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG166:!dbg !.*]] +; LZCNT-LABEL: @p10( +; LZCNT-NEXT: entry: +; LZCNT-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], [[DBG159:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], [[META154:metadata !.*]], metadata !DIExpression()), [[DBG159]] +; LZCNT-NEXT: [[BIT_LOWBITMASK:%.*]] = add i32 [[BITMASK]], -1, [[DBG160:!dbg !.*]] +; LZCNT-NEXT: [[BIT_MASK:%.*]] = or i32 [[BIT_LOWBITMASK]], [[BITMASK]], [[DBG160]] +; LZCNT-NEXT: [[X_MASKED:%.*]] = and i32 [[X:%.*]], [[BIT_MASK]], [[DBG160]] +; LZCNT-NEXT: [[X_MASKED_NUMLEADINGZEROS:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X_MASKED]], i1 true), [[DBG160]] +; LZCNT-NEXT: [[X_MASKED_NUMACTIVEBITS:%.*]] = sub i32 32, [[X_MASKED_NUMLEADINGZEROS]], [[DBG160]] +; LZCNT-NEXT: [[X_MASKED_NUMLEADINGONEPOS:%.*]] = add i32 [[X_MASKED_NUMACTIVEBITS]], -1, [[DBG160]] +; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_NUMLEADINGONEPOS]], [[DBG160]] +; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG160]] +; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG160]] +; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG160]] +; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG161:!dbg !.*]] +; LZCNT: loop: +; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X_NEXT]], [[LOOP]] ], [ [[X]], [[ENTRY:%.*]] ], [[DBG160]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META155:metadata !.*]], metadata !DIExpression()), [[DBG160]] +; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], [[DBG162:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META156:metadata !.*]], metadata !DIExpression()), [[DBG162]] +; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG163:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META157:metadata !.*]], metadata !DIExpression()), [[DBG163]] +; LZCNT-NEXT: [[TMP1:%.*]] = shl i32 [[X_CURR]], 1, [[DBG164:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META158:metadata !.*]], metadata !DIExpression()), [[DBG164]] +; LZCNT-NEXT: br i1 true, label [[END:%.*]], label [[LOOP]], [[DBG165:!dbg !.*]] +; LZCNT: end: +; LZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG160]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_LCSSA]], [[META155]], metadata !DIExpression()), [[DBG160]] +; LZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG166:!dbg !.*]] +; +; NOLZCNT-LABEL: @p10( +; NOLZCNT-NEXT: entry: +; NOLZCNT-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], [[DBG159:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], [[META154:metadata !.*]], metadata !DIExpression()), [[DBG159]] +; NOLZCNT-NEXT: br label [[LOOP:%.*]], [[DBG160:!dbg !.*]] +; NOLZCNT: loop: +; NOLZCNT-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X_NEXT:%.*]], [[LOOP]] ], [ [[X:%.*]], [[ENTRY:%.*]] ], [[DBG161:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META155:metadata !.*]], metadata !DIExpression()), [[DBG161]] +; NOLZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], [[DBG162:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META156:metadata !.*]], metadata !DIExpression()), [[DBG162]] +; NOLZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG163:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META157:metadata !.*]], metadata !DIExpression()), [[DBG163]] +; NOLZCNT-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG164:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META158:metadata !.*]], metadata !DIExpression()), [[DBG164]] +; NOLZCNT-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], [[DBG165:!dbg !.*]] +; NOLZCNT: end: +; NOLZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG161]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_LCSSA]], [[META155]], metadata !DIExpression()), [[DBG161]] +; NOLZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG166:!dbg !.*]] ; entry: %bitmask = shl i32 1, %bit