diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1953,6 +1953,7 @@ /// %x.curr.bitmasked = and i32 %x.curr, %bitmask /// %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0 /// %x.next = shl i32 %x.curr, 1 +/// <...> /// br i1 %x.curr.isbitunset, label %loop, label %end /// /// end: @@ -1962,8 +1963,7 @@ /// \endcode static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX, Value *&BitMask, Value *&BitPos, - Value *&CurrX, Value *&NextX, - size_t &CanonicalHeaderSize) { + Value *&CurrX, Value *&NextX) { LLVM_DEBUG(dbgs() << DEBUG_TYPE " Performing shift-until-bittest idiom detection.\n"); @@ -1994,7 +1994,6 @@ // Step 2: Check if the backedge's condition is in desirable form. auto MatchVariableBitMask = [&]() { - CanonicalHeaderSize = 5; return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) && match(CmpLHS, m_c_And(m_Value(CurrX), @@ -2004,15 +2003,12 @@ CurLoop)))); }; auto MatchConstantBitMask = [&]() { - CanonicalHeaderSize = 5; return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) && match(CmpLHS, m_And(m_Value(CurrX), m_CombineAnd(m_Value(BitMask), m_Power2()))) && (BitPos = ConstantExpr::getExactLogBase2(cast(BitMask))); }; auto MatchDecomposableConstantBitMask = [&]() { - CanonicalHeaderSize = 4; - APInt Mask; return llvm::decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, CurrX, Mask) && ICmpInst::isEquality(Pred) && Mask.isPowerOf2() && @@ -2076,6 +2072,7 @@ /// %x.curr.bitmasked = and i32 %x.curr, %bitmask /// %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0 /// %x.next = shl i32 %x.curr, 1 +/// <...> /// br i1 %x.curr.isbitunset, label %loop, label %end /// /// end: @@ -2099,7 +2096,14 @@ /// %tripcount = add i32 %backedgetakencount, 1 /// %x.curr = shl i32 %x, %backedgetakencount /// %x.next = shl i32 %x, %tripcount -/// br label %end +/// br label %loop +/// +/// loop: +/// %loop.iv = phi i32 [ 0, %entry ], [ %loop.iv.next, %loop ] +/// %loop.iv.next = add nuw i32 %loop.iv, 1 +/// %loop.ivcheck = icmp eq i32 %loop.iv.next, %tripcount +/// <...> +/// br i1 %loop.ivcheck, label %end, label %loop /// /// end: /// %x.curr.res = phi i32 [ %x.curr, %loop ] <...> @@ -2110,9 +2114,8 @@ bool MadeChange = false; Value *X, *BitMask, *BitPos, *XCurr, *XNext; - size_t CanonicalHeaderSize; - if (!detectShiftUntilBitTestIdiom(CurLoop, X, BitMask, BitPos, XCurr, XNext, - CanonicalHeaderSize)) { + if (!detectShiftUntilBitTestIdiom(CurLoop, X, BitMask, BitPos, XCurr, + XNext)) { LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom detection failed.\n"); return MadeChange; @@ -2129,25 +2132,6 @@ BasicBlock *SuccessorBB = CurLoop->getExitBlock(); assert(LoopPreheaderBB && "There is only a single successor."); - // The loop must not have any other instructions other than the idiom itself. - // FIXME: we could just rewrite the loop with countable trip count. - size_t HeaderSize = LoopHeaderBB->sizeWithoutDebug(); - assert(HeaderSize >= CanonicalHeaderSize); - if (HeaderSize > CanonicalHeaderSize) { - LLVM_DEBUG(dbgs() << DEBUG_TYPE " Won't be able to delete loop!\n"); - return MadeChange; - } - - // Only the recurrence itself is allowed to have uses outside of the loop. - if (any_of(SuccessorBB->phis(), [&](PHINode &PN) { - Value *IV = PN.getIncomingValueForBlock(LoopHeaderBB); - return IV != XCurr && IV != XNext && !CurLoop->isLoopInvariant(IV); - })) { - LLVM_DEBUG(dbgs() << DEBUG_TYPE " In-loop value is live-out!\n"); - return MadeChange; - } - // FIXME: we *could* allow this. - IRBuilder<> Builder(LoopPreheaderBB->getTerminator()); Builder.SetCurrentDebugLocation(cast(XCurr)->getDebugLoc()); @@ -2157,7 +2141,9 @@ TargetTransformInfo::TargetCostKind CostKind = TargetTransformInfo::TCK_SizeAndLatency; - // Also, the intrinsic and shift we'll use must be cheap. + // The rewrite is considered to be unprofitable iff and only iff the + // intrinsic/shift we'll use are not cheap. Note that we are okay with *just* + // making the loop countable, even if nothing else changes. IntrinsicCostAttributes Attrs( IntrID, Ty, {UndefValue::get(Ty), /*is_zero_undef=*/Builder.getTrue()}); int Cost = TTI->getIntrinsicInstrCost(Attrs, CostKind); @@ -2209,24 +2195,40 @@ Value *NewXNext = Builder.CreateShl(X, LoopTripCount); NewXNext->takeName(XNext); - // Step 3: Replace all references to the recurrence with - // computed recurrence's final value. + // Step 3: Adjust the successor basic block to recieve the computed + // recurrence's final value instead of the recurrence itself. - XCurr->replaceAllUsesWith(NewX); - XNext->replaceAllUsesWith(NewXNext); + XCurr->replaceUsesOutsideBlock(NewX, LoopHeaderBB); + XNext->replaceUsesOutsideBlock(NewXNext, LoopHeaderBB); - // Step 4: Fix the loop back-edge to always exit upon first iteration. + // Step 4: Rewrite the loop into a countable form, with canonical IV. + // The new canonical induction variable. + Builder.SetInsertPoint(&LoopHeaderBB->front()); + auto *IV = Builder.CreatePHI(Ty, 2, CurLoop->getName() + ".iv"); + + // The induction itself. + // Note that while NUW is always safe, while NSW is only for bitwidths != 2. Builder.SetInsertPoint(LoopHeaderBB->getTerminator()); - Builder.CreateCondBr(Builder.getTrue(), SuccessorBB, LoopHeaderBB); + auto *IVNext = Builder.CreateNUWAdd(IV, ConstantInt::get(Ty, 1), + IV->getName() + ".next"); + + // The loop trip count check. + auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount, + CurLoop->getName() + ".ivcheck"); + Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB); LoopHeaderBB->getTerminator()->eraseFromParent(); + // Populate the IV PHI. + IV->addIncoming(ConstantInt::get(Ty, 0), LoopPreheaderBB); + IV->addIncoming(IVNext, LoopHeaderBB); + // Step 5: Forget the "non-computable" trip-count SCEV associated with the // loop. The loop would otherwise not be deleted even if it becomes empty. SE->forgetLoop(CurLoop); - // Other passes will take care of actually deleting the loop. + // Other passes will take care of actually deleting the loop if possible. LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom optimized!\n"); diff --git a/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll b/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll --- a/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll +++ b/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll @@ -31,15 +31,18 @@ ; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG17]] ; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG18:!dbg !.*]] ; LZCNT: loop: -; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[X_NEXT]], [[LOOP]] ], [[DBG17]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META11:metadata !.*]], metadata !DIExpression()), [[DBG17]] -; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], [[DBG19:!dbg !.*]] +; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG17]] +; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], [[DBG17]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], [[META11:metadata !.*]], metadata !DIExpression()), [[DBG17]] +; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[TMP0]], [[BITMASK]], [[DBG19:!dbg !.*]] ; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META12:metadata !.*]], metadata !DIExpression()), [[DBG19]] ; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG20:!dbg !.*]] ; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META13:metadata !.*]], metadata !DIExpression()), [[DBG20]] -; LZCNT-NEXT: [[TMP1:%.*]] = shl i32 [[X_CURR]], 1, [[DBG21:!dbg !.*]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META15:metadata !.*]], metadata !DIExpression()), [[DBG21]] -; LZCNT-NEXT: br i1 true, label [[END:%.*]], label [[LOOP]], [[DBG22:!dbg !.*]] +; LZCNT-NEXT: [[TMP1]] = shl i32 [[TMP0]], 1, [[DBG21:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], [[META15:metadata !.*]], metadata !DIExpression()), [[DBG21]] +; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw i32 [[LOOP_IV]], 1, [[DBG22:!dbg !.*]] +; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], [[DBG22]] +; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], [[DBG22]] ; LZCNT: end: ; LZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG17]] ; LZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG23:!dbg !.*]] @@ -96,15 +99,18 @@ ; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i16 [[X]], [[LOOP_TRIPCOUNT]], [[DBG33]] ; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG34:!dbg !.*]] ; LZCNT: loop: -; LZCNT-NEXT: [[TMP0:%.*]] = phi i16 [ [[X]], [[ENTRY:%.*]] ], [ [[X_NEXT]], [[LOOP]] ], [[DBG33]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i16 [[X_CURR]], [[META28:metadata !.*]], metadata !DIExpression()), [[DBG33]] -; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i16 [[X_CURR]], [[BITMASK]], [[DBG35:!dbg !.*]] +; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG33]] +; LZCNT-NEXT: [[TMP0:%.*]] = phi i16 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], [[DBG33]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i16 [[TMP0]], [[META28:metadata !.*]], metadata !DIExpression()), [[DBG33]] +; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i16 [[TMP0]], [[BITMASK]], [[DBG35:!dbg !.*]] ; LZCNT-NEXT: call void @llvm.dbg.value(metadata i16 [[X_CURR_BITMASKED]], [[META29:metadata !.*]], metadata !DIExpression()), [[DBG35]] ; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i16 [[X_CURR_BITMASKED]], 0, [[DBG36:!dbg !.*]] ; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META30:metadata !.*]], metadata !DIExpression()), [[DBG36]] -; LZCNT-NEXT: [[TMP1:%.*]] = shl i16 [[X_CURR]], 1, [[DBG37:!dbg !.*]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i16 [[X_NEXT]], [[META31:metadata !.*]], metadata !DIExpression()), [[DBG37]] -; LZCNT-NEXT: br i1 true, label [[END:%.*]], label [[LOOP]], [[DBG38:!dbg !.*]] +; LZCNT-NEXT: [[TMP1]] = shl i16 [[TMP0]], 1, [[DBG37:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i16 [[TMP1]], [[META31:metadata !.*]], metadata !DIExpression()), [[DBG37]] +; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw i16 [[LOOP_IV]], 1, [[DBG38:!dbg !.*]] +; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i16 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], [[DBG38]] +; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], [[DBG38]] ; LZCNT: end: ; LZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i16 [ [[X_CURR]], [[LOOP]] ], [[DBG33]] ; LZCNT-NEXT: ret i16 [[X_CURR_LCSSA]], [[DBG39:!dbg !.*]] @@ -156,15 +162,18 @@ ; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG46]] ; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG47:!dbg !.*]] ; LZCNT: loop: -; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[X_NEXT]], [[LOOP]] ], [[DBG46]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META42:metadata !.*]], metadata !DIExpression()), [[DBG46]] -; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], 16777216, [[DBG48:!dbg !.*]] +; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG46]] +; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], [[DBG46]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], [[META42:metadata !.*]], metadata !DIExpression()), [[DBG46]] +; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[TMP0]], 16777216, [[DBG48:!dbg !.*]] ; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META43:metadata !.*]], metadata !DIExpression()), [[DBG48]] ; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG49:!dbg !.*]] ; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META44:metadata !.*]], metadata !DIExpression()), [[DBG49]] -; LZCNT-NEXT: [[TMP1:%.*]] = shl i32 [[X_CURR]], 1, [[DBG50:!dbg !.*]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META45:metadata !.*]], metadata !DIExpression()), [[DBG50]] -; LZCNT-NEXT: br i1 true, label [[END:%.*]], label [[LOOP]], [[DBG51:!dbg !.*]] +; LZCNT-NEXT: [[TMP1]] = shl i32 [[TMP0]], 1, [[DBG50:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], [[META45:metadata !.*]], metadata !DIExpression()), [[DBG50]] +; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw i32 [[LOOP_IV]], 1, [[DBG51:!dbg !.*]] +; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], [[DBG51]] +; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], [[DBG51]] ; LZCNT: end: ; LZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG46]] ; LZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG52:!dbg !.*]] @@ -213,15 +222,18 @@ ; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG59]] ; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG60:!dbg !.*]] ; LZCNT: loop: -; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[X_NEXT]], [[LOOP]] ], [[DBG59]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META55:metadata !.*]], metadata !DIExpression()), [[DBG59]] -; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], 32768, [[DBG61:!dbg !.*]] +; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG59]] +; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], [[DBG59]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], [[META55:metadata !.*]], metadata !DIExpression()), [[DBG59]] +; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[TMP0]], 32768, [[DBG61:!dbg !.*]] ; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META56:metadata !.*]], metadata !DIExpression()), [[DBG61]] ; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG62:!dbg !.*]] ; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META57:metadata !.*]], metadata !DIExpression()), [[DBG62]] -; LZCNT-NEXT: [[TMP1:%.*]] = shl i32 [[X_CURR]], 1, [[DBG63:!dbg !.*]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META58:metadata !.*]], metadata !DIExpression()), [[DBG63]] -; LZCNT-NEXT: br i1 true, label [[END:%.*]], label [[LOOP]], [[DBG64:!dbg !.*]] +; LZCNT-NEXT: [[TMP1]] = shl i32 [[TMP0]], 1, [[DBG63:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], [[META58:metadata !.*]], metadata !DIExpression()), [[DBG63]] +; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw i32 [[LOOP_IV]], 1, [[DBG64:!dbg !.*]] +; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], [[DBG64]] +; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], [[DBG64]] ; LZCNT: end: ; LZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG59]] ; LZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG65:!dbg !.*]] @@ -275,15 +287,18 @@ ; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG74]] ; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG75:!dbg !.*]] ; LZCNT: loop: -; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[X_NEXT]], [[LOOP]] ], [[DBG74]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META69:metadata !.*]], metadata !DIExpression()), [[DBG74]] -; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], [[DBG76:!dbg !.*]] +; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG74]] +; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], [[DBG74]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], [[META69:metadata !.*]], metadata !DIExpression()), [[DBG74]] +; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[TMP0]], [[BITMASK]], [[DBG76:!dbg !.*]] ; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META70:metadata !.*]], metadata !DIExpression()), [[DBG76]] ; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG77:!dbg !.*]] ; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META71:metadata !.*]], metadata !DIExpression()), [[DBG77]] -; LZCNT-NEXT: [[TMP1:%.*]] = shl i32 [[X_CURR]], 1, [[DBG78:!dbg !.*]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META72:metadata !.*]], metadata !DIExpression()), [[DBG78]] -; LZCNT-NEXT: br i1 true, label [[END:%.*]], label [[LOOP]], [[DBG79:!dbg !.*]] +; LZCNT-NEXT: [[TMP1]] = shl i32 [[TMP0]], 1, [[DBG78:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], [[META72:metadata !.*]], metadata !DIExpression()), [[DBG78]] +; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw i32 [[LOOP_IV]], 1, [[DBG79:!dbg !.*]] +; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], [[DBG79]] +; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], [[DBG79]] ; LZCNT: end: ; LZCNT-NEXT: [[X_NEXT_LCSSA:%.*]] = phi i32 [ [[X_NEXT]], [[LOOP]] ], [[DBG78]] ; LZCNT-NEXT: ret i32 [[X_NEXT_LCSSA]], [[DBG80:!dbg !.*]] @@ -339,15 +354,18 @@ ; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG89]] ; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG90:!dbg !.*]] ; LZCNT: loop: -; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[X_NEXT]], [[LOOP]] ], [[DBG89]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META84:metadata !.*]], metadata !DIExpression()), [[DBG89]] -; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], [[DBG91:!dbg !.*]] +; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG89]] +; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], [[DBG89]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], [[META84:metadata !.*]], metadata !DIExpression()), [[DBG89]] +; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[TMP0]], [[BITMASK]], [[DBG91:!dbg !.*]] ; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META85:metadata !.*]], metadata !DIExpression()), [[DBG91]] ; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG92:!dbg !.*]] ; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META86:metadata !.*]], metadata !DIExpression()), [[DBG92]] -; LZCNT-NEXT: [[TMP1:%.*]] = shl i32 [[X_CURR]], 1, [[DBG93:!dbg !.*]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META87:metadata !.*]], metadata !DIExpression()), [[DBG93]] -; LZCNT-NEXT: br i1 true, label [[END:%.*]], label [[LOOP]], [[DBG94:!dbg !.*]] +; LZCNT-NEXT: [[TMP1]] = shl i32 [[TMP0]], 1, [[DBG93:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], [[META87:metadata !.*]], metadata !DIExpression()), [[DBG93]] +; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw i32 [[LOOP_IV]], 1, [[DBG94:!dbg !.*]] +; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], [[DBG94]] +; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], [[DBG94]] ; LZCNT: end: ; LZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG89]] ; LZCNT-NEXT: [[X_NEXT_LCSSA:%.*]] = phi i32 [ [[X_NEXT]], [[LOOP]] ], [[DBG93]] @@ -406,15 +424,18 @@ ; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG104]] ; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG105:!dbg !.*]] ; LZCNT: loop: -; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[X_NEXT]], [[LOOP]] ], [[DBG104]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META100:metadata !.*]], metadata !DIExpression()), [[DBG104]] -; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], -2147483648, [[DBG106:!dbg !.*]] +; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG104]] +; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], [[DBG104]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], [[META100:metadata !.*]], metadata !DIExpression()), [[DBG104]] +; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[TMP0]], -2147483648, [[DBG106:!dbg !.*]] ; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META101:metadata !.*]], metadata !DIExpression()), [[DBG106]] ; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG107:!dbg !.*]] ; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META102:metadata !.*]], metadata !DIExpression()), [[DBG107]] -; LZCNT-NEXT: [[TMP1:%.*]] = shl i32 [[X_CURR]], 1, [[DBG108:!dbg !.*]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META103:metadata !.*]], metadata !DIExpression()), [[DBG108]] -; LZCNT-NEXT: br i1 true, label [[END:%.*]], label [[LOOP]], [[DBG109:!dbg !.*]] +; LZCNT-NEXT: [[TMP1]] = shl i32 [[TMP0]], 1, [[DBG108:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], [[META103:metadata !.*]], metadata !DIExpression()), [[DBG108]] +; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw i32 [[LOOP_IV]], 1, [[DBG109:!dbg !.*]] +; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], [[DBG109]] +; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], [[DBG109]] ; LZCNT: end: ; LZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG104]] ; LZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG110:!dbg !.*]] @@ -462,13 +483,16 @@ ; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG116]] ; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG117:!dbg !.*]] ; LZCNT: loop: -; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[X_NEXT]], [[LOOP]] ], [[DBG116]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META113:metadata !.*]], metadata !DIExpression()), [[DBG116]] -; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp sgt i32 [[X_CURR]], -1, [[DBG118:!dbg !.*]] +; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG116]] +; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], [[DBG116]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], [[META113:metadata !.*]], metadata !DIExpression()), [[DBG116]] +; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp sgt i32 [[TMP0]], -1, [[DBG118:!dbg !.*]] ; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META114:metadata !.*]], metadata !DIExpression()), [[DBG118]] -; LZCNT-NEXT: [[TMP1:%.*]] = shl i32 [[X_CURR]], 1, [[DBG119:!dbg !.*]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META115:metadata !.*]], metadata !DIExpression()), [[DBG119]] -; LZCNT-NEXT: br i1 true, label [[END:%.*]], label [[LOOP]], [[DBG120:!dbg !.*]] +; LZCNT-NEXT: [[TMP1]] = shl i32 [[TMP0]], 1, [[DBG119:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], [[META115:metadata !.*]], metadata !DIExpression()), [[DBG119]] +; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw i32 [[LOOP_IV]], 1, [[DBG120:!dbg !.*]] +; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], [[DBG120]] +; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], [[DBG120]] ; LZCNT: end: ; LZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG116]] ; LZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG121:!dbg !.*]] @@ -519,15 +543,18 @@ ; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG130]] ; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG131:!dbg !.*]] ; LZCNT: loop: -; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[X_NEXT]], [[LOOP]] ], [[DBG130]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META125:metadata !.*]], metadata !DIExpression()), [[DBG130]] -; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], [[DBG132:!dbg !.*]] +; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG130]] +; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], [[DBG130]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], [[META125:metadata !.*]], metadata !DIExpression()), [[DBG130]] +; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[TMP0]], [[BITMASK]], [[DBG132:!dbg !.*]] ; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META126:metadata !.*]], metadata !DIExpression()), [[DBG132]] ; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp ne i32 [[X_CURR_BITMASKED]], 0, [[DBG133:!dbg !.*]] ; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META127:metadata !.*]], metadata !DIExpression()), [[DBG133]] -; LZCNT-NEXT: [[TMP1:%.*]] = shl i32 [[X_CURR]], 1, [[DBG134:!dbg !.*]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META128:metadata !.*]], metadata !DIExpression()), [[DBG134]] -; LZCNT-NEXT: br i1 true, label [[END:%.*]], label [[LOOP]], [[DBG135:!dbg !.*]] +; LZCNT-NEXT: [[TMP1]] = shl i32 [[TMP0]], 1, [[DBG134:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], [[META128:metadata !.*]], metadata !DIExpression()), [[DBG134]] +; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw i32 [[LOOP_IV]], 1, [[DBG135:!dbg !.*]] +; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], [[DBG135]] +; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], [[DBG135]] ; LZCNT: end: ; LZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG130]] ; LZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG136:!dbg !.*]] @@ -584,15 +611,18 @@ ; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG145]] ; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG146:!dbg !.*]] ; LZCNT: loop: -; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[X_NEXT]], [[LOOP]] ], [[DBG145]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META140:metadata !.*]], metadata !DIExpression()), [[DBG145]] -; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[BITMASK]], [[X_CURR]], [[DBG147:!dbg !.*]] +; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG145]] +; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], [[DBG145]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], [[META140:metadata !.*]], metadata !DIExpression()), [[DBG145]] +; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[BITMASK]], [[TMP0]], [[DBG147:!dbg !.*]] ; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META141:metadata !.*]], metadata !DIExpression()), [[DBG147]] ; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG148:!dbg !.*]] ; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META142:metadata !.*]], metadata !DIExpression()), [[DBG148]] -; LZCNT-NEXT: [[TMP1:%.*]] = shl i32 [[X_CURR]], 1, [[DBG149:!dbg !.*]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META143:metadata !.*]], metadata !DIExpression()), [[DBG149]] -; LZCNT-NEXT: br i1 true, label [[END:%.*]], label [[LOOP]], [[DBG150:!dbg !.*]] +; LZCNT-NEXT: [[TMP1]] = shl i32 [[TMP0]], 1, [[DBG149:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], [[META143:metadata !.*]], metadata !DIExpression()), [[DBG149]] +; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw i32 [[LOOP_IV]], 1, [[DBG150:!dbg !.*]] +; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], [[DBG150]] +; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], [[DBG150]] ; LZCNT: end: ; LZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG145]] ; LZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG151:!dbg !.*]] @@ -650,15 +680,18 @@ ; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG160]] ; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG161:!dbg !.*]] ; LZCNT: loop: -; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X_NEXT]], [[LOOP]] ], [ [[X]], [[ENTRY:%.*]] ], [[DBG160]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META155:metadata !.*]], metadata !DIExpression()), [[DBG160]] -; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], [[DBG162:!dbg !.*]] +; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG160]] +; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[TMP1:%.*]], [[LOOP]] ], [ [[X]], [[ENTRY]] ], [[DBG160]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], [[META155:metadata !.*]], metadata !DIExpression()), [[DBG160]] +; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[TMP0]], [[BITMASK]], [[DBG162:!dbg !.*]] ; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META156:metadata !.*]], metadata !DIExpression()), [[DBG162]] ; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG163:!dbg !.*]] ; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META157:metadata !.*]], metadata !DIExpression()), [[DBG163]] -; LZCNT-NEXT: [[TMP1:%.*]] = shl i32 [[X_CURR]], 1, [[DBG164:!dbg !.*]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META158:metadata !.*]], metadata !DIExpression()), [[DBG164]] -; LZCNT-NEXT: br i1 true, label [[END:%.*]], label [[LOOP]], [[DBG165:!dbg !.*]] +; LZCNT-NEXT: [[TMP1]] = shl i32 [[TMP0]], 1, [[DBG164:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], [[META158:metadata !.*]], metadata !DIExpression()), [[DBG164]] +; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw i32 [[LOOP_IV]], 1, [[DBG165:!dbg !.*]] +; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], [[DBG165]] +; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], [[DBG165]] ; LZCNT: end: ; LZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG160]] ; LZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG166:!dbg !.*]] @@ -710,13 +743,16 @@ ; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG172]] ; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG173:!dbg !.*]] ; LZCNT: loop: -; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[X_NEXT]], [[LOOP]] ], [[DBG172]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META169:metadata !.*]], metadata !DIExpression()), [[DBG172]] -; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp slt i32 [[X_CURR]], 0, [[DBG174:!dbg !.*]] +; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG172]] +; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], [[DBG172]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], [[META169:metadata !.*]], metadata !DIExpression()), [[DBG172]] +; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp slt i32 [[TMP0]], 0, [[DBG174:!dbg !.*]] ; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META170:metadata !.*]], metadata !DIExpression()), [[DBG174]] -; LZCNT-NEXT: [[TMP1:%.*]] = shl i32 [[X_CURR]], 1, [[DBG175:!dbg !.*]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META171:metadata !.*]], metadata !DIExpression()), [[DBG175]] -; LZCNT-NEXT: br i1 true, label [[END:%.*]], label [[LOOP]], [[DBG176:!dbg !.*]] +; LZCNT-NEXT: [[TMP1]] = shl i32 [[TMP0]], 1, [[DBG175:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], [[META171:metadata !.*]], metadata !DIExpression()), [[DBG175]] +; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw i32 [[LOOP_IV]], 1, [[DBG176:!dbg !.*]] +; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], [[DBG176]] +; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], [[DBG176]] ; LZCNT: end: ; LZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG172]] ; LZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG177:!dbg !.*]] @@ -1250,25 +1286,58 @@ ; If loop body has any extra instructions we don't want to deal with it. define i32 @n26(i32 %x, i32 %bit) { -; ALL-LABEL: @n26( -; ALL-NEXT: entry: -; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], [[DBG389:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], [[META384:metadata !.*]], metadata !DIExpression()), [[DBG389]] -; ALL-NEXT: br label [[LOOP:%.*]], [[DBG390:!dbg !.*]] -; ALL: loop: -; ALL-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG391:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META385:metadata !.*]], metadata !DIExpression()), [[DBG391]] -; ALL-NEXT: call void @external_side_effect(), [[DBG392:!dbg !.*]] -; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], [[DBG393:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META386:metadata !.*]], metadata !DIExpression()), [[DBG393]] -; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG394:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META387:metadata !.*]], metadata !DIExpression()), [[DBG394]] -; ALL-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG395:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META388:metadata !.*]], metadata !DIExpression()), [[DBG395]] -; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], [[DBG396:!dbg !.*]] -; ALL: end: -; ALL-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG391]] -; ALL-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG397:!dbg !.*]] +; LZCNT-LABEL: @n26( +; LZCNT-NEXT: entry: +; LZCNT-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], [[DBG389:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], [[META384:metadata !.*]], metadata !DIExpression()), [[DBG389]] +; LZCNT-NEXT: [[BIT_LOWBITMASK:%.*]] = add i32 [[BITMASK]], -1, [[DBG390:!dbg !.*]] +; LZCNT-NEXT: [[BIT_MASK:%.*]] = or i32 [[BIT_LOWBITMASK]], [[BITMASK]], [[DBG390]] +; LZCNT-NEXT: [[X_MASKED:%.*]] = and i32 [[X:%.*]], [[BIT_MASK]], [[DBG390]] +; LZCNT-NEXT: [[X_MASKED_NUMLEADINGZEROS:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X_MASKED]], i1 true), [[DBG390]] +; LZCNT-NEXT: [[X_MASKED_NUMACTIVEBITS:%.*]] = sub i32 32, [[X_MASKED_NUMLEADINGZEROS]], [[DBG390]] +; LZCNT-NEXT: [[X_MASKED_LEADINGONEPOS:%.*]] = add i32 [[X_MASKED_NUMACTIVEBITS]], -1, [[DBG390]] +; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG390]] +; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG390]] +; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG390]] +; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG390]] +; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG391:!dbg !.*]] +; LZCNT: loop: +; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG390]] +; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], [[DBG390]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], [[META385:metadata !.*]], metadata !DIExpression()), [[DBG390]] +; LZCNT-NEXT: call void @external_side_effect(), [[DBG392:!dbg !.*]] +; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[TMP0]], [[BITMASK]], [[DBG393:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META386:metadata !.*]], metadata !DIExpression()), [[DBG393]] +; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG394:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META387:metadata !.*]], metadata !DIExpression()), [[DBG394]] +; LZCNT-NEXT: [[TMP1]] = shl i32 [[TMP0]], 1, [[DBG395:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], [[META388:metadata !.*]], metadata !DIExpression()), [[DBG395]] +; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw i32 [[LOOP_IV]], 1, [[DBG396:!dbg !.*]] +; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], [[DBG396]] +; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], [[DBG396]] +; LZCNT: end: +; LZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG390]] +; LZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG397:!dbg !.*]] +; +; NOLZCNT-LABEL: @n26( +; NOLZCNT-NEXT: entry: +; NOLZCNT-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], [[DBG389:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], [[META384:metadata !.*]], metadata !DIExpression()), [[DBG389]] +; NOLZCNT-NEXT: br label [[LOOP:%.*]], [[DBG390:!dbg !.*]] +; NOLZCNT: loop: +; NOLZCNT-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG391:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META385:metadata !.*]], metadata !DIExpression()), [[DBG391]] +; NOLZCNT-NEXT: call void @external_side_effect(), [[DBG392:!dbg !.*]] +; NOLZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], [[DBG393:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META386:metadata !.*]], metadata !DIExpression()), [[DBG393]] +; NOLZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG394:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META387:metadata !.*]], metadata !DIExpression()), [[DBG394]] +; NOLZCNT-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG395:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META388:metadata !.*]], metadata !DIExpression()), [[DBG395]] +; NOLZCNT-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], [[DBG396:!dbg !.*]] +; NOLZCNT: end: +; NOLZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG391]] +; NOLZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG397:!dbg !.*]] ; entry: %bitmask = shl i32 1, %bit @@ -1287,21 +1356,47 @@ } define i32 @n27(i32 %x) { -; ALL-LABEL: @n27( -; ALL-NEXT: entry: -; ALL-NEXT: br label [[LOOP:%.*]], [[DBG403:!dbg !.*]] -; ALL: loop: -; ALL-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG404:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META400:metadata !.*]], metadata !DIExpression()), [[DBG404]] -; ALL-NEXT: call void @external_side_effect(), [[DBG405:!dbg !.*]] -; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp sgt i32 [[X_CURR]], -1, [[DBG406:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META401:metadata !.*]], metadata !DIExpression()), [[DBG406]] -; ALL-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG407:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META402:metadata !.*]], metadata !DIExpression()), [[DBG407]] -; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], [[DBG408:!dbg !.*]] -; ALL: end: -; ALL-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG404]] -; ALL-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG409:!dbg !.*]] +; LZCNT-LABEL: @n27( +; LZCNT-NEXT: entry: +; LZCNT-NEXT: [[X_NUMLEADINGZEROS:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 true), [[DBG403:!dbg !.*]] +; LZCNT-NEXT: [[X_NUMACTIVEBITS:%.*]] = sub i32 32, [[X_NUMLEADINGZEROS]], [[DBG403]] +; LZCNT-NEXT: [[X_LEADINGONEPOS:%.*]] = add i32 [[X_NUMACTIVEBITS]], -1, [[DBG403]] +; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 31, [[X_LEADINGONEPOS]], [[DBG403]] +; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG403]] +; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG403]] +; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG403]] +; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG404:!dbg !.*]] +; LZCNT: loop: +; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG403]] +; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], [[DBG403]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], [[META400:metadata !.*]], metadata !DIExpression()), [[DBG403]] +; LZCNT-NEXT: call void @external_side_effect(), [[DBG405:!dbg !.*]] +; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp sgt i32 [[TMP0]], -1, [[DBG406:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META401:metadata !.*]], metadata !DIExpression()), [[DBG406]] +; LZCNT-NEXT: [[TMP1]] = shl i32 [[TMP0]], 1, [[DBG407:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], [[META402:metadata !.*]], metadata !DIExpression()), [[DBG407]] +; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw i32 [[LOOP_IV]], 1, [[DBG408:!dbg !.*]] +; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], [[DBG408]] +; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], [[DBG408]] +; LZCNT: end: +; LZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG403]] +; LZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG409:!dbg !.*]] +; +; NOLZCNT-LABEL: @n27( +; NOLZCNT-NEXT: entry: +; NOLZCNT-NEXT: br label [[LOOP:%.*]], [[DBG403:!dbg !.*]] +; NOLZCNT: loop: +; NOLZCNT-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG404:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META400:metadata !.*]], metadata !DIExpression()), [[DBG404]] +; NOLZCNT-NEXT: call void @external_side_effect(), [[DBG405:!dbg !.*]] +; NOLZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp sgt i32 [[X_CURR]], -1, [[DBG406:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META401:metadata !.*]], metadata !DIExpression()), [[DBG406]] +; NOLZCNT-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG407:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META402:metadata !.*]], metadata !DIExpression()), [[DBG407]] +; NOLZCNT-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], [[DBG408:!dbg !.*]] +; NOLZCNT: end: +; NOLZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG404]] +; NOLZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG409:!dbg !.*]] ; entry: br label %loop @@ -1319,26 +1414,60 @@ ; In-loop instructions should not have uses outside of the loop. define i32 @n28(i32 %x, i32 %bit) { -; ALL-LABEL: @n28( -; ALL-NEXT: entry: -; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], [[DBG417:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], [[META412:metadata !.*]], metadata !DIExpression()), [[DBG417]] -; ALL-NEXT: br label [[LOOP:%.*]], [[DBG418:!dbg !.*]] -; ALL: loop: -; ALL-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG419:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META413:metadata !.*]], metadata !DIExpression()), [[DBG419]] -; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], [[DBG420:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META414:metadata !.*]], metadata !DIExpression()), [[DBG420]] -; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG421:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META415:metadata !.*]], metadata !DIExpression()), [[DBG421]] -; ALL-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG422:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META416:metadata !.*]], metadata !DIExpression()), [[DBG422]] -; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], [[DBG423:!dbg !.*]] -; ALL: end: -; ALL-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG419]] -; ALL-NEXT: [[X_CURR_BITMASKED_LCSSA:%.*]] = phi i32 [ [[X_CURR_BITMASKED]], [[LOOP]] ], [[DBG420]] -; ALL-NEXT: call void @use32(i32 [[X_CURR_BITMASKED_LCSSA]]), [[DBG424:!dbg !.*]] -; ALL-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG425:!dbg !.*]] +; LZCNT-LABEL: @n28( +; LZCNT-NEXT: entry: +; LZCNT-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], [[DBG417:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], [[META412:metadata !.*]], metadata !DIExpression()), [[DBG417]] +; LZCNT-NEXT: [[BIT_LOWBITMASK:%.*]] = add i32 [[BITMASK]], -1, [[DBG418:!dbg !.*]] +; LZCNT-NEXT: [[BIT_MASK:%.*]] = or i32 [[BIT_LOWBITMASK]], [[BITMASK]], [[DBG418]] +; LZCNT-NEXT: [[X_MASKED:%.*]] = and i32 [[X:%.*]], [[BIT_MASK]], [[DBG418]] +; LZCNT-NEXT: [[X_MASKED_NUMLEADINGZEROS:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X_MASKED]], i1 true), [[DBG418]] +; LZCNT-NEXT: [[X_MASKED_NUMACTIVEBITS:%.*]] = sub i32 32, [[X_MASKED_NUMLEADINGZEROS]], [[DBG418]] +; LZCNT-NEXT: [[X_MASKED_LEADINGONEPOS:%.*]] = add i32 [[X_MASKED_NUMACTIVEBITS]], -1, [[DBG418]] +; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG418]] +; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG418]] +; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG418]] +; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG418]] +; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG419:!dbg !.*]] +; LZCNT: loop: +; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG418]] +; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], [[DBG418]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], [[META413:metadata !.*]], metadata !DIExpression()), [[DBG418]] +; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[TMP0]], [[BITMASK]], [[DBG420:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META414:metadata !.*]], metadata !DIExpression()), [[DBG420]] +; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG421:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META415:metadata !.*]], metadata !DIExpression()), [[DBG421]] +; LZCNT-NEXT: [[TMP1]] = shl i32 [[TMP0]], 1, [[DBG422:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], [[META416:metadata !.*]], metadata !DIExpression()), [[DBG422]] +; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw i32 [[LOOP_IV]], 1, [[DBG423:!dbg !.*]] +; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], [[DBG423]] +; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], [[DBG423]] +; LZCNT: end: +; LZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG418]] +; LZCNT-NEXT: [[X_CURR_BITMASKED_LCSSA:%.*]] = phi i32 [ [[X_CURR_BITMASKED]], [[LOOP]] ], [[DBG420]] +; LZCNT-NEXT: call void @use32(i32 [[X_CURR_BITMASKED_LCSSA]]), [[DBG424:!dbg !.*]] +; LZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG425:!dbg !.*]] +; +; NOLZCNT-LABEL: @n28( +; NOLZCNT-NEXT: entry: +; NOLZCNT-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], [[DBG417:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], [[META412:metadata !.*]], metadata !DIExpression()), [[DBG417]] +; NOLZCNT-NEXT: br label [[LOOP:%.*]], [[DBG418:!dbg !.*]] +; NOLZCNT: loop: +; NOLZCNT-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG419:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META413:metadata !.*]], metadata !DIExpression()), [[DBG419]] +; NOLZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], [[DBG420:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META414:metadata !.*]], metadata !DIExpression()), [[DBG420]] +; NOLZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG421:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META415:metadata !.*]], metadata !DIExpression()), [[DBG421]] +; NOLZCNT-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG422:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META416:metadata !.*]], metadata !DIExpression()), [[DBG422]] +; NOLZCNT-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], [[DBG423:!dbg !.*]] +; NOLZCNT: end: +; NOLZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG419]] +; NOLZCNT-NEXT: [[X_CURR_BITMASKED_LCSSA:%.*]] = phi i32 [ [[X_CURR_BITMASKED]], [[LOOP]] ], [[DBG420]] +; NOLZCNT-NEXT: call void @use32(i32 [[X_CURR_BITMASKED_LCSSA]]), [[DBG424:!dbg !.*]] +; NOLZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG425:!dbg !.*]] ; entry: %bitmask = shl i32 1, %bit @@ -1356,26 +1485,60 @@ ret i32 %x.curr } define i32 @n29(i32 %x, i32 %bit) { -; ALL-LABEL: @n29( -; ALL-NEXT: entry: -; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], [[DBG433:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], [[META428:metadata !.*]], metadata !DIExpression()), [[DBG433]] -; ALL-NEXT: br label [[LOOP:%.*]], [[DBG434:!dbg !.*]] -; ALL: loop: -; ALL-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG435:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META429:metadata !.*]], metadata !DIExpression()), [[DBG435]] -; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], [[DBG436:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META430:metadata !.*]], metadata !DIExpression()), [[DBG436]] -; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG437:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META431:metadata !.*]], metadata !DIExpression()), [[DBG437]] -; ALL-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG438:!dbg !.*]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META432:metadata !.*]], metadata !DIExpression()), [[DBG438]] -; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], [[DBG439:!dbg !.*]] -; ALL: end: -; ALL-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG435]] -; ALL-NEXT: [[X_CURR_ISBITUNSET_LCSSA:%.*]] = phi i1 [ [[X_CURR_ISBITUNSET]], [[LOOP]] ], [[DBG437]] -; ALL-NEXT: call void @use1(i1 [[X_CURR_ISBITUNSET_LCSSA]]), [[DBG440:!dbg !.*]] -; ALL-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG441:!dbg !.*]] +; LZCNT-LABEL: @n29( +; LZCNT-NEXT: entry: +; LZCNT-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], [[DBG433:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], [[META428:metadata !.*]], metadata !DIExpression()), [[DBG433]] +; LZCNT-NEXT: [[BIT_LOWBITMASK:%.*]] = add i32 [[BITMASK]], -1, [[DBG434:!dbg !.*]] +; LZCNT-NEXT: [[BIT_MASK:%.*]] = or i32 [[BIT_LOWBITMASK]], [[BITMASK]], [[DBG434]] +; LZCNT-NEXT: [[X_MASKED:%.*]] = and i32 [[X:%.*]], [[BIT_MASK]], [[DBG434]] +; LZCNT-NEXT: [[X_MASKED_NUMLEADINGZEROS:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X_MASKED]], i1 true), [[DBG434]] +; LZCNT-NEXT: [[X_MASKED_NUMACTIVEBITS:%.*]] = sub i32 32, [[X_MASKED_NUMLEADINGZEROS]], [[DBG434]] +; LZCNT-NEXT: [[X_MASKED_LEADINGONEPOS:%.*]] = add i32 [[X_MASKED_NUMACTIVEBITS]], -1, [[DBG434]] +; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 [[BIT]], [[X_MASKED_LEADINGONEPOS]], [[DBG434]] +; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG434]] +; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG434]] +; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG434]] +; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG435:!dbg !.*]] +; LZCNT: loop: +; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], [[DBG434]] +; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], [[DBG434]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], [[META429:metadata !.*]], metadata !DIExpression()), [[DBG434]] +; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[TMP0]], [[BITMASK]], [[DBG436:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META430:metadata !.*]], metadata !DIExpression()), [[DBG436]] +; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG437:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META431:metadata !.*]], metadata !DIExpression()), [[DBG437]] +; LZCNT-NEXT: [[TMP1]] = shl i32 [[TMP0]], 1, [[DBG438:!dbg !.*]] +; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], [[META432:metadata !.*]], metadata !DIExpression()), [[DBG438]] +; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw i32 [[LOOP_IV]], 1, [[DBG439:!dbg !.*]] +; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], [[DBG439]] +; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], [[DBG439]] +; LZCNT: end: +; LZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG434]] +; LZCNT-NEXT: [[X_CURR_ISBITUNSET_LCSSA:%.*]] = phi i1 [ [[X_CURR_ISBITUNSET]], [[LOOP]] ], [[DBG437]] +; LZCNT-NEXT: call void @use1(i1 [[X_CURR_ISBITUNSET_LCSSA]]), [[DBG440:!dbg !.*]] +; LZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG441:!dbg !.*]] +; +; NOLZCNT-LABEL: @n29( +; NOLZCNT-NEXT: entry: +; NOLZCNT-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], [[DBG433:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], [[META428:metadata !.*]], metadata !DIExpression()), [[DBG433]] +; NOLZCNT-NEXT: br label [[LOOP:%.*]], [[DBG434:!dbg !.*]] +; NOLZCNT: loop: +; NOLZCNT-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG435:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META429:metadata !.*]], metadata !DIExpression()), [[DBG435]] +; NOLZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], [[DBG436:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], [[META430:metadata !.*]], metadata !DIExpression()), [[DBG436]] +; NOLZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, [[DBG437:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META431:metadata !.*]], metadata !DIExpression()), [[DBG437]] +; NOLZCNT-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG438:!dbg !.*]] +; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META432:metadata !.*]], metadata !DIExpression()), [[DBG438]] +; NOLZCNT-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], [[DBG439:!dbg !.*]] +; NOLZCNT: end: +; NOLZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG435]] +; NOLZCNT-NEXT: [[X_CURR_ISBITUNSET_LCSSA:%.*]] = phi i1 [ [[X_CURR_ISBITUNSET]], [[LOOP]] ], [[DBG437]] +; NOLZCNT-NEXT: call void @use1(i1 [[X_CURR_ISBITUNSET_LCSSA]]), [[DBG440:!dbg !.*]] +; NOLZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG441:!dbg !.*]] ; entry: %bitmask = shl i32 1, %bit