diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GuardUtils.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" @@ -74,6 +75,7 @@ STATISTIC(NumBranches, "Number of branches unswitched"); STATISTIC(NumSwitches, "Number of switches unswitched"); +STATISTIC(NumSelects, "Number of selects turned into branches for unswitching"); STATISTIC(NumGuards, "Number of guards turned into branches for unswitching"); STATISTIC(NumTrivial, "Number of unswitches that are trivial"); STATISTIC( @@ -2126,7 +2128,7 @@ AssumptionCache &AC, function_ref)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, - function_ref DestroyLoopCB) { + function_ref DestroyLoopCB, bool InsertFreeze) { auto *ParentBB = TI.getParent(); BranchInst *BI = dyn_cast(&TI); SwitchInst *SI = BI ? nullptr : cast(&TI); @@ -2230,25 +2232,6 @@ SE->forgetBlockAndLoopDispositions(); } - bool InsertFreeze = false; - if (FreezeLoopUnswitchCond) { - ICFLoopSafetyInfo SafetyInfo; - SafetyInfo.computeLoopSafetyInfo(&L); - InsertFreeze = !SafetyInfo.isGuaranteedToExecute(TI, &DT, &L); - } - - // Perform the isGuaranteedNotToBeUndefOrPoison() query before the transform, - // otherwise the branch instruction will have been moved outside the loop - // already, and may imply that a poison condition is always UB. - Value *FullUnswitchCond = nullptr; - if (FullUnswitch) { - FullUnswitchCond = - BI ? 
skipTrivialSelect(BI->getCondition()) : SI->getCondition(); - if (InsertFreeze) - InsertFreeze = !isGuaranteedNotToBeUndefOrPoison( - FullUnswitchCond, &AC, L.getLoopPreheader()->getTerminator(), &DT); - } - // If the edge from this terminator to a successor dominates that successor, // store a map from each block in its dominator subtree to it. This lets us // tell when cloning for a particular successor if a block is dominated by @@ -2323,10 +2306,11 @@ BasicBlock *ClonedPH = ClonedPHs.begin()->second; BI->setSuccessor(ClonedSucc, ClonedPH); BI->setSuccessor(1 - ClonedSucc, LoopPH); + Value *Cond = skipTrivialSelect(BI->getCondition()); if (InsertFreeze) - FullUnswitchCond = new FreezeInst( - FullUnswitchCond, FullUnswitchCond->getName() + ".fr", BI); - BI->setCondition(FullUnswitchCond); + Cond = new FreezeInst( + Cond, Cond->getName() + ".fr", BI); + BI->setCondition(Cond); DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH}); } else { assert(SI && "Must either be a branch or switch!"); @@ -2343,7 +2327,7 @@ if (InsertFreeze) SI->setCondition(new FreezeInst( - FullUnswitchCond, FullUnswitchCond->getName() + ".fr", SI)); + SI->getCondition(), SI->getCondition()->getName() + ".fr", SI)); // We need to use the set to populate domtree updates as even when there // are multiple cases pointing at the same successor we only want to @@ -2642,6 +2626,58 @@ return Cost; } +/// Turns a select instruction into implicit control flow branch, +/// making the following replacement: +/// +/// head: +/// --code before select-- +/// select %cond, %trueval, %falseval +/// --code after select-- +/// +/// into +/// +/// head: +/// --code before select-- +/// br i1 %cond, label %then, label %tail +/// +/// then: +/// br %tail +/// +/// tail: +/// phi [ %trueval, %then ], [ %falseval, %head] +/// --code after select-- +/// +/// It also makes all relevant DT and LI updates, so that all structures are in +/// valid state after this transform. 
+static BranchInst *turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT, + LoopInfo &LI, MemorySSAUpdater *MSSAU, + AssumptionCache *AC) { + LLVM_DEBUG(dbgs() << "Turning " << *SI << " into a branch.\n"); + BasicBlock *HeadBB = SI->getParent(); + + DomTreeUpdater DTU = + DomTreeUpdater(DT, DomTreeUpdater::UpdateStrategy::Eager); + SplitBlockAndInsertIfThen(SI->getCondition(), SI, false, + SI->getMetadata(LLVMContext::MD_prof), &DTU, &LI); + auto *CondBr = cast(HeadBB->getTerminator()); + BasicBlock *ThenBB = CondBr->getSuccessor(0), + *TailBB = CondBr->getSuccessor(1); + if (MSSAU) + MSSAU->moveAllAfterSpliceBlocks(HeadBB, TailBB, SI); + + PHINode *Phi = PHINode::Create(SI->getType(), 2, "unswitched.select", SI); + Phi->addIncoming(SI->getTrueValue(), ThenBB); + Phi->addIncoming(SI->getFalseValue(), HeadBB); + SI->replaceAllUsesWith(Phi); + SI->eraseFromParent(); + + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + + ++NumSelects; + return CondBr; +} + /// Turns a llvm.experimental.guard intrinsic into implicit control flow branch, /// making the following replacement: /// @@ -2749,9 +2785,10 @@ const BasicBlock *CondBlock = TI.getParent(); if (DT.dominates(CondBlock, Latch) && (isGuard(&TI) || - llvm::count_if(successors(&TI), [&L](const BasicBlock *SuccBB) { - return L.contains(SuccBB); - }) <= 1)) { + (TI.isTerminator() && + llvm::count_if(successors(&TI), [&L](const BasicBlock *SuccBB) { + return L.contains(SuccBB); + }) <= 1))) { NumCostMultiplierSkipped++; return 1; } @@ -2760,12 +2797,17 @@ int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size() : std::distance(LI.begin(), LI.end())); // Count amount of clones that all the candidates might cause during - // unswitching. Branch/guard counts as 1, switch counts as log2 of its cases. + // unswitching. Branch/guard/select counts as 1, switch counts as log2 of its + // cases. 
int UnswitchedClones = 0; for (const auto &Candidate : UnswitchCandidates) { const Instruction *CI = Candidate.TI; const BasicBlock *CondBlock = CI->getParent(); bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch); + if (isa(CI)) { + UnswitchedClones++; + continue; + } if (isGuard(CI)) { if (!SkipExitingSuccessors) UnswitchedClones++; @@ -2828,15 +2870,20 @@ if (LI.getLoopFor(BB) != &L) continue; - if (CollectGuards) - for (auto &I : *BB) - if (isGuard(&I)) { - auto *Cond = - skipTrivialSelect(cast(&I)->getArgOperand(0)); - // TODO: Support AND, OR conditions and partial unswitching. - if (!isa(Cond) && L.isLoopInvariant(Cond)) - UnswitchCandidates.push_back({&I, {Cond}}); - } + for (auto &I : *BB) { + if (auto *SI = dyn_cast(&I)) { + auto *Cond = SI->getCondition(); + // restrict to simple boolean selects + if (!isa(Cond) && L.isLoopInvariant(Cond) && Cond->getType()->isIntegerTy(1)) + UnswitchCandidates.push_back({&I, {Cond}}); + } else if (CollectGuards && isGuard(&I)) { + auto *Cond = + skipTrivialSelect(cast(&I)->getArgOperand(0)); + // TODO: Support AND, OR conditions and partial unswitching. + if (!isa(Cond) && L.isLoopInvariant(Cond)) + UnswitchCandidates.push_back({&I, {Cond}}); + } + } if (auto *SI = dyn_cast(BB->getTerminator())) { // We can only consider fully loop-invariant switch conditions as we need @@ -3338,7 +3385,8 @@ // loop. This is computing the new cost of unswitching a condition. // Note that guards always have 2 unique successors that are implicit and // will be materialized if we decide to unswitch it. - int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size(); + int SuccessorsCount = + isGuard(&TI) || isa(TI) ? 2 : Visited.size(); assert(SuccessorsCount > 1 && "Cannot unswitch a condition without multiple distinct successors!"); return (LoopCost - Cost) * (SuccessorsCount - 1); @@ -3380,6 +3428,32 @@ return *Best; } +// Insert a freeze on an unswitched branch if all of the following are true: +// 1. freeze-loop-unswitch-cond option is true +// 2. 
The branch may not execute in the loop pre-transformation. If a branch may +// not execute and could cause UB, it would always cause UB if it is hoisted outside +// of the loop. Insert a freeze to prevent this case. +// 3. The branch condition may be poison or undef +static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT, + AssumptionCache &AC) { + assert(isa(TI) || isa(TI)); + if (!FreezeLoopUnswitchCond) + return false; + + ICFLoopSafetyInfo SafetyInfo; + SafetyInfo.computeLoopSafetyInfo(&L); + if (SafetyInfo.isGuaranteedToExecute(TI, &DT, &L)) + return false; + + Value *Cond; + if (BranchInst *BI = dyn_cast(&TI)) + Cond = skipTrivialSelect(BI->getCondition()); + else + Cond = skipTrivialSelect(cast(&TI)->getCondition()); + return !isGuaranteedNotToBeUndefOrPoison( + Cond, &AC, L.getLoopPreheader()->getTerminator(), &DT); +} + static bool unswitchBestCondition( Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, @@ -3424,15 +3498,28 @@ if (Best.TI != PartialIVCondBranch) PartialIVInfo.InstToDuplicate.clear(); - // If the best candidate is a guard, turn it into a branch. - if (isGuard(Best.TI)) - Best.TI = - turnGuardIntoBranch(cast(Best.TI), L, DT, LI, MSSAU); + bool InsertFreeze; + if (auto *SI = dyn_cast(Best.TI)) { + // If the best candidate is a select, turn it into a branch. Select + // instructions with a poison conditional do not propagate poison, but + // branching on poison causes UB. Insert a freeze on the select + // conditional to prevent UB after turning the select into a branch. + InsertFreeze = !isGuaranteedNotToBeUndefOrPoison( + SI->getCondition(), &AC, L.getLoopPreheader()->getTerminator(), &DT); + Best.TI = turnSelectIntoBranch(SI, DT, LI, MSSAU, &AC); + } else { + // If the best candidate is a guard, turn it into a branch. 
+ if (isGuard(Best.TI)) + Best.TI = + turnGuardIntoBranch(cast(Best.TI), L, DT, LI, MSSAU); + InsertFreeze = shouldInsertFreeze(L, *Best.TI, DT, AC); + } LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = " << Best.Cost << ") terminator: " << *Best.TI << "\n"); unswitchNontrivialInvariants(L, *Best.TI, Best.Invariants, PartialIVInfo, DT, - LI, AC, UnswitchCB, SE, MSSAU, DestroyLoopCB); + LI, AC, UnswitchCB, SE, MSSAU, DestroyLoopCB, + InsertFreeze); return true; } diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/crash.ll b/llvm/test/Transforms/SimpleLoopUnswitch/crash.ll --- a/llvm/test/Transforms/SimpleLoopUnswitch/crash.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/crash.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -passes=simple-loop-unswitch -verify-memoryssa -disable-output +; RUN: opt < %s -passes='simple-loop-unswitch' -verify-memoryssa -disable-output define void @test1(ptr %S2) { entry: diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-freeze.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-freeze.ll --- a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-freeze.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-freeze.ll @@ -2332,21 +2332,26 @@ define i32 @test_partial_unswitch_all_conds_guaranteed_non_poison(i1 noundef %c.1, i1 noundef %c.2) { ; CHECK-LABEL: @test_partial_unswitch_all_conds_guaranteed_non_poison( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[C_1:%.*]], [[C_2:%.*]] -; CHECK-NEXT: br i1 [[TMP0]], label [[ENTRY_SPLIT:%.*]], label [[ENTRY_SPLIT_US:%.*]] +; CHECK-NEXT: br i1 [[C_1:%.*]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split.us: ; CHECK-NEXT: br label [[LOOP_US:%.*]] ; CHECK: loop.us: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @a() -; CHECK-NEXT: br label [[EXIT_SPLIT_US:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @a() +; CHECK-NEXT: br label [[TMP1:%.*]] +; CHECK: 1: +; CHECK-NEXT: br label [[TMP2:%.*]] +; CHECK: 2: +; 
CHECK-NEXT: [[UNSWITCHED_SELECT_US:%.*]] = phi i1 [ [[C_2:%.*]], [[TMP1]] ] +; CHECK-NEXT: br i1 [[UNSWITCHED_SELECT_US]], label [[LOOP_US]], label [[EXIT_SPLIT_US:%.*]] ; CHECK: exit.split.us: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @a() -; CHECK-NEXT: [[SEL:%.*]] = select i1 true, i1 true, i1 false -; CHECK-NEXT: br i1 [[SEL]], label [[LOOP]], label [[EXIT_SPLIT:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @a() +; CHECK-NEXT: br label [[TMP4:%.*]] +; CHECK: 4: +; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT_SPLIT:%.*]] ; CHECK: exit.split: ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-select.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-select.ll --- a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-select.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-select.ll @@ -5,11 +5,76 @@ declare i1 @foo() declare i1 @bar(i32) +declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>) define i32 @basic(i32 %N, i1 %cond, i32 %select_input) { ; CHECK-LABEL: define i32 @basic ; CHECK-SAME: (i32 [[N:%.*]], i1 [[COND:%.*]], i32 [[SELECT_INPUT:%.*]]) { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[COND]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split.us: +; CHECK-NEXT: br label [[FOR_COND_US:%.*]] +; CHECK: for.cond.us: +; CHECK-NEXT: [[RES_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[ADD_US:%.*]], [[TMP1:%.*]] ] +; CHECK-NEXT: [[I_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[INC_US:%.*]], [[TMP1]] ] +; CHECK-NEXT: [[CMP_US:%.*]] = icmp slt i32 [[I_US]], [[N]] +; CHECK-NEXT: br i1 [[CMP_US]], label [[FOR_BODY_US:%.*]], label [[FOR_COND_CLEANUP_SPLIT_US:%.*]] +; CHECK: for.body.us: +; CHECK-NEXT: br label [[TMP0:%.*]] +; CHECK: 0: +; CHECK-NEXT: br 
label [[TMP1]] +; CHECK: 1: +; CHECK-NEXT: [[UNSWITCHED_SELECT_US:%.*]] = phi i32 [ [[SELECT_INPUT]], [[TMP0]] ] +; CHECK-NEXT: [[ADD_US]] = add nuw nsw i32 [[UNSWITCHED_SELECT_US]], [[RES_US]] +; CHECK-NEXT: [[INC_US]] = add nuw nsw i32 [[I_US]], 1 +; CHECK-NEXT: br label [[FOR_COND_US]] +; CHECK: for.cond.cleanup.split.us: +; CHECK-NEXT: [[RES_LCSSA_US:%.*]] = phi i32 [ [[RES_US]], [[FOR_COND_US]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[ADD:%.*]], [[TMP2:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[INC:%.*]], [[TMP2]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP_SPLIT:%.*]] +; CHECK: for.body: +; CHECK-NEXT: br label [[TMP2]] +; CHECK: 2: +; CHECK-NEXT: [[ADD]] = add nuw nsw i32 42, [[RES]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: br label [[FOR_COND]] +; CHECK: for.cond.cleanup.split: +; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i32 [ [[RES]], [[FOR_COND]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[DOTUS_PHI:%.*]] = phi i32 [ [[RES_LCSSA]], [[FOR_COND_CLEANUP_SPLIT]] ], [ [[RES_LCSSA_US]], [[FOR_COND_CLEANUP_SPLIT_US]] ] +; CHECK-NEXT: ret i32 [[DOTUS_PHI]] +; +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %res = phi i32 [ 0, %entry ], [ %add, %for.body ] + %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp slt i32 %i, %N + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %for.cond + %cond1 = select i1 %cond, i32 %select_input, i32 42 + %add = add nuw nsw i32 %cond1, %res + %inc = add nuw nsw i32 %i, 1 + br label %for.cond + +for.cond.cleanup: ; preds = %for.cond + ret i32 %res +} + +define i32 @basic_veccond(i32 %N, <2 x i1> %cond, <2 x i32> %select_input) { +; 
CHECK-LABEL: define i32 @basic_veccond +; CHECK-SAME: (i32 [[N:%.*]], <2 x i1> [[COND:%.*]], <2 x i32> [[SELECT_INPUT:%.*]]) { +; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_BODY:%.*]] ] @@ -17,8 +82,9 @@ ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[COND1:%.*]] = select i1 [[COND]], i32 [[SELECT_INPUT]], i32 42 -; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[COND1]], [[RES]] +; CHECK-NEXT: [[COND1:%.*]] = select <2 x i1> [[COND]], <2 x i32> [[SELECT_INPUT]], <2 x i32> +; CHECK-NEXT: [[VREDUCE:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[COND1]]) +; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[VREDUCE]], [[RES]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I]], 1 ; CHECK-NEXT: br label [[FOR_COND]] ; CHECK: for.cond.cleanup: @@ -35,8 +101,9 @@ br i1 %cmp, label %for.body, label %for.cond.cleanup for.body: ; preds = %for.cond - %cond1 = select i1 %cond, i32 %select_input, i32 42 - %add = add nuw nsw i32 %cond1, %res + %cond1 = select <2 x i1> %cond, <2 x i32> %select_input, <2 x i32> + %vreduce = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %cond1) + %add = add nuw nsw i32 %vreduce, %res %inc = add nuw nsw i32 %i, 1 br label %for.cond @@ -48,20 +115,46 @@ ; CHECK-LABEL: define i32 @select_phi_input ; CHECK-SAME: (i32 [[N:%.*]], i1 [[COND:%.*]]) { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[COND]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split.us: +; CHECK-NEXT: br label [[FOR_COND_US:%.*]] +; CHECK: for.cond.us: +; CHECK-NEXT: [[RES_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[ADD_US:%.*]], [[TMP1:%.*]] ] +; CHECK-NEXT: [[I_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[INC_US:%.*]], [[TMP1]] ] +; CHECK-NEXT: [[CMP_US:%.*]] = icmp slt 
i32 [[I_US]], [[N]] +; CHECK-NEXT: br i1 [[CMP_US]], label [[FOR_BODY_US:%.*]], label [[FOR_COND_CLEANUP_SPLIT_US:%.*]] +; CHECK: for.body.us: +; CHECK-NEXT: br label [[TMP0:%.*]] +; CHECK: 0: +; CHECK-NEXT: br label [[TMP1]] +; CHECK: 1: +; CHECK-NEXT: [[UNSWITCHED_SELECT_US:%.*]] = phi i32 [ [[I_US]], [[TMP0]] ] +; CHECK-NEXT: [[ADD_US]] = add nuw nsw i32 [[UNSWITCHED_SELECT_US]], [[RES_US]] +; CHECK-NEXT: [[INC_US]] = add nuw nsw i32 [[I_US]], 1 +; CHECK-NEXT: br label [[FOR_COND_US]] +; CHECK: for.cond.cleanup.split.us: +; CHECK-NEXT: [[RES_LCSSA_US:%.*]] = phi i32 [ [[RES_US]], [[FOR_COND_US]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] +; CHECK: entry.split: ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: -; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_BODY:%.*]] ] -; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[ADD:%.*]], [[TMP2:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[INC:%.*]], [[TMP2]] ] ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP_SPLIT:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[COND1:%.*]] = select i1 [[COND]], i32 [[I]], i32 42 -; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[COND1]], [[RES]] +; CHECK-NEXT: br label [[TMP2]] +; CHECK: 2: +; CHECK-NEXT: [[ADD]] = add nuw nsw i32 42, [[RES]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I]], 1 ; CHECK-NEXT: br label [[FOR_COND]] -; CHECK: for.cond.cleanup: +; CHECK: for.cond.cleanup.split: ; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i32 [ [[RES]], [[FOR_COND]] ] -; CHECK-NEXT: ret i32 [[RES_LCSSA]] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[DOTUS_PHI:%.*]] = phi i32 [ [[RES_LCSSA]], [[FOR_COND_CLEANUP_SPLIT]] ], [ [[RES_LCSSA_US]], 
[[FOR_COND_CLEANUP_SPLIT_US]] ] +; CHECK-NEXT: ret i32 [[DOTUS_PHI]] ; entry: br label %for.cond @@ -86,20 +179,45 @@ ; CHECK-LABEL: define i32 @basic_cond_noundef ; CHECK-SAME: (i32 [[N:%.*]], i1 noundef [[COND:%.*]]) { ; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split.us: +; CHECK-NEXT: br label [[FOR_COND_US:%.*]] +; CHECK: for.cond.us: +; CHECK-NEXT: [[RES_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[ADD_US:%.*]], [[TMP1:%.*]] ] +; CHECK-NEXT: [[I_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[INC_US:%.*]], [[TMP1]] ] +; CHECK-NEXT: [[CMP_US:%.*]] = icmp slt i32 [[I_US]], [[N]] +; CHECK-NEXT: br i1 [[CMP_US]], label [[FOR_BODY_US:%.*]], label [[FOR_COND_CLEANUP_SPLIT_US:%.*]] +; CHECK: for.body.us: +; CHECK-NEXT: br label [[TMP0:%.*]] +; CHECK: 0: +; CHECK-NEXT: br label [[TMP1]] +; CHECK: 1: +; CHECK-NEXT: [[UNSWITCHED_SELECT_US:%.*]] = phi i32 [ [[I_US]], [[TMP0]] ] +; CHECK-NEXT: [[ADD_US]] = add nuw nsw i32 [[UNSWITCHED_SELECT_US]], [[RES_US]] +; CHECK-NEXT: [[INC_US]] = add nuw nsw i32 [[I_US]], 1 +; CHECK-NEXT: br label [[FOR_COND_US]] +; CHECK: for.cond.cleanup.split.us: +; CHECK-NEXT: [[RES_LCSSA_US:%.*]] = phi i32 [ [[RES_US]], [[FOR_COND_US]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] +; CHECK: entry.split: ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: -; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_BODY:%.*]] ] -; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[ADD:%.*]], [[TMP2:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[INC:%.*]], [[TMP2]] ] ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP_SPLIT:%.*]] ; CHECK: for.body: -; 
CHECK-NEXT: [[COND1:%.*]] = select i1 [[COND]], i32 [[I]], i32 42 -; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[COND1]], [[RES]] +; CHECK-NEXT: br label [[TMP2]] +; CHECK: 2: +; CHECK-NEXT: [[ADD]] = add nuw nsw i32 42, [[RES]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I]], 1 ; CHECK-NEXT: br label [[FOR_COND]] -; CHECK: for.cond.cleanup: +; CHECK: for.cond.cleanup.split: ; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i32 [ [[RES]], [[FOR_COND]] ] -; CHECK-NEXT: ret i32 [[RES_LCSSA]] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[DOTUS_PHI:%.*]] = phi i32 [ [[RES_LCSSA]], [[FOR_COND_CLEANUP_SPLIT]] ], [ [[RES_LCSSA_US]], [[FOR_COND_CLEANUP_SPLIT_US]] ] +; CHECK-NEXT: ret i32 [[DOTUS_PHI]] ; entry: br label %for.cond @@ -164,21 +282,107 @@ ; CHECK-LABEL: define i32 @chained_select ; CHECK-SAME: (i32 [[N:%.*]], i1 [[COND:%.*]], i1 [[COND2:%.*]]) { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[COND]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split.us: +; CHECK-NEXT: [[COND2_FR13:%.*]] = freeze i1 [[COND2]] +; CHECK-NEXT: br i1 [[COND2_FR13]], label [[ENTRY_SPLIT_US_SPLIT_US:%.*]], label [[ENTRY_SPLIT_US_SPLIT:%.*]] +; CHECK: entry.split.us.split.us: +; CHECK-NEXT: br label [[FOR_COND_US_US:%.*]] +; CHECK: for.cond.us.us: +; CHECK-NEXT: [[RES_US_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US_SPLIT_US]] ], [ [[ADD_US_US:%.*]], [[TMP3:%.*]] ] +; CHECK-NEXT: [[I_US_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US_SPLIT_US]] ], [ [[INC_US_US:%.*]], [[TMP3]] ] +; CHECK-NEXT: [[CMP_US_US:%.*]] = icmp slt i32 [[I_US_US]], [[N]] +; CHECK-NEXT: br i1 [[CMP_US_US]], label [[FOR_BODY_US_US:%.*]], label [[FOR_COND_CLEANUP_SPLIT_US_SPLIT_US:%.*]] +; CHECK: for.body.us.us: +; CHECK-NEXT: br label [[TMP0:%.*]] +; CHECK: 0: +; CHECK-NEXT: br label [[TMP1:%.*]] +; CHECK: 1: +; CHECK-NEXT: [[UNSWITCHED_SELECT_US_US:%.*]] = phi i32 [ [[I_US_US]], [[TMP0]] ] +; CHECK-NEXT: br label 
[[TMP2:%.*]] +; CHECK: 2: +; CHECK-NEXT: br label [[TMP3]] +; CHECK: 3: +; CHECK-NEXT: [[UNSWITCHED_SELECT_US11:%.*]] = phi i32 [ [[UNSWITCHED_SELECT_US_US]], [[TMP2]] ] +; CHECK-NEXT: [[ADD_US_US]] = add nuw nsw i32 [[UNSWITCHED_SELECT_US11]], [[RES_US_US]] +; CHECK-NEXT: [[INC_US_US]] = add nuw nsw i32 [[I_US_US]], 1 +; CHECK-NEXT: br label [[FOR_COND_US_US]] +; CHECK: for.cond.cleanup.split.us.split.us: +; CHECK-NEXT: [[RES_LCSSA_US_US:%.*]] = phi i32 [ [[RES_US_US]], [[FOR_COND_US_US]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_SPLIT_US:%.*]] +; CHECK: entry.split.us.split: +; CHECK-NEXT: br label [[FOR_COND_US:%.*]] +; CHECK: for.cond.us: +; CHECK-NEXT: [[RES_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US_SPLIT]] ], [ [[ADD_US:%.*]], [[TMP6:%.*]] ] +; CHECK-NEXT: [[I_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US_SPLIT]] ], [ [[INC_US:%.*]], [[TMP6]] ] +; CHECK-NEXT: [[CMP_US:%.*]] = icmp slt i32 [[I_US]], [[N]] +; CHECK-NEXT: br i1 [[CMP_US]], label [[FOR_BODY_US:%.*]], label [[FOR_COND_CLEANUP_SPLIT_US_SPLIT:%.*]] +; CHECK: for.body.us: +; CHECK-NEXT: br label [[TMP4:%.*]] +; CHECK: 4: +; CHECK-NEXT: br label [[TMP5:%.*]] +; CHECK: 5: +; CHECK-NEXT: [[UNSWITCHED_SELECT_US:%.*]] = phi i32 [ [[I_US]], [[TMP4]] ] +; CHECK-NEXT: br label [[TMP6]] +; CHECK: 6: +; CHECK-NEXT: [[ADD_US]] = add nuw nsw i32 24, [[RES_US]] +; CHECK-NEXT: [[INC_US]] = add nuw nsw i32 [[I_US]], 1 +; CHECK-NEXT: br label [[FOR_COND_US]] +; CHECK: for.cond.cleanup.split.us.split: +; CHECK-NEXT: [[RES_LCSSA_US:%.*]] = phi i32 [ [[RES_US]], [[FOR_COND_US]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_SPLIT_US]] +; CHECK: for.cond.cleanup.split.us: +; CHECK-NEXT: [[DOTUS_PHI12:%.*]] = phi i32 [ [[RES_LCSSA_US]], [[FOR_COND_CLEANUP_SPLIT_US_SPLIT]] ], [ [[RES_LCSSA_US_US]], [[FOR_COND_CLEANUP_SPLIT_US_SPLIT_US]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: [[COND2_FR:%.*]] = freeze i1 [[COND2]] +; CHECK-NEXT: br i1 [[COND2_FR]], label 
[[ENTRY_SPLIT_SPLIT_US:%.*]], label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split.us: +; CHECK-NEXT: br label [[FOR_COND_US1:%.*]] +; CHECK: for.cond.us1: +; CHECK-NEXT: [[RES_US2:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_SPLIT_US]] ], [ [[ADD_US7:%.*]], [[TMP9:%.*]] ] +; CHECK-NEXT: [[I_US3:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_SPLIT_US]] ], [ [[INC_US8:%.*]], [[TMP9]] ] +; CHECK-NEXT: [[CMP_US4:%.*]] = icmp slt i32 [[I_US3]], [[N]] +; CHECK-NEXT: br i1 [[CMP_US4]], label [[FOR_BODY_US5:%.*]], label [[FOR_COND_CLEANUP_SPLIT_SPLIT_US:%.*]] +; CHECK: for.body.us5: +; CHECK-NEXT: br label [[TMP7:%.*]] +; CHECK: 7: +; CHECK-NEXT: br label [[TMP8:%.*]] +; CHECK: 8: +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: [[UNSWITCHED_SELECT_US6:%.*]] = phi i32 [ 42, [[TMP8]] ] +; CHECK-NEXT: [[ADD_US7]] = add nuw nsw i32 [[UNSWITCHED_SELECT_US6]], [[RES_US2]] +; CHECK-NEXT: [[INC_US8]] = add nuw nsw i32 [[I_US3]], 1 +; CHECK-NEXT: br label [[FOR_COND_US1]] +; CHECK: for.cond.cleanup.split.split.us: +; CHECK-NEXT: [[RES_LCSSA_US9:%.*]] = phi i32 [ [[RES_US2]], [[FOR_COND_US1]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_SPLIT:%.*]] +; CHECK: entry.split.split: ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: -; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_BODY:%.*]] ] -; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_SPLIT]] ], [ [[ADD:%.*]], [[TMP11:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_SPLIT]] ], [ [[INC:%.*]], [[TMP11]] ] ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP_SPLIT_SPLIT:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[SELECT1:%.*]] = select i1 [[COND]], i32 [[I]], i32 42 -; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[COND2]], i32 [[SELECT1]], i32 
24 -; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[SELECT2]], [[RES]] +; CHECK-NEXT: br label [[TMP10:%.*]] +; CHECK: 10: +; CHECK-NEXT: br label [[TMP11]] +; CHECK: 11: +; CHECK-NEXT: [[ADD]] = add nuw nsw i32 24, [[RES]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I]], 1 ; CHECK-NEXT: br label [[FOR_COND]] -; CHECK: for.cond.cleanup: +; CHECK: for.cond.cleanup.split.split: ; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i32 [ [[RES]], [[FOR_COND]] ] -; CHECK-NEXT: ret i32 [[RES_LCSSA]] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_SPLIT]] +; CHECK: for.cond.cleanup.split: +; CHECK-NEXT: [[DOTUS_PHI10:%.*]] = phi i32 [ [[RES_LCSSA]], [[FOR_COND_CLEANUP_SPLIT_SPLIT]] ], [ [[RES_LCSSA_US9]], [[FOR_COND_CLEANUP_SPLIT_SPLIT_US]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[DOTUS_PHI:%.*]] = phi i32 [ [[DOTUS_PHI10]], [[FOR_COND_CLEANUP_SPLIT]] ], [ [[DOTUS_PHI12]], [[FOR_COND_CLEANUP_SPLIT_US]] ] +; CHECK-NEXT: ret i32 [[DOTUS_PHI]] ; entry: br label %for.cond @@ -204,27 +408,60 @@ ; CHECK-LABEL: define i32 @select_in_if ; CHECK-SAME: (i32 [[N:%.*]], i1 [[COND:%.*]]) { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[COND]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split.us: +; CHECK-NEXT: br label [[FOR_COND_US:%.*]] +; CHECK: for.cond.us: +; CHECK-NEXT: [[RES_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[ADD_US:%.*]], [[FOR_BODY_END_US:%.*]] ] +; CHECK-NEXT: [[I_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[INC_US:%.*]], [[FOR_BODY_END_US]] ] +; CHECK-NEXT: [[CMP_US:%.*]] = icmp slt i32 [[I_US]], [[N]] +; CHECK-NEXT: br i1 [[CMP_US]], label [[FOR_BODY_US:%.*]], label [[FOR_COND_CLEANUP_SPLIT_US:%.*]] +; CHECK: for.body.us: +; CHECK-NEXT: [[UREM_US:%.*]] = urem i32 [[I_US]], 2 +; CHECK-NEXT: [[IF_COND_US:%.*]] = icmp eq i32 [[UREM_US]], 0 +; CHECK-NEXT: br i1 [[IF_COND_US]], label [[FOR_BODY_IF_US:%.*]], label [[FOR_BODY_END_US]] +; CHECK: 
for.body.if.us: +; CHECK-NEXT: br label [[TMP0:%.*]] +; CHECK: for.body.end.us: +; CHECK-NEXT: [[P_US:%.*]] = phi i32 [ [[UNSWITCHED_SELECT_US:%.*]], [[TMP1:%.*]] ], [ 24, [[FOR_BODY_US]] ] +; CHECK-NEXT: [[ADD_US]] = add nuw nsw i32 [[P_US]], [[RES_US]] +; CHECK-NEXT: [[INC_US]] = add nuw nsw i32 [[I_US]], 1 +; CHECK-NEXT: br label [[FOR_COND_US]] +; CHECK: 0: +; CHECK-NEXT: br label [[TMP1]] +; CHECK: 1: +; CHECK-NEXT: [[UNSWITCHED_SELECT_US]] = phi i32 [ [[I_US]], [[TMP0]] ] +; CHECK-NEXT: br label [[FOR_BODY_END_US]] +; CHECK: for.cond.cleanup.split.us: +; CHECK-NEXT: [[RES_LCSSA_US:%.*]] = phi i32 [ [[RES_US]], [[FOR_COND_US]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] +; CHECK: entry.split: ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: -; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_BODY_END:%.*]] ] -; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY_END]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[ADD:%.*]], [[FOR_BODY_END:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[INC:%.*]], [[FOR_BODY_END]] ] ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP_SPLIT:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[UREM:%.*]] = urem i32 [[I]], 2 ; CHECK-NEXT: [[IF_COND:%.*]] = icmp eq i32 [[UREM]], 0 ; CHECK-NEXT: br i1 [[IF_COND]], label [[FOR_BODY_IF:%.*]], label [[FOR_BODY_END]] ; CHECK: for.body.if: -; CHECK-NEXT: [[COND1:%.*]] = select i1 [[COND]], i32 [[I]], i32 42 +; CHECK-NEXT: br label [[TMP2:%.*]] +; CHECK: 2: ; CHECK-NEXT: br label [[FOR_BODY_END]] ; CHECK: for.body.end: -; CHECK-NEXT: [[P:%.*]] = phi i32 [ [[COND1]], [[FOR_BODY_IF]] ], [ 24, [[FOR_BODY]] ] +; CHECK-NEXT: [[P:%.*]] = phi i32 [ 42, [[TMP2]] ], [ 24, [[FOR_BODY]] ] ; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[P]], 
[[RES]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I]], 1 ; CHECK-NEXT: br label [[FOR_COND]] -; CHECK: for.cond.cleanup: +; CHECK: for.cond.cleanup.split: ; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i32 [ [[RES]], [[FOR_COND]] ] -; CHECK-NEXT: ret i32 [[RES_LCSSA]] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[DOTUS_PHI:%.*]] = phi i32 [ [[RES_LCSSA]], [[FOR_COND_CLEANUP_SPLIT]] ], [ [[RES_LCSSA_US]], [[FOR_COND_CLEANUP_SPLIT_US]] ] +; CHECK-NEXT: ret i32 [[DOTUS_PHI]] ; entry: br label %for.cond @@ -258,30 +495,66 @@ ; CHECK-LABEL: define i32 @select_in_if_else ; CHECK-SAME: (i32 [[N:%.*]], i1 [[COND:%.*]]) { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[COND]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split.us: +; CHECK-NEXT: br label [[FOR_COND_US:%.*]] +; CHECK: for.cond.us: +; CHECK-NEXT: [[RES_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[ADD_US:%.*]], [[FOR_BODY_END_US:%.*]] ] +; CHECK-NEXT: [[I_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[INC_US:%.*]], [[FOR_BODY_END_US]] ] +; CHECK-NEXT: [[CMP_US:%.*]] = icmp slt i32 [[I_US]], [[N]] +; CHECK-NEXT: br i1 [[CMP_US]], label [[FOR_BODY_US:%.*]], label [[FOR_COND_CLEANUP_SPLIT_US:%.*]] +; CHECK: for.body.us: +; CHECK-NEXT: [[UREM_US:%.*]] = urem i32 [[I_US]], 2 +; CHECK-NEXT: [[IF_COND_US:%.*]] = icmp eq i32 [[UREM_US]], 0 +; CHECK-NEXT: br i1 [[IF_COND_US]], label [[FOR_BODY_IF_US:%.*]], label [[FOR_BODY_ELSE_US:%.*]] +; CHECK: for.body.else.us: +; CHECK-NEXT: br label [[TMP0:%.*]] +; CHECK: for.body.if.us: +; CHECK-NEXT: [[COND1A_US:%.*]] = select i1 true, i32 [[I_US]], i32 42 +; CHECK-NEXT: br label [[FOR_BODY_END_US]] +; CHECK: for.body.end.us: +; CHECK-NEXT: [[P_US:%.*]] = phi i32 [ [[COND1A_US]], [[FOR_BODY_IF_US]] ], [ [[UNSWITCHED_SELECT_US:%.*]], [[TMP1:%.*]] ] +; CHECK-NEXT: [[ADD_US]] = add nuw nsw i32 [[P_US]], [[RES_US]] +; CHECK-NEXT: [[INC_US]] = add nuw nsw i32 
[[I_US]], 1 +; CHECK-NEXT: br label [[FOR_COND_US]] +; CHECK: 0: +; CHECK-NEXT: br label [[TMP1]] +; CHECK: 1: +; CHECK-NEXT: [[UNSWITCHED_SELECT_US]] = phi i32 [ 24, [[TMP0]] ] +; CHECK-NEXT: br label [[FOR_BODY_END_US]] +; CHECK: for.cond.cleanup.split.us: +; CHECK-NEXT: [[RES_LCSSA_US:%.*]] = phi i32 [ [[RES_US]], [[FOR_COND_US]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] +; CHECK: entry.split: ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: -; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_BODY_END:%.*]] ] -; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY_END]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[ADD:%.*]], [[FOR_BODY_END:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[INC:%.*]], [[FOR_BODY_END]] ] ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP_SPLIT:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[UREM:%.*]] = urem i32 [[I]], 2 ; CHECK-NEXT: [[IF_COND:%.*]] = icmp eq i32 [[UREM]], 0 ; CHECK-NEXT: br i1 [[IF_COND]], label [[FOR_BODY_IF:%.*]], label [[FOR_BODY_ELSE:%.*]] ; CHECK: for.body.if: -; CHECK-NEXT: [[COND1A:%.*]] = select i1 [[COND]], i32 [[I]], i32 42 +; CHECK-NEXT: [[COND1A:%.*]] = select i1 false, i32 [[I]], i32 42 ; CHECK-NEXT: br label [[FOR_BODY_END]] ; CHECK: for.body.else: -; CHECK-NEXT: [[COND1B:%.*]] = select i1 [[COND]], i32 24, i32 [[I]] +; CHECK-NEXT: br label [[TMP2:%.*]] +; CHECK: 2: ; CHECK-NEXT: br label [[FOR_BODY_END]] ; CHECK: for.body.end: -; CHECK-NEXT: [[P:%.*]] = phi i32 [ [[COND1A]], [[FOR_BODY_IF]] ], [ [[COND1B]], [[FOR_BODY_ELSE]] ] +; CHECK-NEXT: [[P:%.*]] = phi i32 [ [[COND1A]], [[FOR_BODY_IF]] ], [ [[I]], [[TMP2]] ] ; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[P]], [[RES]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I]], 1 ; CHECK-NEXT: 
br label [[FOR_COND]] -; CHECK: for.cond.cleanup: +; CHECK: for.cond.cleanup.split: ; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i32 [ [[RES]], [[FOR_COND]] ] -; CHECK-NEXT: ret i32 [[RES_LCSSA]] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[DOTUS_PHI:%.*]] = phi i32 [ [[RES_LCSSA]], [[FOR_COND_CLEANUP_SPLIT]] ], [ [[RES_LCSSA_US]], [[FOR_COND_CLEANUP_SPLIT_US]] ] +; CHECK-NEXT: ret i32 [[DOTUS_PHI]] ; entry: br label %for.cond @@ -324,21 +597,56 @@ ; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP17_NOT]], [[CMP215_NOT]] ; CHECK-NEXT: br i1 [[OR_COND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]] ; CHECK: for.cond1.preheader.us.preheader: +; CHECK-NEXT: br i1 [[COND]], label [[FOR_COND1_PREHEADER_US_PREHEADER_SPLIT_US:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER_SPLIT:%.*]] +; CHECK: for.cond1.preheader.us.preheader.split.us: +; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US_US:%.*]] +; CHECK: for.cond1.preheader.us.us: +; CHECK-NEXT: [[I_018_US_US:%.*]] = phi i32 [ [[INC7_US_US:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER_SPLIT_US]] ] +; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US_SPLIT_US_US:%.*]] +; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us.us: +; CHECK-NEXT: [[INC7_US_US]] = add nuw i32 [[I_018_US_US]], 1 +; CHECK-NEXT: [[EXITCOND21_NOT_US:%.*]] = icmp eq i32 [[INC7_US_US]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND21_NOT_US]], label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT_US:%.*]], label [[FOR_COND1_PREHEADER_US_US]] +; CHECK: for.cond1.preheader.us.split.us.us: +; CHECK-NEXT: br label [[FOR_BODY4_US_US_US:%.*]] +; CHECK: for.body4.us.us.us: +; CHECK-NEXT: [[J_016_US_US_US:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_US_SPLIT_US_US]] ], [ [[INC_US_US_US:%.*]], [[TMP1:%.*]] ] +; CHECK-NEXT: br label [[TMP0:%.*]] +; CHECK: 0: +; CHECK-NEXT: br label [[TMP1]] +; CHECK: 1: +; CHECK-NEXT: [[UNSWITCHED_SELECT_US_US:%.*]] = phi i32 [ 
[[I_018_US_US]], [[TMP0]] ] +; CHECK-NEXT: tail call void @bar(i32 noundef [[UNSWITCHED_SELECT_US_US]]) +; CHECK-NEXT: [[INC_US_US_US]] = add nuw i32 [[J_016_US_US_US]], 1 +; CHECK-NEXT: [[EXITCOND_NOT_US_US:%.*]] = icmp eq i32 [[INC_US_US_US]], [[M]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT_US_US]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_SPLIT_US_US:%.*]], label [[FOR_BODY4_US_US_US]] +; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us.split.us.us: +; CHECK-NEXT: br label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_US]] +; CHECK: for.cond.cleanup.loopexit.split.us: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] +; CHECK: for.cond1.preheader.us.preheader.split: ; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]] ; CHECK: for.cond1.preheader.us: -; CHECK-NEXT: [[I_018_US:%.*]] = phi i32 [ [[INC7_US:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ] +; CHECK-NEXT: [[I_018_US:%.*]] = phi i32 [ [[INC7_US:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER_SPLIT]] ] +; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US_SPLIT:%.*]] +; CHECK: for.cond1.preheader.us.split: ; CHECK-NEXT: br label [[FOR_BODY4_US:%.*]] ; CHECK: for.body4.us: -; CHECK-NEXT: [[J_016_US:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_US]] ], [ [[INC_US:%.*]], [[FOR_BODY4_US]] ] -; CHECK-NEXT: [[COND5_US:%.*]] = select i1 [[COND]], i32 [[I_018_US]], i32 [[J_016_US]] -; CHECK-NEXT: tail call void @bar(i32 noundef [[COND5_US]]) +; CHECK-NEXT: [[J_016_US:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_US_SPLIT]] ], [ [[INC_US:%.*]], [[TMP2:%.*]] ] +; CHECK-NEXT: br label [[TMP2]] +; CHECK: 2: +; CHECK-NEXT: tail call void @bar(i32 noundef [[J_016_US]]) ; CHECK-NEXT: [[INC_US]] = add nuw i32 [[J_016_US]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_US]], [[M]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]], label [[FOR_BODY4_US]] +; CHECK-NEXT: br 
i1 [[EXITCOND_NOT]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_SPLIT:%.*]], label [[FOR_BODY4_US]] +; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us.split: +; CHECK-NEXT: br label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] ; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us: ; CHECK-NEXT: [[INC7_US]] = add nuw i32 [[I_018_US]], 1 ; CHECK-NEXT: [[EXITCOND21_NOT:%.*]] = icmp eq i32 [[INC7_US]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND21_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_COND1_PREHEADER_US]] +; CHECK-NEXT: br i1 [[EXITCOND21_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT:%.*]], label [[FOR_COND1_PREHEADER_US]] +; CHECK: for.cond.cleanup.loopexit.split: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: @@ -385,14 +693,35 @@ ; CHECK-NEXT: [[I_021_US:%.*]] = phi i32 [ [[INC9_US:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ] ; CHECK-NEXT: [[REM_US:%.*]] = and i32 [[I_021_US]], 1 ; CHECK-NEXT: [[CMP5_US:%.*]] = icmp eq i32 [[REM_US]], 0 +; CHECK-NEXT: [[CMP5_US_FR:%.*]] = freeze i1 [[CMP5_US]] +; CHECK-NEXT: br i1 [[CMP5_US_FR]], label [[FOR_COND1_PREHEADER_US_SPLIT_US:%.*]], label [[FOR_COND1_PREHEADER_US_SPLIT:%.*]] +; CHECK: for.cond1.preheader.us.split.us: +; CHECK-NEXT: br label [[FOR_BODY4_US_US:%.*]] +; CHECK: for.body4.us.us: +; CHECK-NEXT: [[J_019_US_US:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_US_SPLIT_US]] ], [ [[INC_US_US:%.*]], [[TMP1:%.*]] ] +; CHECK-NEXT: br label [[TMP0:%.*]] +; CHECK: 0: +; CHECK-NEXT: br label [[TMP1]] +; CHECK: 1: +; CHECK-NEXT: [[UNSWITCHED_SELECT_US:%.*]] = phi i32 [ [[I_021_US]], [[TMP0]] ] +; CHECK-NEXT: tail call void @bar(i32 noundef [[UNSWITCHED_SELECT_US]]) +; CHECK-NEXT: [[INC_US_US]] = add nuw i32 [[J_019_US_US]], 1 +; CHECK-NEXT: [[EXITCOND_NOT_US:%.*]] = icmp eq i32 [[INC_US_US]], [[M]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT_US]], 
label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_SPLIT_US:%.*]], label [[FOR_BODY4_US_US]] +; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us.split.us: +; CHECK-NEXT: br label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] +; CHECK: for.cond1.preheader.us.split: ; CHECK-NEXT: br label [[FOR_BODY4_US:%.*]] ; CHECK: for.body4.us: -; CHECK-NEXT: [[J_019_US:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_US]] ], [ [[INC_US:%.*]], [[FOR_BODY4_US]] ] -; CHECK-NEXT: [[COND7_US:%.*]] = select i1 [[CMP5_US]], i32 [[I_021_US]], i32 [[J_019_US]] -; CHECK-NEXT: tail call void @bar(i32 noundef [[COND7_US]]) +; CHECK-NEXT: [[J_019_US:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_US_SPLIT]] ], [ [[INC_US:%.*]], [[TMP2:%.*]] ] +; CHECK-NEXT: br label [[TMP2]] +; CHECK: 2: +; CHECK-NEXT: tail call void @bar(i32 noundef [[J_019_US]]) ; CHECK-NEXT: [[INC_US]] = add nuw i32 [[J_019_US]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_US]], [[M]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]], label [[FOR_BODY4_US]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_SPLIT:%.*]], label [[FOR_BODY4_US]] +; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us.split: +; CHECK-NEXT: br label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] ; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us: ; CHECK-NEXT: [[INC9_US]] = add nuw i32 [[I_021_US]], 1 ; CHECK-NEXT: [[EXITCOND24_NOT:%.*]] = icmp eq i32 [[INC9_US]], [[N]] diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-trivial-select.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-trivial-select.ll --- a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-trivial-select.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-trivial-select.ll @@ -88,21 +88,34 @@ ; CHECK-LABEL: @unswitch_trivial_select_cmp_outside( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[X:%.*]], 100 -; CHECK-NEXT: br i1 [[C]], 
label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]] +; CHECK-NEXT: [[C_FR:%.*]] = freeze i1 [[C]] +; CHECK-NEXT: br i1 [[C_FR]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split.us: ; CHECK-NEXT: br label [[LOOP_US:%.*]] ; CHECK: loop.us: -; CHECK-NEXT: [[P_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ 35, [[LOOP_US]] ] -; CHECK-NEXT: br label [[LOOP_US]] +; CHECK-NEXT: [[P_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ 35, [[TMP1:%.*]] ] +; CHECK-NEXT: br label [[TMP0:%.*]] +; CHECK: 0: +; CHECK-NEXT: br label [[TMP1]] +; CHECK: 1: +; CHECK-NEXT: [[UNSWITCHED_SELECT_US:%.*]] = phi i1 [ true, [[TMP0]] ] +; CHECK-NEXT: br i1 [[UNSWITCHED_SELECT_US]], label [[LOOP_US]], label [[EXIT_SPLIT_US:%.*]] +; CHECK: exit.split.us: +; CHECK-NEXT: [[LCSSA_US:%.*]] = phi i32 [ [[P_US]], [[TMP1]] ] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[P:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ] -; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 false, i1 true, i1 false -; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK-NEXT: [[P:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ 35, [[TMP2:%.*]] ] +; CHECK-NEXT: br label [[TMP2]] +; CHECK: 2: +; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT_SPLIT:%.*]] +; CHECK: exit.split: +; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ [[P]], [[TMP2]] ] +; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ [[P]], [[LOOP]] ] -; CHECK-NEXT: ret i32 [[LCSSA]] +; CHECK-NEXT: [[DOTUS_PHI:%.*]] = phi i32 [ [[LCSSA]], [[EXIT_SPLIT]] ], [ [[LCSSA_US]], [[EXIT_SPLIT_US]] ] +; CHECK-NEXT: ret i32 [[DOTUS_PHI]] ; entry: %c = icmp ult i32 %x, 100