diff --git a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp --- a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -99,6 +99,12 @@ Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden); +static cl::opt + MSSAThreshold("loop-unswitch-memoryssa-threshold", + cl::desc("Max number of memory uses to explore during " + "partial unswitching analysis"), + cl::init(100), cl::Hidden); + namespace { class LUAnalysisCache { @@ -185,6 +191,7 @@ Loop *CurrentLoop = nullptr; DominatorTree *DT = nullptr; MemorySSA *MSSA = nullptr; + AAResults *AA = nullptr; std::unique_ptr MSSAU; BasicBlock *LoopHeader = nullptr; BasicBlock *LoopPreheader = nullptr; @@ -249,19 +256,22 @@ bool tryTrivialLoopUnswitch(bool &Changed); bool unswitchIfProfitable(Value *LoopCond, Constant *Val, - Instruction *TI = nullptr); + Instruction *TI = nullptr, + ArrayRef ToDuplicate = {}); void unswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, BasicBlock *ExitBlock, Instruction *TI); void unswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L, - Instruction *TI); + Instruction *TI, + ArrayRef ToDuplicate = {}); void rewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, Constant *Val, bool IsEqual); - void emitPreheaderBranchOnCondition(Value *LIC, Constant *Val, - BasicBlock *TrueDest, - BasicBlock *FalseDest, - BranchInst *OldBranch, Instruction *TI); + void + emitPreheaderBranchOnCondition(Value *LIC, Constant *Val, + BasicBlock *TrueDest, BasicBlock *FalseDest, + BranchInst *OldBranch, Instruction *TI, + ArrayRef ToDuplicate = {}); void simplifyCode(std::vector &Worklist, Loop *L); @@ -528,6 +538,7 @@ LI = &getAnalysis().getLoopInfo(); LPM = &LPMRef; DT = &getAnalysis().getDomTree(); + AA = &getAnalysis().getAAResults(); if (EnableMSSALoopDependency) { MSSA = &getAnalysis().getMSSA(); MSSAU = std::make_unique(MSSA); @@ -629,6 +640,129 @@ return 
false; } +/// Check if the loop header has a conditional branch that is not +/// loop-invariant, because it involves load instructions. If all paths from +/// either the true or false successor to the header or loop exits do not +/// modify the memory feeding the condition, perform 'partial unswitching'. That +/// is, duplicate the instructions feeding the condition in the pre-header. Then +/// unswitch on the duplicated condition. The condition is now known in the +/// unswitched version for the 'invariant' path through the original loop. +/// +/// If the branch condition of the header is partially invariant, return a pair +/// containing the instructions to duplicate and a boolean Constant to update +/// the condition in the loops created for the true or false successors. +static std::pair, Constant *> +hasPartialIVCondition(Loop *L, MemorySSA &MSSA, AAResults *AA) { + SmallVector ToDuplicate; + + auto *TI = dyn_cast(L->getHeader()->getTerminator()); + if (!TI || !TI->isConditional()) + return {}; + + auto *CondI = dyn_cast(TI->getCondition()); + // The case with the condition outside the loop should already be handled + // earlier. + if (!CondI || !L->contains(CondI)) + return {}; + + ToDuplicate.push_back(CondI); + + SmallVector WorkList; + WorkList.append(CondI->op_begin(), CondI->op_end()); + + SmallVector AccessesToCheck; + SmallVector AccessedLocs; + while (!WorkList.empty()) { + Instruction *I = dyn_cast(WorkList.pop_back_val()); + if (!I || !L->contains(I)) + continue; + + // TODO: support additional instructions. + if (!isa(I) && !isa(I)) + return {}; + + // Do not duplicate volatile or atomic loads. + if (auto *LI = dyn_cast(I)) + if (LI->isVolatile() || LI->isAtomic()) + return {}; + + ToDuplicate.push_back(I); + if (auto *MemUse = dyn_cast_or_null(MSSA.getMemoryAccess(I))) { + // Queue the defining access to check for alias checks. 
+ AccessesToCheck.push_back(MemUse->getDefiningAccess()); + AccessedLocs.push_back(MemoryLocation::get(I)); + } + WorkList.append(I->op_begin(), I->op_end()); + } + + if (ToDuplicate.size() <= 1) + return {}; + + auto HasNoClobbersOnPath = + [L, AA, &AccessedLocs](BasicBlock *Succ, BasicBlock *Header, + SmallVector AccessesToCheck) { + // First, collect all blocks in the loop that are on a path from Succ + // to the header. + SmallVector WorkList; + WorkList.push_back(Succ); + WorkList.push_back(Header); + SmallPtrSet Seen; + Seen.insert(Header); + while (!WorkList.empty()) { + BasicBlock *Current = WorkList.pop_back_val(); + if (!L->contains(Current)) + continue; + const auto &SeenIns = Seen.insert(Current); + if (!SeenIns.second) + continue; + + WorkList.append(succ_begin(Current), succ_end(Current)); + } + + // Next, check if there are any MemoryDefs that are on the path through + // the loop (in the Seen set) and they may-alias any of the locations in + // AccessedLocs. If that is the case, they may modify the condition and + // partial unswitching is not possible. + SmallPtrSet SeenAccesses; + while (!AccessesToCheck.empty()) { + MemoryAccess *Current = AccessesToCheck.pop_back_val(); + auto SeenI = SeenAccesses.insert(Current); + if (!SeenI.second || !Seen.contains(Current->getBlock())) + continue; + + // Bail out if the threshold is exceeded. + if (SeenAccesses.size() >= MSSAThreshold) + return false; + + // MemoryUses are read-only accesses. + if (isa(Current)) + continue; + + // For a MemoryDef, check if it aliases any of the locations feeding + // the original condition. 
+ if (auto *CurrentDef = dyn_cast(Current)) { + if (any_of(AccessedLocs, [AA, CurrentDef](MemoryLocation &Loc) { + return isModSet( + AA->getModRefInfo(CurrentDef->getMemoryInst(), Loc)); + })) + return false; + } + + for (Use &U : Current->uses()) + AccessesToCheck.push_back(cast(U.getUser())); + } + + return true; + }; + + if (HasNoClobbersOnPath(TI->getSuccessor(0), L->getHeader(), AccessesToCheck)) + return {ToDuplicate, ConstantInt::getTrue(TI->getContext())}; + if (HasNoClobbersOnPath(TI->getSuccessor(1), L->getHeader(), AccessesToCheck)) + return {ToDuplicate, ConstantInt::getFalse(TI->getContext())}; + + return {}; +} + /// Do actual work and unswitch loop if possible and profitable. bool LoopUnswitch::processCurrentLoop() { bool Changed = false; @@ -828,6 +962,26 @@ } } } + + // Check if there is a header condition that is invariant along the path from + // either the true or false successors to the header. This allows unswitching + // conditions depending on memory accesses, if there's a path not clobbering + // the memory locations. Check if this transform has been disabled using + // metadata, to avoid unswitching the same loop multiple times. + if (MSSA && + !findOptionMDForLoop(CurrentLoop, "llvm.loop.unswitch.partial.disable")) { + auto ToDuplicate = hasPartialIVCondition(CurrentLoop, *MSSA, AA); + if (!ToDuplicate.first.empty()) { + ++NumBranches; + unswitchIfProfitable(ToDuplicate.first[0], ToDuplicate.second, + CurrentLoop->getHeader()->getTerminator(), + ToDuplicate.first); + + RedoLoop = false; + return true; + } + } + return Changed; } @@ -885,7 +1039,8 @@ /// simplify the loop. If we decide that this is profitable, /// unswitch the loop, reprocess the pieces, then return true. bool LoopUnswitch::unswitchIfProfitable(Value *LoopCond, Constant *Val, - Instruction *TI) { + Instruction *TI, + ArrayRef ToDuplicate) { // Check to see if it would be profitable to unswitch current loop. 
if (!BranchesInfo.costAllowsUnswitching()) { LLVM_DEBUG(dbgs() << "NOT unswitching loop %" @@ -905,31 +1060,65 @@ return false; } - unswitchNontrivialCondition(LoopCond, Val, CurrentLoop, TI); + unswitchNontrivialCondition(LoopCond, Val, CurrentLoop, TI, ToDuplicate); return true; } /// Emit a conditional branch on two values if LIC == Val, branch to TrueDst, /// otherwise branch to FalseDest. Insert the code immediately before OldBranch /// and remove (but not erase!) it from the function. -void LoopUnswitch::emitPreheaderBranchOnCondition(Value *LIC, Constant *Val, - BasicBlock *TrueDest, - BasicBlock *FalseDest, - BranchInst *OldBranch, - Instruction *TI) { +void LoopUnswitch::emitPreheaderBranchOnCondition( + Value *LIC, Constant *Val, BasicBlock *TrueDest, BasicBlock *FalseDest, + BranchInst *OldBranch, Instruction *TI, + ArrayRef ToDuplicate) { assert(OldBranch->isUnconditional() && "Preheader is not split correctly"); assert(TrueDest != FalseDest && "Branch targets should be different"); + // Insert a conditional branch on LIC to the two preheaders. The original // code is the true version and the new code is the false version. Value *BranchVal = LIC; bool Swapped = false; - if (!isa(Val) || - Val->getType() != Type::getInt1Ty(LIC->getContext())) - BranchVal = new ICmpInst(OldBranch, ICmpInst::ICMP_EQ, LIC, Val); - else if (Val != ConstantInt::getTrue(Val->getContext())) { - // We want to enter the new loop when the condition is true. 
- std::swap(TrueDest, FalseDest); - Swapped = true; + + if (!ToDuplicate.empty()) { + ValueToValueMapTy Old2New; + for (Instruction *I : reverse(ToDuplicate)) { + auto *New = I->clone(); + New->insertBefore(OldBranch); + RemapInstruction(New, Old2New, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + Old2New[I] = New; + + if (MSSAU) { + MemorySSA *MSSA = MSSAU->getMemorySSA(); + auto *MemA = dyn_cast_or_null(MSSA->getMemoryAccess(I)); + if (!MemA) + continue; + + Loop *L = LI->getLoopFor(I->getParent()); + auto *DefiningAccess = MemA->getDefiningAccess(); + // If the defining access is a MemoryPhi in the header, get the incoming + // value for the pre-header as defining access. + if (DefiningAccess->getBlock() == I->getParent()) { + if (auto *MemPhi = dyn_cast(DefiningAccess)) { + DefiningAccess = + MemPhi->getIncomingValueForBlock(L->getLoopPreheader()); + } + } + MSSAU->createMemoryAccessInBB(New, DefiningAccess, New->getParent(), + MemorySSA::BeforeTerminator); + } + } + BranchVal = Old2New[ToDuplicate[0]]; + } else { + + if (!isa(Val) || + Val->getType() != Type::getInt1Ty(LIC->getContext())) + BranchVal = new ICmpInst(OldBranch, ICmpInst::ICMP_EQ, LIC, Val); + else if (Val != ConstantInt::getTrue(Val->getContext())) { + // We want to enter the new loop when the condition is true. + std::swap(TrueDest, FalseDest); + Swapped = true; + } } // Old branch will be removed, so save its parent and successor to update the @@ -1213,8 +1402,9 @@ /// We determined that the loop is profitable to unswitch when LIC equal Val. /// Split it into loop versions and test the condition outside of either loop. /// Return the loops created as Out1/Out2. 
-void LoopUnswitch::unswitchNontrivialCondition(Value *LIC, Constant *Val, - Loop *L, Instruction *TI) { +void LoopUnswitch::unswitchNontrivialCondition( + Value *LIC, Constant *Val, Loop *L, Instruction *TI, + ArrayRef ToDuplicate) { Function *F = LoopHeader->getParent(); LLVM_DEBUG(dbgs() << "loop-unswitch: Unswitching loop %" << LoopHeader->getName() << " [" << L->getBlocks().size() @@ -1346,7 +1536,7 @@ // Emit the new branch that selects between the two versions of this loop. emitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR, - TI); + TI, ToDuplicate); if (MSSAU) { // Update MemoryPhis in Exit blocks. MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMap, *DT); @@ -1368,17 +1558,38 @@ // iteration. WeakTrackingVH LICHandle(LIC); - // Now we rewrite the original code to know that the condition is true and the - // new code to know that the condition is false. - rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/false); - - // It's possible that simplifying one loop could cause the other to be - // changed to another value or a constant. If its a constant, don't simplify - // it. - if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop && - LICHandle && !isa(LICHandle)) - rewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, - /*IsEqual=*/true); + if (ToDuplicate.empty()) { + // Now we rewrite the original code to know that the condition is true and + // the new code to know that the condition is false. + rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/false); + + // It's possible that simplifying one loop could cause the other to be + // changed to another value or a constant. If its a constant, don't + // simplify it. + if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop && + LICHandle && !isa(LICHandle)) + rewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, + /*IsEqual=*/true); + } else { + // Partial unswitching. 
Update the condition in the right loop with the + // constant. + auto *CC = cast(Val); + if (CC->isOneValue()) { + rewriteLoopBodyWithConditionConstant(NewLoop, VMap[LIC], Val, + /*IsEqual=*/true); + } else + rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/true); + + // Mark the new loop as partially unswitched, to avoid unswitching on the + // same condition again. + auto &Context = NewLoop->getHeader()->getContext(); + MDNode *DisableUnswitchMD = MDNode::get( + Context, MDString::get(Context, "llvm.loop.unswitch.partial.disable")); + MDNode *NewLoopID = makePostTransformationMetadata( + Context, L->getLoopID(), {"llvm.loop.unswitch.partial"}, + {DisableUnswitchMD}); + NewLoop->setLoopID(NewLoopID); + } if (MSSA && VerifyMemorySSA) MSSA->verifyMemorySSA(); diff --git a/llvm/test/Transforms/LoopUnswitch/partial-unswitch-mssa-threshold.ll b/llvm/test/Transforms/LoopUnswitch/partial-unswitch-mssa-threshold.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopUnswitch/partial-unswitch-mssa-threshold.ll @@ -0,0 +1,48 @@ +; RUN: opt -loop-unswitch -loop-unswitch-memoryssa-threshold=0 -memssa-check-limit=1 -enable-new-pm=0 -S %s | FileCheck --check-prefix=THRESHOLD-0 %s +; RUN: opt -loop-unswitch -memssa-check-limit=1 -S -enable-new-pm=0 %s | FileCheck --check-prefix=THRESHOLD-DEFAULT %s + +; Make sure -loop-unswitch-memoryssa-threshold works. The test uses +; -memssa-check-limit=1 to effectively disable any MemorySSA optimizations +; on construction, so the test can be kept simple. + +declare void @clobber() + +; Partial unswitching is possible, because the store in %noclobber does not +; alias the load of the condition. 
+define i32 @partial_unswitch_true_successor_noclobber(i32* noalias %ptr.1, i32* noalias %ptr.2, i32 %N) { +; THRESHOLD-0-LABEL: @partial_unswitch_true_successor +; THRESHOLD-0: entry: +; THRESHOLD-0: br label %loop.header +; +; THRESHOLD-DEFAULT-LABEL: @partial_unswitch_true_successor +; THRESHOLD-DEFAULT-NEXT: entry: +; THRESHOLD-DEFAULT-NEXT: [[LV:%[0-9]+]] = load i32, i32* %ptr.1, align 4 +; THRESHOLD-DEFAULT-NEXT: [[C:%[0-9]+]] = icmp eq i32 [[LV]], 100 +; THRESHOLD-DEFAULT-NEXT: br i1 [[C]] +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + %lv = load i32, i32* %ptr.1 + %sc = icmp eq i32 %lv, 100 + br i1 %sc, label %noclobber, label %clobber + +noclobber: + %gep.1 = getelementptr i32, i32* %ptr.2, i32 %iv + store i32 %lv, i32* %gep.1 + br label %loop.latch + +clobber: + call void @clobber() + br label %loop.latch + +loop.latch: + %c = icmp ult i32 %iv, %N + %iv.next = add i32 %iv, 1 + br i1 %c, label %loop.header, label %exit + +exit: + ret i32 10 +} diff --git a/llvm/test/Transforms/LoopUnswitch/partial-unswitch.ll b/llvm/test/Transforms/LoopUnswitch/partial-unswitch.ll --- a/llvm/test/Transforms/LoopUnswitch/partial-unswitch.ll +++ b/llvm/test/Transforms/LoopUnswitch/partial-unswitch.ll @@ -5,7 +5,55 @@ define i32 @partial_unswitch_true_successor(i32* %ptr, i32 %N) { ; CHECK-LABEL: @partial_unswitch_true_successor ; CHECK-LABEL: entry: -; CHECK-NEXT: br label %loop.header +; CHECK-NEXT: [[LV:%[0-9]+]] = load i32, i32* %ptr, align 4 +; CHECK-NEXT: [[C:%[0-9]+]] = icmp eq i32 [[LV]], 100 +; CHECK-NEXT: br i1 [[C]], label %[[SPLIT_TRUE_PH:[a-z._]+]], label %[[FALSE_CRIT:[a-z._]+]] + +; CHECK: [[FALSE_CRIT]]: +; CHECK-NEXT: br label %[[FALSE_PH:[a-z.]+]] + +; CHECK: [[SPLIT_TRUE_PH]]: +; CHECK-NEXT: br label %[[TRUE_HEADER:[a-z.]+]] + +; CHECK: [[TRUE_HEADER]]: +; CHECK-NEXT: phi i32 +; CHECK-NEXT: [[TRUE_LV:%[a-z.0-9]+]] = load i32, i32* %ptr, align 4 +; CHECK-NEXT: [[TRUE_C:%[a-z.0-9]+]] = icmp eq i32 
[[TRUE_LV]], 100 +; CHECK-NEXT: br i1 true, label %[[TRUE_NOCLOBBER:.+]], label %[[TRUE_CLOBBER:[a-z0-9._]+]] + +; CHECK: [[TRUE_CLOBBER]]: +; CHECK-NEXT: call +; CHECK-NEXT: br label %[[TRUE_LATCH:[a-z0-9._]+]] + +; CHECK: [[TRUE_NOCLOBBER]]: +; CHECK-NEXT: br label %[[TRUE_LATCH:[a-z0-9._]+]] + +; CHECK: [[TRUE_LATCH]]: +; CHECK-NEXT: icmp +; CHECK-NEXT: add +; CHECK-NEXT: br {{.*}} label %[[TRUE_HEADER]] + + +; CHECK: [[FALSE_PH]]: +; CHECK-NEXT: br label %[[FALSE_HEADER:[a-z.]+]] + +; CHECK: [[FALSE_HEADER]]: +; CHECK-NEXT: phi i32 +; CHECK-NEXT: [[FALSE_LV:%[a-z.0-9]+]] = load i32, i32* %ptr, align 4 +; CHECK-NEXT: [[FALSE_C:%[a-z.0-9]+]] = icmp eq i32 [[FALSE_LV]], 100 +; CHECK-NEXT: br i1 [[FALSE_C]], label %[[FALSE_NOCLOBBER:.+]], label %[[FALSE_CLOBBER:[a-z0-9._]+]] + +; CHECK: [[FALSE_NOCLOBBER]]: +; CHECK-NEXT: br label %[[FALSE_LATCH:[a-z0-9._]+]] + +; CHECK: [[FALSE_CLOBBER]]: +; CHECK-NEXT: call +; CHECK-NEXT: br label %[[FALSE_LATCH:[a-z0-9._]+]] + +; CHECK: [[FALSE_LATCH]]: +; CHECK-NEXT: icmp +; CHECK-NEXT: add +; CHECK-NEXT: br {{.*}} label %[[FALSE_HEADER]] ; entry: br label %loop.header @@ -35,7 +83,55 @@ define i32 @partial_unswitch_false_successor(i32* %ptr, i32 %N) { ; CHECK-LABEL: @partial_unswitch_false_successor ; CHECK-LABEL: entry: -; CHECK-NEXT: br label %loop.header +; CHECK-NEXT: [[LV:%[0-9]+]] = load i32, i32* %ptr, align 4 +; CHECK-NEXT: [[C:%[0-9]+]] = icmp eq i32 [[LV]], 100 +; CHECK-NEXT: br i1 [[C]], label %[[SPLIT_TRUE_PH:[a-z._]+]], label %[[FALSE_CRIT:[a-z._]+]] + +; CHECK: [[FALSE_CRIT]]: +; CHECK-NEXT: br label %[[FALSE_PH:[a-z.]+]] + +; CHECK: [[SPLIT_TRUE_PH]]: +; CHECK-NEXT: br label %[[TRUE_HEADER:[a-z.]+]] + +; CHECK: [[TRUE_HEADER]]: +; CHECK-NEXT: phi i32 +; CHECK-NEXT: [[TRUE_LV:%[a-z.0-9]+]] = load i32, i32* %ptr, align 4 +; CHECK-NEXT: [[TRUE_C:%[a-z.0-9]+]] = icmp eq i32 [[TRUE_LV]], 100 +; CHECK-NEXT: br i1 [[TRUE_C]], label %[[TRUE_CLOBBER:.+]], label %[[TRUE_NOCLOBBER:[a-z0-9._]+]] + +; CHECK: 
[[TRUE_NOCLOBBER]]: +; CHECK-NEXT: br label %[[TRUE_LATCH:[a-z0-9._]+]] + +; CHECK: [[TRUE_CLOBBER]]: +; CHECK-NEXT: call +; CHECK-NEXT: br label %[[TRUE_LATCH:[a-z0-9._]+]] + +; CHECK: [[TRUE_LATCH]]: +; CHECK-NEXT: icmp +; CHECK-NEXT: add +; CHECK-NEXT: br {{.*}} label %[[TRUE_HEADER]] + + +; CHECK: [[FALSE_PH]]: +; CHECK-NEXT: br label %[[FALSE_HEADER:[a-z.]+]] + +; CHECK: [[FALSE_HEADER]]: +; CHECK-NEXT: phi i32 +; CHECK-NEXT: [[FALSE_LV:%[a-z.0-9]+]] = load i32, i32* %ptr, align 4 +; CHECK-NEXT: [[FALSE_C:%[a-z.0-9]+]] = icmp eq i32 [[FALSE_LV]], 100 +; CHECK-NEXT: br i1 false, label %[[FALSE_CLOBBER:.+]], label %[[FALSE_NOCLOBBER:[a-z0-9._]+]] + +; CHECK: [[FALSE_CLOBBER]]: +; CHECK-NEXT: call +; CHECK-NEXT: br label %[[FALSE_LATCH:[a-z0-9._]+]] + +; CHECK: [[FALSE_NOCLOBBER]]: +; CHECK-NEXT: br label %[[FALSE_LATCH:[a-z0-9._]+]] + +; CHECK: [[FALSE_LATCH]]: +; CHECK-NEXT: icmp +; CHECK-NEXT: add +; CHECK-NEXT: br {{.*}} label %[[FALSE_HEADER]] ; entry: br label %loop.header @@ -65,7 +161,61 @@ define i32 @partial_unswtich_gep_load_icmp(i32** %ptr, i32 %N) { ; CHECK-LABEL: @partial_unswtich_gep_load_icmp ; CHECK-LABEL: entry: -; CHECK-NEXT: br label %loop.header +; CHECK-NEXT: [[GEP:%[a-z.0-9]+]] = getelementptr i32*, i32** %ptr, i32 1 +; CHECK-NEXT: [[LV0:%[a-z.0-9]+]] = load i32*, i32** [[GEP]] +; CHECK-NEXT: [[LV1:%[a-z.0-9]+]] = load i32, i32* [[LV0]] +; CHECK-NEXT: [[C:%[a-z.0-9]+]] = icmp eq i32 [[LV1]], 100 +; CHECK-NEXT: br i1 [[C]], label %[[SPLIT_TRUE_PH:[a-z._]+]], label %[[FALSE_CRIT:[a-z._]+]] + +; CHECK: [[FALSE_CRIT]]: +; CHECK-NEXT: br label %[[FALSE_PH:[a-z.]+]] + +; CHECK: [[SPLIT_TRUE_PH]]: +; CHECK-NEXT: br label %[[TRUE_HEADER:[a-z.]+]] + +; CHECK: [[TRUE_HEADER]]: +; CHECK-NEXT: phi i32 +; CHECK-NEXT: [[TRUE_GEP:%[a-z.0-9]+]] = getelementptr i32*, i32** %ptr, i32 1 +; CHECK-NEXT: [[TRUE_LV0:%[a-z.0-9]+]] = load i32*, i32** [[TRUE_GEP]] +; CHECK-NEXT: [[TRUE_LV1:%[a-z.0-9]+]] = load i32, i32* [[TRUE_LV0]] +; CHECK-NEXT: 
[[TRUE_C:%[a-z.0-9]+]] = icmp eq i32 [[TRUE_LV1]], 100 +; CHECK-NEXT: br i1 true, label %[[TRUE_NOCLOBBER:.+]], label %[[TRUE_CLOBBER:[a-z0-9._]+]] + +; CHECK: [[TRUE_CLOBBER]]: +; CHECK-NEXT: call +; CHECK-NEXT: br label %[[TRUE_LATCH:[a-z0-9._]+]] + +; CHECK: [[TRUE_NOCLOBBER]]: +; CHECK-NEXT: br label %[[TRUE_LATCH:[a-z0-9._]+]] + +; CHECK: [[TRUE_LATCH]]: +; CHECK-NEXT: icmp +; CHECK-NEXT: add +; CHECK-NEXT: br {{.*}} label %[[TRUE_HEADER]] + +; CHECK: [[FALSE_PH]]: +; CHECK-NEXT: br label %[[FALSE_HEADER:[a-z.]+]] + +; CHECK: [[FALSE_HEADER]]: +; CHECK-NEXT: phi i32 +; CHECK-NEXT: [[FALSE_GEP:%[a-z.0-9]+]] = getelementptr i32*, i32** %ptr, i32 1 +; CHECK-NEXT: [[FALSE_LV0:%[a-z.0-9]+]] = load i32*, i32** [[FALSE_GEP]] +; CHECK-NEXT: [[FALSE_LV1:%[a-z.0-9]+]] = load i32, i32* [[FALSE_LV0]] +; CHECK-NEXT: [[FALSE_C:%[a-z.0-9]+]] = icmp eq i32 [[FALSE_LV1]], 100 +; CHECK-NEXT: br i1 [[FALSE_C]], label %[[FALSE_NOCLOBBER:.+]], label %[[FALSE_CLOBBER:[a-z0-9._]+]] + +; CHECK: [[FALSE_NOCLOBBER]]: +; CHECK-NEXT: br label %[[FALSE_LATCH:[a-z0-9._]+]] + +; CHECK: [[FALSE_CLOBBER]]: +; CHECK-NEXT: call +; CHECK-NEXT: br label %[[FALSE_LATCH:[a-z0-9._]+]] + + +; CHECK: [[FALSE_LATCH]]: +; CHECK-NEXT: icmp +; CHECK-NEXT: add +; CHECK-NEXT: br {{.*}} label %[[FALSE_HEADER]] ; entry: br label %loop.header @@ -97,7 +247,63 @@ define i32 @partial_unswitch_reduction_phi(i32* %ptr, i32 %N) { ; CHECK-LABEL: @partial_unswitch_reduction_phi ; CHECK-LABEL: entry: -; CHECK-NEXT: br label %loop.header +; CHECK-NEXT: [[LV:%[0-9]+]] = load i32, i32* %ptr, align 4 +; CHECK-NEXT: [[C:%[0-9]+]] = icmp eq i32 [[LV]], 100 +; CHECK-NEXT: br i1 [[C]], label %[[SPLIT_TRUE_PH:[a-z._]+]], label %[[FALSE_CRIT:[a-z._]+]] + +; CHECK: [[FALSE_CRIT]]: +; CHECK-NEXT: br label %[[FALSE_PH:[a-z.]+]] + +; CHECK: [[SPLIT_TRUE_PH]]: +; CHECK-NEXT: br label %[[TRUE_HEADER:[a-z.]+]] + +; CHECK: [[TRUE_HEADER]]: +; CHECK-NEXT: phi i32 +; CHECK-NEXT: [[TRUE_RED:%[a-z.0-9]+]] = phi i32 [ 20, %[[SPLIT_TRUE_PH]] 
], [ [[TRUE_RED_NEXT:%[a-z.0-9]+]], %[[TRUE_LATCH:[a-z.0-9]+]] +; CHECK-NEXT: [[TRUE_LV:%[a-z.0-9]+]] = load i32, i32* %ptr, align 4 +; CHECK-NEXT: [[TRUE_C:%[a-z.0-9]+]] = icmp eq i32 [[TRUE_LV]], 100 +; CHECK-NEXT: br i1 [[TRUE_C]], label %[[TRUE_CLOBBER:.+]], label %[[TRUE_NOCLOBBER:[a-z0-9._]+]] + +; CHECK: [[TRUE_NOCLOBBER]]: +; CHECK-NEXT: [[TRUE_ADD10:%.+]] = add i32 [[TRUE_RED]], 10 +; CHECK-NEXT: br label %[[TRUE_LATCH]] + +; CHECK: [[TRUE_CLOBBER]]: +; CHECK-NEXT: call +; CHECK-NEXT: [[TRUE_ADD5:%.+]] = add i32 [[TRUE_RED]], 5 +; CHECK-NEXT: br label %[[TRUE_LATCH]] + +; CHECK: [[TRUE_LATCH]]: +; CHECK-NEXT: [[TRUE_RED_NEXT]] = phi i32 [ [[TRUE_ADD5]], %[[TRUE_CLOBBER]] ], [ [[TRUE_ADD10]], %[[TRUE_NOCLOBBER]] ] +; CHECK-NEXT: icmp +; CHECK-NEXT: add +; CHECK-NEXT: br {{.*}} label %[[TRUE_HEADER]] + + +; CHECK: [[FALSE_PH]]: +; CHECK-NEXT: br label %[[FALSE_HEADER:[a-z.]+]] + +; CHECK: [[FALSE_HEADER]]: +; CHECK-NEXT: phi i32 +; CHECK-NEXT: [[FALSE_RED:%[a-z.0-9]+]] = phi i32 [ 20, %[[FALSE_PH]] ], [ [[FALSE_RED_NEXT:%[a-z.0-9]+]], %[[FALSE_LATCH:[a-z.0-9]+]] +; CHECK-NEXT: [[FALSE_LV:%[a-z.0-9]+]] = load i32, i32* %ptr, align 4 +; CHECK-NEXT: [[FALSE_C:%[a-z.0-9]+]] = icmp eq i32 [[FALSE_LV]], 100 +; CHECK-NEXT: br i1 false, label %[[FALSE_CLOBBER:.+]], label %[[FALSE_NOCLOBBER:[a-z0-9._]+]] + +; CHECK: [[FALSE_CLOBBER]]: +; CHECK-NEXT: call +; CHECK-NEXT: [[FALSE_ADD5:%.+]] = add i32 [[FALSE_RED]], 5 +; CHECK-NEXT: br label %[[FALSE_LATCH:[a-z0-9._]+]] + +; CHECK: [[FALSE_NOCLOBBER]]: +; CHECK-NEXT: [[FALSE_ADD10:%.+]] = add i32 [[FALSE_RED]], 10 +; CHECK-NEXT: br label %[[FALSE_LATCH:[a-z0-9._]+]] + +; CHECK: [[FALSE_LATCH]]: +; CHECK-NEXT: [[FALSE_RED_NEXT]] = phi i32 [ [[FALSE_ADD5]], %[[FALSE_CLOBBER]] ], [ [[FALSE_ADD10]], %[[FALSE_NOCLOBBER]] ] +; CHECK-NEXT: icmp +; CHECK-NEXT: add +; CHECK-NEXT: br {{.*}} label %[[FALSE_HEADER]] ; entry: br label %loop.header @@ -134,7 +340,59 @@ define i32 @partial_unswitch_true_successor_noclobber(i32* noalias 
%ptr.1, i32* noalias %ptr.2, i32 %N) { ; CHECK-LABEL: @partial_unswitch_true_successor ; CHECK-NEXT: entry: -; CHECK-NEXT: br label %loop.header +; CHECK-NEXT: [[LV:%[0-9]+]] = load i32, i32* %ptr.1, align 4 +; CHECK-NEXT: [[C:%[0-9]+]] = icmp eq i32 [[LV]], 100 +; CHECK-NEXT: br i1 [[C]], label %[[SPLIT_TRUE_PH:[a-z._]+]], label %[[FALSE_CRIT:[a-z._]+]] + +; CHECK: [[FALSE_CRIT]]: +; CHECK-NEXT: br label %[[FALSE_PH:[a-z.]+]] + +; CHECK: [[SPLIT_TRUE_PH]]: +; CHECK-NEXT: br label %[[TRUE_HEADER:[a-z.]+]] + +; CHECK: [[TRUE_HEADER]]: +; CHECK-NEXT: phi i32 +; CHECK-NEXT: [[TRUE_LV:%[a-z.0-9]+]] = load i32, i32* %ptr.1, align 4 +; CHECK-NEXT: [[TRUE_C:%[a-z.0-9]+]] = icmp eq i32 [[TRUE_LV]], 100 +; CHECK-NEXT: br i1 true, label %[[TRUE_NOCLOBBER:.+]], label %[[TRUE_CLOBBER:[a-z0-9._]+]] + +; CHECK: [[TRUE_CLOBBER]]: +; CHECK-NEXT: call +; CHECK-NEXT: br label %[[TRUE_LATCH:[a-z0-9._]+]] + +; CHECK: [[TRUE_NOCLOBBER]]: +; CHECK-NEXT: [[TRUE_GEP:%[a-z0-9._]+]] = getelementptr i32, i32* %ptr.2 +; CHECK-NEXT: store i32 [[TRUE_LV]], i32* [[TRUE_GEP]], align 4 +; CHECK-NEXT: br label %[[TRUE_LATCH:[a-z0-9._]+]] + +; CHECK: [[TRUE_LATCH]]: +; CHECK-NEXT: icmp +; CHECK-NEXT: add +; CHECK-NEXT: br {{.*}} label %[[TRUE_HEADER]] + + +; CHECK: [[FALSE_PH]]: +; CHECK-NEXT: br label %[[FALSE_HEADER:[a-z.]+]] + +; CHECK: [[FALSE_HEADER]]: +; CHECK-NEXT: phi i32 +; CHECK-NEXT: [[FALSE_LV:%[a-z.0-9]+]] = load i32, i32* %ptr.1, align 4 +; CHECK-NEXT: [[FALSE_C:%[a-z.0-9]+]] = icmp eq i32 [[FALSE_LV]], 100 +; CHECK-NEXT: br i1 [[FALSE_C]], label %[[FALSE_NOCLOBBER:.+]], label %[[FALSE_CLOBBER:[a-z0-9._]+]] + +; CHECK: [[FALSE_NOCLOBBER]]: +; CHECK-NEXT: [[FALSE_GEP:%[a-z0-9._]+]] = getelementptr i32, i32* %ptr.2 +; CHECK-NEXT: store i32 [[FALSE_LV]], i32* [[FALSE_GEP]], align 4 +; CHECK-NEXT: br label %[[FALSE_LATCH:[a-z0-9._]+]] + +; CHECK: [[FALSE_CLOBBER]]: +; CHECK-NEXT: call +; CHECK-NEXT: br label %[[FALSE_LATCH:[a-z0-9._]+]] + +; CHECK: [[FALSE_LATCH]]: +; CHECK-NEXT: icmp +; 
CHECK-NEXT: add +; CHECK-NEXT: br {{.*}} label %[[FALSE_HEADER]] ; entry: br label %loop.header @@ -321,9 +579,10 @@ ; duplicated load being a MemoryPHI outside the loop. define void @partial_unswitch_memssa_update(i32* noalias %ptr, i1 %c) { ; CHECK-LABEL: @partial_unswitch_memssa_update( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 %c, label %loop.ph, label %outside.clobber -; +; CHECK-LABEL: loop.ph: +; CHECK-NEXT: [[LV:%[a-z0-9]+]] = load i32, i32* %ptr, align 4 +; CHECK-NEXT: [[C:%[a-z0-9]+]] = icmp eq i32 [[LV]], 0 +; CHECK-NEXT: br i1 [[C]] entry: br i1 %c, label %loop.ph, label %outside.clobber @@ -359,8 +618,13 @@ define i32 @partial_unswitch_true_successor_preheader_insertion(i32* %ptr, i32 %N) { ; CHECK-LABEL: @partial_unswitch_true_successor_preheader_insertion( ; CHECK-NEXT: entry: -; CHECK-NEXT: %ec = icmp ne i32* %ptr, null -; CHECK-NEXT: br i1 %ec, label %loop.ph, label %exit +; CHECK-NEXT: [[EC:%[a-z]+]] = icmp ne i32* %ptr, null +; CHECK-NEXT: br i1 [[EC]], label %[[PH:[a-z0-9.]+]], label %[[EXIT:[a-z0-9.]+]] + +; CHECK: [[PH]]: +; CHECK-NEXT: [[LV:%[0-9]+]] = load i32, i32* %ptr, align 4 +; CHECK-NEXT: [[C:%[0-9]+]] = icmp eq i32 [[LV]], 100 +; CHECK-NEXT: br i1 [[C]] ; entry: %ec = icmp ne i32* %ptr, null @@ -397,8 +661,10 @@ define i32 @partial_unswitch_true_successor_insert_point(i32* %ptr, i32 %N) { ; CHECK-LABEL: @partial_unswitch_true_successor_insert_point( ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @clobber() -; CHECK-NEXT: br label %loop.header +; CHECK-NEXT: call void @clobber() +; CHECK-NEXT: [[LV:%[0-9]+]] = load i32, i32* %ptr, align 4 +; CHECK-NEXT: [[C:%[0-9]+]] = icmp eq i32 [[LV]], 100 +; CHECK-NEXT: br i1 [[C]] ; entry: call void @clobber() @@ -432,7 +698,10 @@ define i32 @partial_unswitch_true_successor_hoist_invariant(i32* %ptr, i32 %N) { ; CHECK-LABEL: @partial_unswitch_true_successor_hoist_invariant( ; CHECK-NEXT: entry: -; CHECK-NEXT: br label %loop.header +; CHECK-NEXT: [[GEP:%[0-9]+]] = getelementptr i32, i32* %ptr, i64 
1 +; CHECK-NEXT: [[LV:%[0-9]+]] = load i32, i32* [[GEP]], align 4 +; CHECK-NEXT: [[C:%[0-9]+]] = icmp eq i32 [[LV]], 100 +; CHECK-NEXT: br i1 [[C]] ; entry: br label %loop.header