diff --git a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp --- a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -249,19 +249,22 @@ bool tryTrivialLoopUnswitch(bool &Changed); bool unswitchIfProfitable(Value *LoopCond, Constant *Val, - Instruction *TI = nullptr); + Instruction *TI = nullptr, + ArrayRef ToDuplicate = {}); void unswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, BasicBlock *ExitBlock, Instruction *TI); void unswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L, - Instruction *TI); + Instruction *TI, + ArrayRef ToDuplicate = {}); void rewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, Constant *Val, bool IsEqual); - void emitPreheaderBranchOnCondition(Value *LIC, Constant *Val, - BasicBlock *TrueDest, - BasicBlock *FalseDest, - BranchInst *OldBranch, Instruction *TI); + void + emitPreheaderBranchOnCondition(Value *LIC, Constant *Val, + BasicBlock *TrueDest, BasicBlock *FalseDest, + BranchInst *OldBranch, Instruction *TI, + ArrayRef ToDuplicate = {}); void simplifyCode(std::vector &Worklist, Loop *L); @@ -629,6 +632,80 @@ return false; } +/// Check if the loop header has a conditional branch that is not +/// loop-invariant, because it involves load instructions. If all paths from +/// either the true or false successor to the header or loop exists do not +/// modify the memory feeding the condition, perform 'partial unswitching'. That +/// is, duplicate the instructions feeding the condition in the pre-header. Then +/// unswitch on the duplicated condition. The condition is now known in the +/// unswitched version for the 'invariant' path through the original loop. +static std::pair, Constant *> +hasPartialIVCondition(Loop *L) { + SmallVector ToDuplicate; + + auto *TI = dyn_cast(L->getHeader()->getTerminator()); + if (!TI || !TI->isConditional()) + return {}; + + auto *CondI = dyn_cast(TI->getCondition()); + if (!CondI || L->isLoopInvariant(CondI)) + return {}; + + ToDuplicate.push_back(CondI); + + SmallVector WorkList; + WorkList.append(CondI->op_begin(), CondI->op_end()); + + while (!WorkList.empty()) { + Instruction *I = dyn_cast(WorkList.back()); + WorkList.pop_back(); + if (!I || L->isLoopInvariant(I)) + continue; + + if (!isa(I) && !isa(I)) + return {}; + ToDuplicate.push_back(I); + WorkList.append(I->op_begin(), I->op_end()); + } + + if (ToDuplicate.size() <= 1) + return {}; + + auto HasNoSideEffectsOnPath = [L](BasicBlock *Succ, BasicBlock *Header) { + SmallVector WorkList; + WorkList.push_back(Succ); + WorkList.push_back(Header); + SmallPtrSet Seen; + + while (!WorkList.empty()) { + BasicBlock *Current = WorkList.back(); + WorkList.pop_back(); + if (!L->contains(Current)) + continue; + auto SeenIns = Seen.insert(Current); + if (!SeenIns.second) + continue; + + if (any_of(*Current, + [](Instruction &I) { return I.mayHaveSideEffects(); })) + return false; + + if (Current == L->getHeader()) + continue; + + WorkList.append(succ_begin(Current), succ_end(Current)); + } + return true; + }; + + if (HasNoSideEffectsOnPath(TI->getSuccessor(0), L->getHeader())) + return {ToDuplicate, ConstantInt::getTrue(TI->getContext())}; + if (HasNoSideEffectsOnPath(TI->getSuccessor(1), L->getHeader())) + return {ToDuplicate, ConstantInt::getFalse(TI->getContext())}; + + return {}; +} + /// Do actual work and unswitch loop if possible and profitable. bool LoopUnswitch::processCurrentLoop() { bool Changed = false; @@ -828,6 +905,25 @@ } } } + + // Check if there is a header condition that is invariant along the patch from + // either the true or false successors to the header. This allows unswitching + // conditions depending on memory accesses, if there's a path not clobbering + // the memory locations. Check if this transform has been disabled using + // metadata, to avoid unswitching the same loop multiple times. + if (!findOptionMDForLoop(CurrentLoop, "llvm.loop.unswitch.partial.disable")) { + auto ToDuplicate = hasPartialIVCondition(CurrentLoop); + if (!ToDuplicate.first.empty()) { + ++NumBranches; + unswitchIfProfitable(ToDuplicate.first[0], ToDuplicate.second, + CurrentLoop->getHeader()->getTerminator(), + ToDuplicate.first); + + RedoLoop = false; + return true; + } + } + return Changed; } @@ -885,7 +981,8 @@ /// simplify the loop. If we decide that this is profitable, /// unswitch the loop, reprocess the pieces, then return true. bool LoopUnswitch::unswitchIfProfitable(Value *LoopCond, Constant *Val, - Instruction *TI) { + Instruction *TI, + ArrayRef ToDuplicate) { // Check to see if it would be profitable to unswitch current loop. if (!BranchesInfo.costAllowsUnswitching()) { LLVM_DEBUG(dbgs() << "NOT unswitching loop %" @@ -905,31 +1002,61 @@ return false; } - unswitchNontrivialCondition(LoopCond, Val, CurrentLoop, TI); + unswitchNontrivialCondition(LoopCond, Val, CurrentLoop, TI, ToDuplicate); return true; } /// Emit a conditional branch on two values if LIC == Val, branch to TrueDst, /// otherwise branch to FalseDest. Insert the code immediately before OldBranch /// and remove (but not erase!) it from the function. -void LoopUnswitch::emitPreheaderBranchOnCondition(Value *LIC, Constant *Val, - BasicBlock *TrueDest, - BasicBlock *FalseDest, - BranchInst *OldBranch, - Instruction *TI) { +void LoopUnswitch::emitPreheaderBranchOnCondition( + Value *LIC, Constant *Val, BasicBlock *TrueDest, BasicBlock *FalseDest, + BranchInst *OldBranch, Instruction *TI, + ArrayRef ToDuplicate) { assert(OldBranch->isUnconditional() && "Preheader is not split correctly"); assert(TrueDest != FalseDest && "Branch targets should be different"); + // Insert a conditional branch on LIC to the two preheaders. The original // code is the true version and the new code is the false version. Value *BranchVal = LIC; bool Swapped = false; - if (!isa(Val) || - Val->getType() != Type::getInt1Ty(LIC->getContext())) - BranchVal = new ICmpInst(OldBranch, ICmpInst::ICMP_EQ, LIC, Val); - else if (Val != ConstantInt::getTrue(Val->getContext())) { - // We want to enter the new loop when the condition is true. - std::swap(TrueDest, FalseDest); - Swapped = true; + + if (!ToDuplicate.empty()) { + ValueToValueMapTy Old2New; + for (Instruction *I : reverse(ToDuplicate)) { + auto *New = I->clone(); + New->insertBefore(OldBranch); + RemapInstruction(New, Old2New, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + Old2New[I] = New; + + if (MSSAU) { + MemorySSA *MSSA = MSSAU->getMemorySSA(); + auto *MemA = dyn_cast_or_null(MSSA->getMemoryAccess(I)); + if (!MemA) + continue; + + Loop *L = LI->getLoopFor(I->getParent()); + auto *DefiningAccess = MemA->getDefiningAccess(); + if (auto *MemPhi = dyn_cast(DefiningAccess)) { + DefiningAccess = + MemPhi->getIncomingValueForBlock(L->getLoopPreheader()); + } + MSSAU->createMemoryAccessInBB(New, DefiningAccess, New->getParent(), + MemorySSA::BeforeTerminator); + } + } + BranchVal = Old2New[ToDuplicate[0]]; + } else { + + if (!isa(Val) || + Val->getType() != Type::getInt1Ty(LIC->getContext())) + BranchVal = new ICmpInst(OldBranch, ICmpInst::ICMP_EQ, LIC, Val); + else if (Val != ConstantInt::getTrue(Val->getContext())) { + // We want to enter the new loop when the condition is true. + std::swap(TrueDest, FalseDest); + Swapped = true; + } } // Old branch will be removed, so save its parent and successor to update the @@ -1212,8 +1339,9 @@ /// We determined that the loop is profitable to unswitch when LIC equal Val. /// Split it into loop versions and test the condition outside of either loop. /// Return the loops created as Out1/Out2. -void LoopUnswitch::unswitchNontrivialCondition(Value *LIC, Constant *Val, - Loop *L, Instruction *TI) { +void LoopUnswitch::unswitchNontrivialCondition( + Value *LIC, Constant *Val, Loop *L, Instruction *TI, + ArrayRef ToDuplicate) { Function *F = LoopHeader->getParent(); LLVM_DEBUG(dbgs() << "loop-unswitch: Unswitching loop %" << LoopHeader->getName() << " [" << L->getBlocks().size() @@ -1345,7 +1473,7 @@ // Emit the new branch that selects between the two versions of this loop. emitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR, - TI); + TI, ToDuplicate); if (MSSAU) { // Update MemoryPhis in Exit blocks. MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMap, *DT); @@ -1367,17 +1495,38 @@ // iteration. WeakTrackingVH LICHandle(LIC); - // Now we rewrite the original code to know that the condition is true and the - // new code to know that the condition is false. - rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/false); - - // It's possible that simplifying one loop could cause the other to be - // changed to another value or a constant. If its a constant, don't simplify - // it. - if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop && - LICHandle && !isa(LICHandle)) - rewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, - /*IsEqual=*/true); + if (ToDuplicate.empty()) { + // Now we rewrite the original code to know that the condition is true and + // the new code to know that the condition is false. + rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/false); + + // It's possible that simplifying one loop could cause the other to be + // changed to another value or a constant. If its a constant, don't + // simplify it. + if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop && + LICHandle && !isa(LICHandle)) + rewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, + /*IsEqual=*/true); + } else { + // Partial unswitching. Update the condition in the right loop with the + // constant. + auto *CC = cast(Val); + if (CC->isOneValue()) { + rewriteLoopBodyWithConditionConstant(NewLoop, VMap[LIC], Val, + /*IsEqual=*/true); + } else + rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/true); + + // Mark the new loop as partially unswitched, to avoid unswitching on the + // same condition again. + auto &Context = NewLoop->getHeader()->getContext(); + MDNode *DisableUnswitchMD = MDNode::get( + Context, MDString::get(Context, "llvm.loop.unswitch.partial.disable")); + MDNode *NewLoopID = makePostTransformationMetadata( + Context, L->getLoopID(), {"llvm.loop.unswitch.partial"}, + {DisableUnswitchMD}); + NewLoop->setLoopID(NewLoopID); + } if (MSSA && VerifyMemorySSA) MSSA->verifyMemorySSA(); diff --git a/llvm/test/Transforms/LoopUnswitch/partial-unswitch.ll b/llvm/test/Transforms/LoopUnswitch/partial-unswitch.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopUnswitch/partial-unswitch.ll @@ -0,0 +1,465 @@ +; RUN: opt -loop-unswitch -S %s | FileCheck %s + +declare void @clobber() + +define i32 @partial_unswitch_true_successor(i32* %ptr, i32 %N) { +; CHECK-LABEL: @partial_unswitch_true_successor +; CHECK-LABEL: entry: +; CHECK-NEXT: [[LV:%[0-9]+]] = load i32, i32* %ptr, align 4 +; CHECK-NEXT: [[C:%[0-9]+]] = icmp eq i32 [[LV]], 100 +; CHECK-NEXT: br i1 [[C]], label %[[SPLIT_TRUE_PH:[a-z._]+]], label %[[FALSE_CRIT:[a-z._]+]] + +; CHECK: [[FALSE_CRIT]]: +; CHECK-NEXT: br label %[[FALSE_PH:[a-z.]+]] + +; CHECK: [[SPLIT_TRUE_PH]]: +; CHECK-NEXT: br label %[[TRUE_HEADER:[a-z.]+]] + +; CHECK: [[TRUE_HEADER]]: +; CHECK-NEXT: phi i32 +; CHECK-NEXT: [[TRUE_LV:%[a-z.0-9]+]] = load i32, i32* %ptr, align 4 +; CHECK-NEXT: [[TRUE_C:%[a-z.0-9]+]] = icmp eq i32 [[TRUE_LV]], 100 +; CHECK-NEXT: br i1 true, label %[[TRUE_NOCLOBBER:.+]], label %[[TRUE_CLOBBER:[a-z0-9._]+]] + +; CHECK: [[TRUE_CLOBBER]]: +; CHECK-NEXT: call +; CHECK-NEXT: br label %[[TRUE_LATCH:[a-z0-9._]+]] + +; CHECK: [[TRUE_NOCLOBBER]]: +; CHECK-NEXT: br label %[[TRUE_LATCH:[a-z0-9._]+]] + +; CHECK: [[TRUE_LATCH]]: +; CHECK-NEXT: icmp +; CHECK-NEXT: add +; CHECK-NEXT: br {{.*}} label %[[TRUE_HEADER]] + + +; CHECK: [[FALSE_PH]]: +; CHECK-NEXT: br label %[[FALSE_HEADER:[a-z.]+]] + +; CHECK: [[FALSE_HEADER]]: +; CHECK-NEXT: phi i32 +; CHECK-NEXT: [[FALSE_LV:%[a-z.0-9]+]] = load i32, i32* %ptr, align 4 +; CHECK-NEXT: [[FALSE_C:%[a-z.0-9]+]] = icmp eq i32 [[FALSE_LV]], 100 +; CHECK-NEXT: br i1 [[FALSE_C]], label %[[FALSE_NOCLOBBER:.+]], label %[[FALSE_CLOBBER:[a-z0-9._]+]] + +; CHECK: [[FALSE_NOCLOBBER]]: +; CHECK-NEXT: br label %[[FALSE_LATCH:[a-z0-9._]+]] + +; CHECK: [[FALSE_CLOBBER]]: +; CHECK-NEXT: call +; CHECK-NEXT: br label %[[FALSE_LATCH:[a-z0-9._]+]] + +; CHECK: [[FALSE_LATCH]]: +; CHECK-NEXT: icmp +; CHECK-NEXT: add +; CHECK-NEXT: br {{.*}} label %[[FALSE_HEADER]] +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + %lv = load i32, i32* %ptr + %sc = icmp eq i32 %lv, 100 + br i1 %sc, label %noclobber, label %clobber + +noclobber: + br label %loop.latch + +clobber: + call void @clobber() + br label %loop.latch + +loop.latch: + %c = icmp ult i32 %iv, %N + %iv.next = add i32 %iv, 1 + br i1 %c, label %loop.header, label %exit + +exit: + ret i32 10 + +} + +define i32 @partial_unswitch_false_successor(i32* %ptr, i32 %N) { +; CHECK-LABEL: @partial_unswitch_false_successor +; CHECK-LABEL: entry: +; CHECK-NEXT: [[LV:%[0-9]+]] = load i32, i32* %ptr, align 4 +; CHECK-NEXT: [[C:%[0-9]+]] = icmp eq i32 [[LV]], 100 +; CHECK-NEXT: br i1 [[C]], label %[[SPLIT_TRUE_PH:[a-z._]+]], label %[[FALSE_CRIT:[a-z._]+]] + +; CHECK: [[FALSE_CRIT]]: +; CHECK-NEXT: br label %[[FALSE_PH:[a-z.]+]] + +; CHECK: [[SPLIT_TRUE_PH]]: +; CHECK-NEXT: br label %[[TRUE_HEADER:[a-z.]+]] + +; CHECK: [[TRUE_HEADER]]: +; CHECK-NEXT: phi i32 +; CHECK-NEXT: [[TRUE_LV:%[a-z.0-9]+]] = load i32, i32* %ptr, align 4 +; CHECK-NEXT: [[TRUE_C:%[a-z.0-9]+]] = icmp eq i32 [[TRUE_LV]], 100 +; CHECK-NEXT: br i1 [[TRUE_C]], label %[[TRUE_CLOBBER:.+]], label %[[TRUE_NOCLOBBER:[a-z0-9._]+]] + +; CHECK: [[TRUE_NOCLOBBER]]: +; CHECK-NEXT: br label %[[TRUE_LATCH:[a-z0-9._]+]] + +; CHECK: [[TRUE_CLOBBER]]: +; CHECK-NEXT: call +; CHECK-NEXT: br label %[[TRUE_LATCH:[a-z0-9._]+]] + +; CHECK: [[TRUE_LATCH]]: +; CHECK-NEXT: icmp +; CHECK-NEXT: add +; CHECK-NEXT: br {{.*}} label %[[TRUE_HEADER]] + + +; CHECK: [[FALSE_PH]]: +; CHECK-NEXT: br label %[[FALSE_HEADER:[a-z.]+]] + +; CHECK: [[FALSE_HEADER]]: +; CHECK-NEXT: phi i32 +; CHECK-NEXT: [[FALSE_LV:%[a-z.0-9]+]] = load i32, i32* %ptr, align 4 +; CHECK-NEXT: [[FALSE_C:%[a-z.0-9]+]] = icmp eq i32 [[FALSE_LV]], 100 +; CHECK-NEXT: br i1 false, label %[[FALSE_CLOBBER:.+]], label %[[FALSE_NOCLOBBER:[a-z0-9._]+]] + +; CHECK: [[FALSE_CLOBBER]]: +; CHECK-NEXT: call +; CHECK-NEXT: br label %[[FALSE_LATCH:[a-z0-9._]+]] + +; CHECK: [[FALSE_NOCLOBBER]]: +; CHECK-NEXT: br label %[[FALSE_LATCH:[a-z0-9._]+]] + +; CHECK: [[FALSE_LATCH]]: +; CHECK-NEXT: icmp +; CHECK-NEXT: add +; CHECK-NEXT: br {{.*}} label %[[FALSE_HEADER]] +; + +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + %lv = load i32, i32* %ptr + %sc = icmp eq i32 %lv, 100 + br i1 %sc, label %clobber, label %noclobber + +clobber: + call void @clobber() + br label %loop.latch + +noclobber: + br label %loop.latch + +loop.latch: + %c = icmp ult i32 %iv, %N + %iv.next = add i32 %iv, 1 + br i1 %c, label %loop.header, label %exit + +exit: + ret i32 10 +} + +define i32 @partial_unswtich_gep_load_icmp(i32** %ptr, i32 %N) { +; CHECK-LABEL: @partial_unswtich_gep_load_icmp +; CHECK-LABEL: entry: +; CHECK-NEXT: [[GEP:%[a-z.0-9]+]] = getelementptr i32*, i32** %ptr, i32 1 +; CHECK-NEXT: [[LV0:%[a-z.0-9]+]] = load i32*, i32** [[GEP]] +; CHECK-NEXT: [[LV1:%[a-z.0-9]+]] = load i32, i32* [[LV0]] +; CHECK-NEXT: [[C:%[a-z.0-9]+]] = icmp eq i32 [[LV1]], 100 +; CHECK-NEXT: br i1 [[C]], label %[[SPLIT_TRUE_PH:[a-z._]+]], label %[[FALSE_CRIT:[a-z._]+]] + +; CHECK: [[FALSE_CRIT]]: +; CHECK-NEXT: br label %[[FALSE_PH:[a-z.]+]] + +; CHECK: [[SPLIT_TRUE_PH]]: +; CHECK-NEXT: br label %[[TRUE_HEADER:[a-z.]+]] + +; CHECK: [[TRUE_HEADER]]: +; CHECK-NEXT: phi i32 +; CHECK-NEXT: [[TRUE_GEP:%[a-z.0-9]+]] = getelementptr i32*, i32** %ptr, i32 1 +; CHECK-NEXT: [[TRUE_LV0:%[a-z.0-9]+]] = load i32*, i32** [[TRUE_GEP]] +; CHECK-NEXT: [[TRUE_LV1:%[a-z.0-9]+]] = load i32, i32* [[TRUE_LV0]] +; CHECK-NEXT: [[TRUE_C:%[a-z.0-9]+]] = icmp eq i32 [[TRUE_LV1]], 100 +; CHECK-NEXT: br i1 true, label %[[TRUE_NOCLOBBER:.+]], label %[[TRUE_CLOBBER:[a-z0-9._]+]] + +; CHECK: [[TRUE_CLOBBER]]: +; CHECK-NEXT: call +; CHECK-NEXT: br label %[[TRUE_LATCH:[a-z0-9._]+]] + +; CHECK: [[TRUE_NOCLOBBER]]: +; CHECK-NEXT: br label %[[TRUE_LATCH:[a-z0-9._]+]] + +; CHECK: [[TRUE_LATCH]]: +; CHECK-NEXT: icmp +; CHECK-NEXT: add +; CHECK-NEXT: br {{.*}} label %[[TRUE_HEADER]] + +; CHECK: [[FALSE_PH]]: +; CHECK-NEXT: br label %[[FALSE_HEADER:[a-z.]+]] + +; CHECK: [[FALSE_HEADER]]: +; CHECK-NEXT: phi i32 +; CHECK-NEXT: [[FALSE_GEP:%[a-z.0-9]+]] = getelementptr i32*, i32** %ptr, i32 1 +; CHECK-NEXT: [[FALSE_LV0:%[a-z.0-9]+]] = load i32*, i32** [[FALSE_GEP]] +; CHECK-NEXT: [[FALSE_LV1:%[a-z.0-9]+]] = load i32, i32* [[FALSE_LV0]] +; CHECK-NEXT: [[FALSE_C:%[a-z.0-9]+]] = icmp eq i32 [[FALSE_LV1]], 100 +; CHECK-NEXT: br i1 [[FALSE_C]], label %[[FALSE_NOCLOBBER:.+]], label %[[FALSE_CLOBBER:[a-z0-9._]+]] + +; CHECK: [[FALSE_NOCLOBBER]]: +; CHECK-NEXT: br label %[[FALSE_LATCH:[a-z0-9._]+]] + +; CHECK: [[FALSE_CLOBBER]]: +; CHECK-NEXT: call +; CHECK-NEXT: br label %[[FALSE_LATCH:[a-z0-9._]+]] + + +; CHECK: [[FALSE_LATCH]]: +; CHECK-NEXT: icmp +; CHECK-NEXT: add +; CHECK-NEXT: br {{.*}} label %[[FALSE_HEADER]] +; + + +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + %gep = getelementptr i32*, i32** %ptr, i32 1 + %lv.1 = load i32*, i32** %gep + %lv = load i32, i32* %lv.1 + %sc = icmp eq i32 %lv, 100 + br i1 %sc, label %noclobber, label %clobber + +noclobber: + br label %loop.latch + +clobber: + call void @clobber() + br label %loop.latch + +loop.latch: + %c = icmp ult i32 %iv, %N + %iv.next = add i32 %iv, 1 + br i1 %c, label %loop.header, label %exit + +exit: + ret i32 10 + +} + +define i32 @partial_unswitch_reduction_phi(i32* %ptr, i32 %N) { +; CHECK-LABEL: @partial_unswitch_reduction_phi +; CHECK-LABEL: entry: +; CHECK-NEXT: [[LV:%[0-9]+]] = load i32, i32* %ptr, align 4 +; CHECK-NEXT: [[C:%[0-9]+]] = icmp eq i32 [[LV]], 100 +; CHECK-NEXT: br i1 [[C]], label %[[SPLIT_TRUE_PH:[a-z._]+]], label %[[FALSE_CRIT:[a-z._]+]] + +; CHECK: [[FALSE_CRIT]]: +; CHECK-NEXT: br label %[[FALSE_PH:[a-z.]+]] + +; CHECK: [[SPLIT_TRUE_PH]]: +; CHECK-NEXT: br label %[[TRUE_HEADER:[a-z.]+]] + +; CHECK: [[TRUE_HEADER]]: +; CHECK-NEXT: phi i32 +; CHECK-NEXT: [[TRUE_RED:%[a-z.0-9]+]] = phi i32 [ 20, %[[SPLIT_TRUE_PH]] ], [ [[TRUE_RED_NEXT:%[a-z.0-9]+]], %[[TRUE_LATCH:[a-z.0-9]+]] +; CHECK-NEXT: [[TRUE_LV:%[a-z.0-9]+]] = load i32, i32* %ptr, align 4 +; CHECK-NEXT: [[TRUE_C:%[a-z.0-9]+]] = icmp eq i32 [[TRUE_LV]], 100 +; CHECK-NEXT: br i1 [[TRUE_C]], label %[[TRUE_CLOBBER:.+]], label %[[TRUE_NOCLOBBER:[a-z0-9._]+]] + +; CHECK: [[TRUE_NOCLOBBER]]: +; CHECK-NEXT: [[TRUE_ADD10:%.+]] = add i32 [[TRUE_RED]], 10 +; CHECK-NEXT: br label %[[TRUE_LATCH]] + +; CHECK: [[TRUE_CLOBBER]]: +; CHECK-NEXT: call +; CHECK-NEXT: [[TRUE_ADD5:%.+]] = add i32 [[TRUE_RED]], 5 +; CHECK-NEXT: br label %[[TRUE_LATCH]] + +; CHECK: [[TRUE_LATCH]]: +; CHECK-NEXT: [[TRUE_RED_NEXT]] = phi i32 [ [[TRUE_ADD5]], %[[TRUE_CLOBBER]] ], [ [[TRUE_ADD10]], %[[TRUE_NOCLOBBER]] ] +; CHECK-NEXT: icmp +; CHECK-NEXT: add +; CHECK-NEXT: br {{.*}} label %[[TRUE_HEADER]] + + +; CHECK: [[FALSE_PH]]: +; CHECK-NEXT: br label %[[FALSE_HEADER:[a-z.]+]] + +; CHECK: [[FALSE_HEADER]]: +; CHECK-NEXT: phi i32 +; CHECK-NEXT: [[FALSE_RED:%[a-z.0-9]+]] = phi i32 [ 20, %[[FALSE_PH]] ], [ [[FALSE_RED_NEXT:%[a-z.0-9]+]], %[[FALSE_LATCH:[a-z.0-9]+]] +; CHECK-NEXT: [[FALSE_LV:%[a-z.0-9]+]] = load i32, i32* %ptr, align 4 +; CHECK-NEXT: [[FALSE_C:%[a-z.0-9]+]] = icmp eq i32 [[FALSE_LV]], 100 +; CHECK-NEXT: br i1 false, label %[[FALSE_CLOBBER:.+]], label %[[FALSE_NOCLOBBER:[a-z0-9._]+]] + +; CHECK: [[FALSE_CLOBBER]]: +; CHECK-NEXT: call +; CHECK-NEXT: [[FALSE_ADD5:%.+]] = add i32 [[FALSE_RED]], 5 +; CHECK-NEXT: br label %[[FALSE_LATCH:[a-z0-9._]+]] + +; CHECK: [[FALSE_NOCLOBBER]]: +; CHECK-NEXT: [[FALSE_ADD10:%.+]] = add i32 [[FALSE_RED]], 10 +; CHECK-NEXT: br label %[[FALSE_LATCH:[a-z0-9._]+]] + +; CHECK: [[FALSE_LATCH]]: +; CHECK-NEXT: [[FALSE_RED_NEXT]] = phi i32 [ [[FALSE_ADD5]], %[[FALSE_CLOBBER]] ], [ [[FALSE_ADD10]], %[[FALSE_NOCLOBBER]] ] +; CHECK-NEXT: icmp +; CHECK-NEXT: add +; CHECK-NEXT: br {{.*}} label %[[FALSE_HEADER]] +; + +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + %red = phi i32 [ 20, %entry ], [ %red.next, %loop.latch ] + %lv = load i32, i32* %ptr + %sc = icmp eq i32 %lv, 100 + br i1 %sc, label %clobber, label %noclobber + +clobber: + call void @clobber() + %add.5 = add i32 %red, 5 + br label %loop.latch + +noclobber: + %add.10 = add i32 %red, 10 + br label %loop.latch + +loop.latch: + %red.next = phi i32 [ %add.5, %clobber ], [ %add.10, %noclobber ] + %c = icmp ult i32 %iv, %N + %iv.next = add i32 %iv, 1 + br i1 %c, label %loop.header, label %exit + +exit: + %red.next.lcssa = phi i32 [ %red.next, %loop.latch ] + ret i32 %red.next.lcssa +} + +define void @no_partial_unswitch_phi_cond(i1 %lc, i32 %N) { +; CHECK-LABEL: @no_partial_unswitch_phi_cond +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %loop.header +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + %sc = phi i1 [ %lc, %entry ], [ true, %loop.latch ] + br i1 %sc, label %clobber, label %noclobber + +clobber: + call void @clobber() + br label %loop.latch + +noclobber: + br label %loop.latch + +loop.latch: + %c = icmp ult i32 %iv, %N + %iv.next = add i32 %iv, 1 + br i1 %c, label %loop.header, label %exit + +exit: + ret void +} + +define void @no_partial_unswitch_clobber_latch(i32* %ptr, i32 %N) { +; CHECK-LABEL: @no_partial_unswitch_clobber_latch +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %loop.header +; + +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + %lv = load i32, i32* %ptr + %sc = icmp eq i32 %lv, 100 + br i1 %sc, label %noclobber, label %clobber + +noclobber: + br label %loop.latch + +clobber: + call void @clobber() + br label %loop.latch + +loop.latch: + call void @clobber() + %c = icmp ult i32 %iv, %N + %iv.next = add i32 %iv, 1 + br i1 %c, label %loop.header, label %exit + +exit: + ret void +} + +define void @no_partial_unswitch_clobber_header(i32* %ptr, i32 %N) { +; CHECK-LABEL: @no_partial_unswitch_clobber_header +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %loop.header +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + call void @clobber() + %lv = load i32, i32* %ptr + %sc = icmp eq i32 %lv, 100 + br i1 %sc, label %noclobber, label %clobber + +noclobber: + br label %loop.latch + +clobber: + call void @clobber() + br label %loop.latch + +loop.latch: + %c = icmp ult i32 %iv, %N + %iv.next = add i32 %iv, 1 + br i1 %c, label %loop.header, label %exit + +exit: + ret void +} + +define void @no_partial_unswitch_clobber_both(i32* %ptr, i32 %N) { +; CHECK-LABEL: @no_partial_unswitch_clobber_both +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %loop.header +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + %lv = load i32, i32* %ptr + %sc = icmp eq i32 %lv, 100 + br i1 %sc, label %noclobber, label %clobber + +noclobber: + call void @clobber() + br label %loop.latch + +clobber: + call void @clobber() + br label %loop.latch + +loop.latch: + %c = icmp ult i32 %iv, %N + %iv.next = add i32 %iv, 1 + br i1 %c, label %loop.header, label %exit + +exit: + ret void +}