diff --git a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp --- a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -249,19 +249,22 @@ bool tryTrivialLoopUnswitch(bool &Changed); bool unswitchIfProfitable(Value *LoopCond, Constant *Val, - Instruction *TI = nullptr); + Instruction *TI = nullptr, + ArrayRef ToDuplicate = {}); void unswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, BasicBlock *ExitBlock, Instruction *TI); void unswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L, - Instruction *TI); + Instruction *TI, + ArrayRef ToDuplicate = {}); void rewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, Constant *Val, bool IsEqual); - void emitPreheaderBranchOnCondition(Value *LIC, Constant *Val, - BasicBlock *TrueDest, - BasicBlock *FalseDest, - BranchInst *OldBranch, Instruction *TI); + void + emitPreheaderBranchOnCondition(Value *LIC, Constant *Val, + BasicBlock *TrueDest, BasicBlock *FalseDest, + BranchInst *OldBranch, Instruction *TI, + ArrayRef ToDuplicate = {}); void simplifyCode(std::vector &Worklist, Loop *L); @@ -629,6 +632,80 @@ return false; } +/// Check if the loop header has a conditional branch that is not +/// loop-invariant, because it involves load instructions. If all paths from +/// either the true or false successor to the header or loop exists do not +/// modify the memory feeding the condition, perform 'partial unswitching'. That +/// is, duplicate the instructions feeding the condition in the pre-header. Then +/// unswitch on the duplicated condition. The condition is now known in the +/// unswitched version for the 'invariant' path through the original loop. +static std::pair, Constant *> +hasPartialIVCondition(Loop *L) { + SmallVector ToDuplicate; + + auto *TI = dyn_cast(L->getHeader()->getTerminator()); + if (!TI || !TI->isConditional()) + return {}; + + auto *CondI = dyn_cast(TI->getCondition()); + if (!CondI || L->isLoopInvariant(CondI)) + return {}; + + ToDuplicate.push_back(CondI); + + SmallVector WorkList; + WorkList.append(CondI->op_begin(), CondI->op_end()); + + while (!WorkList.empty()) { + Instruction *I = dyn_cast(WorkList.back()); + WorkList.pop_back(); + if (!I || L->isLoopInvariant(I)) + continue; + + if (!isa(I) && !isa(I)) + return {}; + ToDuplicate.push_back(I); + WorkList.append(I->op_begin(), I->op_end()); + } + + if (ToDuplicate.size() <= 1) + return {}; + + auto HasNoSideEffectsOnPath = [L](BasicBlock *Succ, BasicBlock *Header) { + SmallVector WorkList; + WorkList.push_back(Succ); + WorkList.push_back(Header); + SmallPtrSet Seen; + + while (!WorkList.empty()) { + BasicBlock *Current = WorkList.back(); + WorkList.pop_back(); + if (!L->contains(Current)) + continue; + auto SeenIns = Seen.insert(Current); + if (!SeenIns.second) + continue; + + if (any_of(*Current, + [](Instruction &I) { return I.mayHaveSideEffects(); })) + return false; + + if (Current == L->getHeader()) + continue; + + WorkList.append(succ_begin(Current), succ_end(Current)); + } + return true; + }; + + if (HasNoSideEffectsOnPath(TI->getSuccessor(0), L->getHeader())) + return {ToDuplicate, ConstantInt::getTrue(TI->getContext())}; + if (HasNoSideEffectsOnPath(TI->getSuccessor(1), L->getHeader())) + return {ToDuplicate, ConstantInt::getFalse(TI->getContext())}; + + return {}; +} + /// Do actual work and unswitch loop if possible and profitable. bool LoopUnswitch::processCurrentLoop() { bool Changed = false; @@ -668,6 +745,17 @@ LoopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize)) return Changed; + auto ToDuplicate = isPartialIVCondition(CurrentLoop); + if (!ToDuplicate.first.empty()) { + ++NumBranches; + unswitchIfProfitable(ToDuplicate.first[0], ToDuplicate.second, + CurrentLoop->getHeader()->getTerminator(), + ToDuplicate.first); + + RedoLoop = false; + return true; + } + // Run through the instructions in the loop, keeping track of three things: // // - That we do not unswitch loops containing convergent operations, as we @@ -828,6 +916,7 @@ } } } + return Changed; } @@ -885,7 +974,8 @@ /// simplify the loop. If we decide that this is profitable, /// unswitch the loop, reprocess the pieces, then return true. bool LoopUnswitch::unswitchIfProfitable(Value *LoopCond, Constant *Val, - Instruction *TI) { + Instruction *TI, + ArrayRef ToDuplicate) { // Check to see if it would be profitable to unswitch current loop. if (!BranchesInfo.costAllowsUnswitching()) { LLVM_DEBUG(dbgs() << "NOT unswitching loop %" @@ -905,31 +995,61 @@ return false; } - unswitchNontrivialCondition(LoopCond, Val, CurrentLoop, TI); + unswitchNontrivialCondition(LoopCond, Val, CurrentLoop, TI, ToDuplicate); return true; } /// Emit a conditional branch on two values if LIC == Val, branch to TrueDst, /// otherwise branch to FalseDest. Insert the code immediately before OldBranch /// and remove (but not erase!) it from the function. -void LoopUnswitch::emitPreheaderBranchOnCondition(Value *LIC, Constant *Val, - BasicBlock *TrueDest, - BasicBlock *FalseDest, - BranchInst *OldBranch, - Instruction *TI) { +void LoopUnswitch::emitPreheaderBranchOnCondition( + Value *LIC, Constant *Val, BasicBlock *TrueDest, BasicBlock *FalseDest, + BranchInst *OldBranch, Instruction *TI, + ArrayRef ToDuplicate) { assert(OldBranch->isUnconditional() && "Preheader is not split correctly"); assert(TrueDest != FalseDest && "Branch targets should be different"); + // Insert a conditional branch on LIC to the two preheaders. The original // code is the true version and the new code is the false version. Value *BranchVal = LIC; bool Swapped = false; - if (!isa(Val) || - Val->getType() != Type::getInt1Ty(LIC->getContext())) - BranchVal = new ICmpInst(OldBranch, ICmpInst::ICMP_EQ, LIC, Val); - else if (Val != ConstantInt::getTrue(Val->getContext())) { - // We want to enter the new loop when the condition is true. - std::swap(TrueDest, FalseDest); - Swapped = true; + + if (!ToDuplicate.empty()) { + ValueToValueMapTy Old2New; + for (Instruction *I : reverse(ToDuplicate)) { + auto *New = I->clone(); + New->insertBefore(OldBranch); + RemapInstruction(New, Old2New, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + Old2New[I] = New; + + if (MSSAU) { + MemorySSA *MSSA = MSSAU->getMemorySSA(); + auto *MemA = dyn_cast_or_null(MSSA->getMemoryAccess(I)); + if (!MemA) + continue; + + Loop *L = LI->getLoopFor(I->getParent()); + auto *DefiningAccess = MemA->getDefiningAccess(); + if (auto *MemPhi = dyn_cast(DefiningAccess)) { + DefiningAccess = + MemPhi->getIncomingValueForBlock(L->getLoopPreheader()); + } + MSSAU->createMemoryAccessInBB(New, DefiningAccess, New->getParent(), + MemorySSA::BeforeTerminator); + } + } + BranchVal = Old2New[ToDuplicate[0]]; + } else { + + if (!isa(Val) || + Val->getType() != Type::getInt1Ty(LIC->getContext())) + BranchVal = new ICmpInst(OldBranch, ICmpInst::ICMP_EQ, LIC, Val); + else if (Val != ConstantInt::getTrue(Val->getContext())) { + // We want to enter the new loop when the condition is true. + std::swap(TrueDest, FalseDest); + Swapped = true; + } } // Old branch will be removed, so save its parent and successor to update the @@ -1212,8 +1332,9 @@ /// We determined that the loop is profitable to unswitch when LIC equal Val. /// Split it into loop versions and test the condition outside of either loop. /// Return the loops created as Out1/Out2. -void LoopUnswitch::unswitchNontrivialCondition(Value *LIC, Constant *Val, - Loop *L, Instruction *TI) { +void LoopUnswitch::unswitchNontrivialCondition( + Value *LIC, Constant *Val, Loop *L, Instruction *TI, + ArrayRef ToDuplicate) { Function *F = LoopHeader->getParent(); LLVM_DEBUG(dbgs() << "loop-unswitch: Unswitching loop %" << LoopHeader->getName() << " [" << L->getBlocks().size() @@ -1345,7 +1466,7 @@ // Emit the new branch that selects between the two versions of this loop. emitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR, - TI); + TI, ToDuplicate); if (MSSAU) { // Update MemoryPhis in Exit blocks. MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMap, *DT); @@ -1367,17 +1488,26 @@ // iteration. WeakTrackingVH LICHandle(LIC); - // Now we rewrite the original code to know that the condition is true and the - // new code to know that the condition is false. - rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/false); - - // It's possible that simplifying one loop could cause the other to be - // changed to another value or a constant. If its a constant, don't simplify - // it. - if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop && - LICHandle && !isa(LICHandle)) - rewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, - /*IsEqual=*/true); + if (ToDuplicate.empty()) { + // Now we rewrite the original code to know that the condition is true and + // the new code to know that the condition is false. + rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/false); + + // It's possible that simplifying one loop could cause the other to be + // changed to another value or a constant. If its a constant, don't + // simplify it. + if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop && + LICHandle && !isa(LICHandle)) + rewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, + /*IsEqual=*/true); + } else { + auto *CC = cast(Val); + if (CC->isOneValue()) { + rewriteLoopBodyWithConditionConstant(NewLoop, VMap[LIC], Val, + /*IsEqual=*/true); + } else + rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/true); + } if (MSSA && VerifyMemorySSA) MSSA->verifyMemorySSA(); diff --git a/llvm/test/Transforms/LoopUnswitch/partial-unswitch.ll b/llvm/test/Transforms/LoopUnswitch/partial-unswitch.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopUnswitch/partial-unswitch.ll @@ -0,0 +1,143 @@ +; RUN: opt -loop-unswitch -S %s | FileCheck %s + +declare void @clobber() + +define i32 @partial_unswitch_true_successor(i32* %ptr, i32 %N) { +; CHECK-LABEL: @partial_unswitch_true_successor +; CHECK-LABEL: entry: +; CHECK-NEXT: [[LV:%[0-9]+]] = load i32, i32* %ptr, align 4 +; CHECK-NEXT: [[C:%[0-9]+]] = icmp eq i32 [[LV]], 100 +; CHECK-NEXT: br i1 [[C]], label %[[SPLIT_TRUE_PH:[a-z._]+]], label %[[FALSE_CRIT:[a-z._]+]] + +; CHECK: [[FALSE_CRIT]]: +; CHECK-NEXT: br label %[[FALSE_PH:[a-z.]+]] + +; CHECK: [[SPLIT_TRUE_PH]]: +; CHECK-NEXT: br label %[[TRUE_HEADER:[a-z.]+]] + +; CHECK: [[TRUE_HEADER]]: +; CHECK-NEXT: phi i32 +; CHECK-NEXT: [[TRUE_LV:%[a-z.0-9]+]] = load i32, i32* %ptr, align 4 +; CHECK-NEXT: [[TRUE_C:%[a-z.0-9]+]] = icmp eq i32 [[TRUE_LV]], 100 +; CHECK-NEXT: br i1 true, label + +; CHECK: [[FALSE_PH]]: +; CHECK-NEXT: br label %[[FALSE_HEADER:[a-z.]+]] + +; CHECK: [[FALSE_HEADER]]: +; CHECK-NEXT: phi i32 +; CHECK-NEXT: [[FALSE_LV:%[a-z.0-9]+]] = load i32, i32* %ptr, align 4 +; CHECK-NEXT: [[FALSE_C:%[a-z.0-9]+]] = icmp eq i32 [[FALSE_LV]], 100 +; CHECK-NEXT: br i1 [[FALSE_C]], label + +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + %lv = load i32, i32* %ptr + %sc = icmp eq i32 %lv, 100 + br i1 %sc, label %noclobber, label %clobber + +noclobber: + br label %loop.latch + +clobber: + call void @clobber() + br label %loop.latch + +loop.latch: + %c = icmp ult i32 %iv, %N + %iv.next = add i32 %iv, 1 + br i1 %c, label %loop.header, label %exit + +exit: + ret i32 10 + +} + +define i32 @partial_unswitch_false_successor(i32* %ptr, i32 %N) { +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + %lv = load i32, i32* %ptr + %sc = icmp eq i32 %lv, 100 + br i1 %sc, label %clobber, label %noclobber + +clobber: + call void @clobber() + br label %loop.latch + +noclobber: + br label %loop.latch + +loop.latch: + %c = icmp ult i32 %iv, %N + %iv.next = add i32 %iv, 1 + br i1 %c, label %loop.header, label %exit + +exit: + ret i32 10 +} + +define i32 @partial_unswtich_gep_load_icmp(i32** %ptr, i32 %N) { +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + %gep = getelementptr i32*, i32** %ptr, i32 1 + %lv.1 = load i32*, i32** %gep + %lv = load i32, i32* %lv.1 + %sc = icmp eq i32 %lv, 100 + br i1 %sc, label %noclobber, label %clobber + +noclobber: + br label %loop.latch + +clobber: + call void @clobber() + br label %loop.latch + +loop.latch: + %c = icmp ult i32 %iv, %N + %iv.next = add i32 %iv, 1 + br i1 %c, label %loop.header, label %exit + +exit: + ret i32 10 + +} + +define i32 @partial_unswitch_phi(i32* %ptr, i32 %N) { +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + %red = phi i32 [ 20, %entry ], [ %red.next, %loop.latch ] + %lv = load i32, i32* %ptr + %sc = icmp eq i32 %lv, 100 + br i1 %sc, label %clobber, label %noclobber + +clobber: + call void @clobber() + %add.5 = add i32 %red, 5 + br label %loop.latch + +noclobber: + %add.10 = add i32 %red, 10 + br label %loop.latch + +loop.latch: + %red.next = phi i32 [ %add.5, %clobber ], [ %add.10, %noclobber ] + %c = icmp ult i32 %iv, %N + %iv.next = add i32 %iv, 1 + br i1 %c, label %loop.header, label %exit + +exit: + %red.next.lcssa = phi i32 [ %red.next, %loop.latch ] + ret i32 %red.next.lcssa +}