diff --git a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp --- a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -640,6 +640,26 @@ return false; } +namespace { +/// Struct to hold information about a partially invariant condition. +struct IVConditionInfo { + /// Instructions that need to be duplicated and checked for the unswitching + /// condition. + SmallVector InstToDuplicate; + + /// Constant to indicate for which value the condition is invariant. + Constant *KnownValue = nullptr; + + /// True if the partially invariant path is no-op (=does not have any + /// side-effects and no loop value is used outside the loop). + bool PathIsNoop = false; + + /// If the partially invariant path reaches a single exit block, ExitForPath + /// is set to that block. Otherwise it is nullptr. + BasicBlock *ExitForPath = nullptr; +}; +} // namespace + /// Check if the loop header has a conditional branch that is not /// loop-invariant, because it involves load instructions. If all paths from /// either the true or false successor to the header or loop exists do not @@ -651,9 +671,8 @@ /// If the branch condition of the header is partially invariant, return a pair /// containing the instructions to duplicate and a boolean Constant to update /// the condition in the loops created for the true or false successors. -static std::pair, Constant *> -hasPartialIVCondition(Loop *L, MemorySSA &MSSA, AAResults *AA) { - SmallVector ToDuplicate; +static Optional hasPartialIVCondition(Loop *L, MemorySSA &MSSA, + AAResults *AA) { auto *TI = dyn_cast(L->getHeader()->getTerminator()); if (!TI || !TI->isConditional()) @@ -665,7 +684,8 @@ if (!CondI || !L->contains(CondI)) return {}; - ToDuplicate.push_back(CondI); + IVConditionInfo Info; + Info.InstToDuplicate.push_back(CondI); SmallVector WorkList; WorkList.append(CondI->op_begin(), CondI->op_end()); @@ -686,7 +706,7 @@ if (LI->isVolatile() || LI->isAtomic()) return {}; - ToDuplicate.push_back(I); + Info.InstToDuplicate.push_back(I); if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) { if (auto *MemUse = dyn_cast_or_null(MA)) { // Queue the defining access to check for alias checks. @@ -701,12 +721,18 @@ WorkList.append(I->op_begin(), I->op_end()); } - if (ToDuplicate.size() <= 1) + if (Info.InstToDuplicate.size() <= 1) return {}; + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + auto HasNoClobbersOnPath = - [L, AA, &AccessedLocs](BasicBlock *Succ, BasicBlock *Header, - SmallVector AccessesToCheck) { + [L, AA, &AccessedLocs, &Info, + &ExitingBlocks](BasicBlock *Succ, BasicBlock *Header, + SmallVector AccessesToCheck) { + Info.PathIsNoop = true; + Info.ExitForPath = nullptr; // First, collect all blocks in the loop that are on a patch from Succ // to the header. SmallVector WorkList; @@ -714,6 +740,9 @@ WorkList.push_back(Header); SmallPtrSet Seen; Seen.insert(Header); + Info.PathIsNoop &= all_of( + *Header, [](Instruction &I) { return !I.mayHaveSideEffects(); }); + while (!WorkList.empty()) { BasicBlock *Current = WorkList.pop_back_val(); if (!L->contains(Current)) @@ -722,6 +751,8 @@ if (!SeenIns.second) continue; + Info.PathIsNoop &= all_of( + *Current, [](Instruction &I) { return !I.mayHaveSideEffects(); }); WorkList.append(succ_begin(Current), succ_end(Current)); } @@ -763,6 +794,30 @@ AccessesToCheck.push_back(cast(U.getUser())); } + // We could also allow loops with constant trip counts without + // mustprogress, but ScalarEvolution may not be available. + Info.PathIsNoop &= + L->getHeader()->getParent()->mustProgress() || hasMustProgress(L); + + // If the path is considered a no-op so far, check if it reaches a + // single exit block without any phis. This ensures no values from the + // loop are used outside of the loop. + if (Info.PathIsNoop) { + for (auto *Exiting : ExitingBlocks) { + if (!Seen.contains(Exiting)) + continue; + for (auto *Succ : successors(Exiting)) { + if (L->contains(Succ)) + continue; + + Info.PathIsNoop &= empty(Succ->phis()) && !Info.ExitForPath; + if (!Info.PathIsNoop) + break; + assert(!Info.ExitForPath); + Info.ExitForPath = Succ; + } + } + } return true; }; @@ -772,11 +827,14 @@ return {}; if (HasNoClobbersOnPath(TI->getSuccessor(0), L->getHeader(), AccessesToCheck)) - return {ToDuplicate, ConstantInt::getTrue(TI->getContext())}; - if (HasNoClobbersOnPath(TI->getSuccessor(1), L->getHeader(), AccessesToCheck)) - return {ToDuplicate, ConstantInt::getFalse(TI->getContext())}; + Info.KnownValue = ConstantInt::getTrue(TI->getContext()); + else if (HasNoClobbersOnPath(TI->getSuccessor(1), L->getHeader(), + AccessesToCheck)) + Info.KnownValue = ConstantInt::getFalse(TI->getContext()); + else + return {}; - return {}; + return Info; } /// Do actual work and unswitch loop if possible and profitable. @@ -986,17 +1044,54 @@ // metadata, to avoid unswitching the same loop multiple times. if (MSSA && !findOptionMDForLoop(CurrentLoop, "llvm.loop.unswitch.partial.disable")) { - auto ToDuplicate = hasPartialIVCondition(CurrentLoop, *MSSA, AA); - if (!ToDuplicate.first.empty()) { + if (auto Info = hasPartialIVCondition(CurrentLoop, *MSSA, AA)) { + assert(!Info->InstToDuplicate.empty() && + "need at least a partially invariant condition"); LLVM_DEBUG(dbgs() << "loop-unswitch: Found partially invariant condition " - << *ToDuplicate.first[0] << "\n"); - ++NumBranches; - unswitchIfProfitable(ToDuplicate.first[0], ToDuplicate.second, - CurrentLoop->getHeader()->getTerminator(), - ToDuplicate.first); + << *Info->InstToDuplicate[0] << "\n"); + + Instruction *TI = CurrentLoop->getHeader()->getTerminator(); + Value *LoopCond = Info->InstToDuplicate[0]; + + // If the partially unswitched path is a no-op and has a single exit + // block, we do not need to do full unswitching. Instead, we can directly + // branch to the exit. + // TODO: Instead of duplicating the checks, we could also just directly + // branch to the exit from the conditional branch in the loop. + if (Info->PathIsNoop) { + if (HasBranchDivergence && + getAnalysis().isDivergent(LoopCond)) { + LLVM_DEBUG(dbgs() << "NOT unswitching loop %" + << CurrentLoop->getHeader()->getName() + << " at non-trivial condition '" + << *Info->KnownValue << "' == " << *LoopCond << "\n" + << ". Condition is divergent.\n"); + return false; + } - RedoLoop = false; - return true; + ++NumBranches; + + BasicBlock *TrueDest = LoopHeader; + BasicBlock *FalseDest = Info->ExitForPath; + if (Info->KnownValue->isOneValue()) + std::swap(TrueDest, FalseDest); + + auto *OldBr = + cast(CurrentLoop->getLoopPreheader()->getTerminator()); + emitPreheaderBranchOnCondition(LoopCond, Info->KnownValue, TrueDest, + FalseDest, OldBr, TI, + Info->InstToDuplicate); + delete OldBr; + RedoLoop = false; + return true; + } + if (unswitchIfProfitable(LoopCond, Info->KnownValue, + CurrentLoop->getHeader()->getTerminator(), + Info->InstToDuplicate)) { + ++NumBranches; + RedoLoop = false; + return true; + } } } @@ -1595,6 +1690,7 @@ if (CC->isOneValue()) { rewriteLoopBodyWithConditionConstant(NewLoop, VMap[LIC], Val, /*IsEqual=*/true); + } else rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/true); diff --git a/llvm/test/Transforms/LoopUnswitch/partial-unswitch-cost.ll b/llvm/test/Transforms/LoopUnswitch/partial-unswitch-cost.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopUnswitch/partial-unswitch-cost.ll @@ -0,0 +1,287 @@ +; RUN: opt -loop-unswitch -loop-unswitch-threshold=10 -verify-dom-info -verify-memoryssa -S -enable-new-pm=0 %s | FileCheck %s + +declare void @clobber() + +define i32 @no_partial_unswitch_size_too_large_no_mustprogress(i32* %ptr, i32 %N) { +; CHECK-LABEL: @no_partial_unswitch_size_too_large_no_mustprogress +; CHECK-LABEL: entry: +; CHECK-NEXT: br label %loop.header +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + %lv = load i32, i32* %ptr + %sc = icmp eq i32 %lv, 100 + br i1 %sc, label %noclobber, label %clobber + +noclobber: + br label %loop.latch + +clobber: + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + br label %loop.latch + +loop.latch: + %c = icmp ult i32 %iv, %N + %iv.next = add i32 %iv, 1 + br i1 %c, label %loop.header, label %exit + +exit: + ret i32 10 +} + +define i32 @partial_unswitch_shortcut_mustprogress(i32* %ptr, i32 %N) mustprogress { +; CHECK-LABEL: @partial_unswitch_shortcut_mustprogress +; CHECK-LABEL: entry: +; CHECK-NEXT: [[LV:%[0-9]+]] = load i32, i32* %ptr, align 4 +; CHECK-NEXT: [[C:%[0-9]+]] = icmp eq i32 [[LV]], 100 +; CHECK-NEXT: br i1 [[C]], label %[[CRIT_TO_EXIT:[a-z._]+]], label %[[CRIT_TO_HEADER:[a-z._]+]] +; +; CHECK: [[CRIT_TO_HEADER]]: +; CHECK-NEXT: br label %loop.header +; +; CHECK: [[CRIT_TO_EXIT]]: +; CHECK-NEXT: br label %exit +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + %lv = load i32, i32* %ptr + %sc = icmp eq i32 %lv, 100 + br i1 %sc, label %noclobber, label %clobber + +noclobber: + br label %loop.latch + +clobber: + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + br label %loop.latch + +loop.latch: + %c = icmp ult i32 %iv, %N + %iv.next = add i32 %iv, 1 + br i1 %c, label %loop.header, label %exit + +exit: + ret i32 10 +} + +define i32 @no_partial_unswitch_shortcut_mustprogress_exit_value_used(i32* %ptr, i32 %N) mustprogress { +; CHECK-LABEL: @no_partial_unswitch_shortcut_mustprogress_exit_value_use +; CHECK-LABEL: entry: +; CHECK-NEXT: br label %loop.header +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + %red = phi i32 [ 0, %entry ], [ %red.next, %loop.latch ] + %lv = load i32, i32* %ptr + %sc = icmp eq i32 %lv, 100 + br i1 %sc, label %noclobber, label %clobber + +noclobber: + %red.add = add i32 %red, %lv + br label %loop.latch + +clobber: + %red.mul = mul i32 %red, %lv + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + br label %loop.latch + +loop.latch: + %red.next = phi i32 [ %red.add, %noclobber ], [ %red.mul, %clobber ] + %c = icmp ult i32 %iv, %N + %iv.next = add i32 %iv, 1 + br i1 %c, label %loop.header, label %exit + +exit: + ret i32 %red.next +} + +define i32 @no_partial_unswitch_shortcut_multi_exit(i32* %ptr, i32 %N, i1 %ec.1) mustprogress { +; CHECK-LABEL: @no_partial_unswitch_shortcut_multi_exit +; CHECK-LABEL: entry: +; CHECK-NEXT: br label %loop.header +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + %lv = load i32, i32* %ptr + %sc = icmp eq i32 %lv, 100 + br i1 %sc, label %noclobber, label %clobber + +noclobber: + br i1 %ec.1, label %loop.latch, label %exit + +clobber: + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + br label %loop.latch + +loop.latch: + %c = icmp ult i32 %iv, %N + %iv.next = add i32 %iv, 1 + br i1 %c, label %loop.header, label %exit + +exit: + ret i32 10 +} + + +define i32 @no_partial_unswitch_shortcut_mustprogress_store(i32* %ptr, i32* noalias %dst, i32 %N) mustprogress { +; CHECK-LABEL: @no_partial_unswitch_shortcut_mustprogress_store +; CHECK-LABEL: entry: +; CHECK-NEXT: br label %loop.header +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + %lv = load i32, i32* %ptr + %sc = icmp eq i32 %lv, 100 + br i1 %sc, label %noclobber, label %clobber + +noclobber: + store i32 0, i32* %dst + br label %loop.latch + +clobber: + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + br label %loop.latch + +loop.latch: + %c = icmp ult i32 %iv, %N + %iv.next = add i32 %iv, 1 + br i1 %c, label %loop.header, label %exit + +exit: + ret i32 10 +} + + +define i32 @no_partial_unswitch_shortcut_mustprogress_store2(i32* %ptr, i32* noalias %dst, i32 %N) mustprogress { +; CHECK-LABEL: @no_partial_unswitch_shortcut_mustprogress_store +; CHECK-LABEL: entry: +; CHECK-NEXT: br label %loop.header +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + %lv = load i32, i32* %ptr + %sc = icmp eq i32 %lv, 100 + br i1 %sc, label %noclobber, label %clobber + +noclobber: + br label %loop.latch + +clobber: + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + br label %loop.latch + +loop.latch: + store i32 0, i32* %dst + %c = icmp ult i32 %iv, %N + %iv.next = add i32 %iv, 1 + br i1 %c, label %loop.header, label %exit + +exit: + ret i32 10 +} + + +define i32 @no_partial_unswitch_shortcut_mustprogress_store3(i32* %ptr, i32* noalias %dst, i32 %N) mustprogress { +; CHECK-LABEL: @no_partial_unswitch_shortcut_mustprogress_store +; CHECK-LABEL: entry: +; CHECK-NEXT: br label %loop.header +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + store i32 0, i32* %dst + %lv = load i32, i32* %ptr + %sc = icmp eq i32 %lv, 100 + br i1 %sc, label %noclobber, label %clobber + +noclobber: + br label %loop.latch + +clobber: + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + call void @clobber() + br label %loop.latch + +loop.latch: + %c = icmp ult i32 %iv, %N + %iv.next = add i32 %iv, 1 + br i1 %c, label %loop.header, label %exit + +exit: + ret i32 10 +}