diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -484,6 +484,38 @@
                  const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
                  SCEVExpander &Expander);
 
+/// Struct to hold information about a partially invariant condition.
+struct IVConditionInfo {
+  /// Instructions that need to be duplicated and checked for the unswitching
+  /// condition.
+  SmallVector<Instruction *> InstToDuplicate;
+
+  /// Constant to indicate for which value the condition is invariant.
+  Constant *KnownValue = nullptr;
+
+  /// True if the partially invariant path is a no-op (=does not have any
+  /// side-effects and no loop value is used outside the loop).
+  bool PathIsNoop = true;
+
+  /// If the partially invariant path reaches a single exit block, ExitForPath
+  /// is set to that block. Otherwise it is nullptr.
+  BasicBlock *ExitForPath = nullptr;
+};
+
+/// Check if the loop header has a conditional branch that is not
+/// loop-invariant, because it involves load instructions. If all paths from
+/// either the true or false successor to the header or loop exits do not
+/// modify the memory feeding the condition, perform 'partial unswitching'. That
+/// is, duplicate the instructions feeding the condition in the pre-header. Then
+/// unswitch on the duplicated condition. The condition is now known in the
+/// unswitched version for the 'invariant' path through the original loop.
+///
+/// If the branch condition of the header is partially invariant, return the
+/// instructions to duplicate and a boolean Constant to update the condition
+/// in the loops created for the true or false successors.
+Optional<IVConditionInfo> hasPartialIVCondition(Loop *L, unsigned MSSAThreshold,
+                                                MemorySSA *MSSA, AAResults *AA);
+
 } // end namespace llvm
 
 #endif // LLVM_TRANSFORMS_UTILS_LOOPUTILS_H
diff --git a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
--- a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -640,211 +640,6 @@
   return false;
 }
 
-namespace {
-/// Struct to hold information about a partially invariant condition.
-struct IVConditionInfo {
-  /// Instructions that need to be duplicated and checked for the unswitching
-  /// condition.
-  SmallVector<Instruction *> InstToDuplicate;
-
-  /// Constant to indicate for which value the condition is invariant.
-  Constant *KnownValue = nullptr;
-
-  /// True if the partially invariant path is no-op (=does not have any
-  /// side-effects and no loop value is used outside the loop).
-  bool PathIsNoop = true;
-
-  /// If the partially invariant path reaches a single exit block, ExitForPath
-  /// is set to that block. Otherwise it is nullptr.
-  BasicBlock *ExitForPath = nullptr;
-};
-} // namespace
-
-/// Check if the loop header has a conditional branch that is not
-/// loop-invariant, because it involves load instructions. If all paths from
-/// either the true or false successor to the header or loop exists do not
-/// modify the memory feeding the condition, perform 'partial unswitching'. That
-/// is, duplicate the instructions feeding the condition in the pre-header. Then
-/// unswitch on the duplicated condition. The condition is now known in the
-/// unswitched version for the 'invariant' path through the original loop.
-///
-/// If the branch condition of the header is partially invariant, return a pair
-/// containing the instructions to duplicate and a boolean Constant to update
-/// the condition in the loops created for the true or false successors.
-static Optional<IVConditionInfo> hasPartialIVCondition(Loop *L, MemorySSA &MSSA,
-                                                       AAResults *AA) {
-
-  auto *TI = dyn_cast<BranchInst>(L->getHeader()->getTerminator());
-  if (!TI || !TI->isConditional())
-    return {};
-
-  auto *CondI = dyn_cast<Instruction>(TI->getCondition());
-  // The case with the condition outside the loop should already be handled
-  // earlier.
-  if (!CondI || !L->contains(CondI))
-    return {};
-
-  SmallVector<Instruction *> InstToDuplicate;
-  InstToDuplicate.push_back(CondI);
-
-  SmallVector<Value *, 4> WorkList;
-  WorkList.append(CondI->op_begin(), CondI->op_end());
-
-  SmallVector<MemoryAccess *, 4> AccessesToCheck;
-  SmallVector<MemoryLocation, 4> AccessedLocs;
-  while (!WorkList.empty()) {
-    Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val());
-    if (!I || !L->contains(I))
-      continue;
-
-    // TODO: support additional instructions.
-    if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I))
-      return {};
-
-    // Do not duplicate volatile and atomic loads.
-    if (auto *LI = dyn_cast<LoadInst>(I))
-      if (LI->isVolatile() || LI->isAtomic())
-        return {};
-
-    InstToDuplicate.push_back(I);
-    if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) {
-      if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) {
-        // Queue the defining access to check for alias checks.
-        AccessesToCheck.push_back(MemUse->getDefiningAccess());
-        AccessedLocs.push_back(MemoryLocation::get(I));
-      } else {
-        // MemoryDefs may clobber the location or may be atomic memory
-        // operations. Bail out.
-        return {};
-      }
-    }
-    WorkList.append(I->op_begin(), I->op_end());
-  }
-
-  if (InstToDuplicate.size() <= 1)
-    return {};
-
-  SmallVector<BasicBlock *, 4> ExitingBlocks;
-  L->getExitingBlocks(ExitingBlocks);
-  auto HasNoClobbersOnPath =
-      [L, AA, &AccessedLocs, &ExitingBlocks,
-       &InstToDuplicate](BasicBlock *Succ, BasicBlock *Header,
-                         SmallVector<MemoryAccess *, 4> AccessesToCheck)
-      -> Optional<IVConditionInfo> {
-    IVConditionInfo Info;
-    // First, collect all blocks in the loop that are on a patch from Succ
-    // to the header.
-    SmallVector<BasicBlock *, 4> WorkList;
-    WorkList.push_back(Succ);
-    WorkList.push_back(Header);
-    SmallPtrSet<BasicBlock *, 4> Seen;
-    Seen.insert(Header);
-    Info.PathIsNoop &=
-        all_of(*Header, [](Instruction &I) { return !I.mayHaveSideEffects(); });
-
-    while (!WorkList.empty()) {
-      BasicBlock *Current = WorkList.pop_back_val();
-      if (!L->contains(Current))
-        continue;
-      const auto &SeenIns = Seen.insert(Current);
-      if (!SeenIns.second)
-        continue;
-
-      Info.PathIsNoop &= all_of(
-          *Current, [](Instruction &I) { return !I.mayHaveSideEffects(); });
-      WorkList.append(succ_begin(Current), succ_end(Current));
-    }
-
-    // Require at least 2 blocks on a path through the loop. This skips
-    // paths that directly exit the loop.
-    if (Seen.size() < 2)
-      return {};
-
-    // Next, check if there are any MemoryDefs that are on the path through
-    // the loop (in the Seen set) and they may-alias any of the locations in
-    // AccessedLocs. If that is the case, they may modify the condition and
-    // partial unswitching is not possible.
-    SmallPtrSet<MemoryAccess *, 4> SeenAccesses;
-    while (!AccessesToCheck.empty()) {
-      MemoryAccess *Current = AccessesToCheck.pop_back_val();
-      auto SeenI = SeenAccesses.insert(Current);
-      if (!SeenI.second || !Seen.contains(Current->getBlock()))
-        continue;
-
-      // Bail out if exceeded the threshold.
-      if (SeenAccesses.size() >= MSSAThreshold)
-        return {};
-
-      // MemoryUse are read-only accesses.
-      if (isa<MemoryUse>(Current))
-        continue;
-
-      // For a MemoryDef, check if is aliases any of the location feeding
-      // the original condition.
-      if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
-        if (any_of(AccessedLocs, [AA, CurrentDef](MemoryLocation &Loc) {
-              return isModSet(
-                  AA->getModRefInfo(CurrentDef->getMemoryInst(), Loc));
-            }))
-          return {};
-      }
-
-      for (Use &U : Current->uses())
-        AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));
-    }
-
-    // We could also allow loops with known trip counts without mustprogress,
-    // but ScalarEvolution may not be available.
-    Info.PathIsNoop &=
-        L->getHeader()->getParent()->mustProgress() || hasMustProgress(L);
-
-    // If the path is considered a no-op so far, check if it reaches a
-    // single exit block without any phis. This ensures no values from the
-    // loop are used outside of the loop.
-    if (Info.PathIsNoop) {
-      for (auto *Exiting : ExitingBlocks) {
-        if (!Seen.contains(Exiting))
-          continue;
-        for (auto *Succ : successors(Exiting)) {
-          if (L->contains(Succ))
-            continue;
-
-          Info.PathIsNoop &= llvm::empty(Succ->phis()) &&
-                             (!Info.ExitForPath || Info.ExitForPath == Succ);
-          if (!Info.PathIsNoop)
-            break;
-          assert((!Info.ExitForPath || Info.ExitForPath == Succ) &&
-                 "cannot have multiple exit blocks");
-          Info.ExitForPath = Succ;
-        }
-      }
-    }
-    if (!Info.ExitForPath)
-      Info.PathIsNoop = false;
-
-    Info.InstToDuplicate = InstToDuplicate;
-    return Info;
-  };
-
-  // If we branch to the same successor, partial unswitching will not be
-  // beneficial.
-  if (TI->getSuccessor(0) == TI->getSuccessor(1))
-    return {};
-
-  if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(0), L->getHeader(),
-                                      AccessesToCheck)) {
-    Info->KnownValue = ConstantInt::getTrue(TI->getContext());
-    return Info;
-  }
-  if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(1), L->getHeader(),
-                                      AccessesToCheck)) {
-    Info->KnownValue = ConstantInt::getFalse(TI->getContext());
-    return Info;
-  }
-
-  return {};
-}
-
 /// Do actual work and unswitch loop if possible and profitable.
 bool LoopUnswitch::processCurrentLoop() {
   bool Changed = false;
@@ -1052,7 +847,8 @@
   // metadata, to avoid unswitching the same loop multiple times.
   if (MSSA &&
       !findOptionMDForLoop(CurrentLoop, "llvm.loop.unswitch.partial.disable")) {
-    if (auto Info = hasPartialIVCondition(CurrentLoop, *MSSA, AA)) {
+    if (auto Info =
+            llvm::hasPartialIVCondition(CurrentLoop, MSSAThreshold, MSSA, AA)) {
       assert(!Info->InstToDuplicate.empty() &&
              "need at least a partially invariant condition");
       LLVM_DEBUG(dbgs() << "loop-unswitch: Found partially invariant condition "
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1705,3 +1705,189 @@
     FirstInst = GetFirstInst(FirstInst, Check, Loc);
   return std::make_pair(FirstInst, Check);
 }
+
+/// Check if the loop header has a conditional branch that is not
+/// loop-invariant, because it involves load instructions. If all paths from
+/// either the true or false successor to the header or loop exits do not
+/// modify the memory feeding the condition, perform 'partial unswitching'. That
+/// is, duplicate the instructions feeding the condition in the pre-header. Then
+/// unswitch on the duplicated condition. The condition is now known in the
+/// unswitched version for the 'invariant' path through the original loop.
+///
+/// If the branch condition of the header is partially invariant, return the
+/// instructions to duplicate and a boolean Constant to update the condition
+/// in the loops created for the true or false successors.
+Optional<IVConditionInfo> llvm::hasPartialIVCondition(Loop *L,
+                                                      unsigned MSSAThreshold,
+                                                      MemorySSA *MSSA,
+                                                      AAResults *AA) {
+  auto *TI = dyn_cast<BranchInst>(L->getHeader()->getTerminator());
+  if (!TI || !TI->isConditional())
+    return {};
+
+  auto *CondI = dyn_cast<Instruction>(TI->getCondition());
+  // The case with the condition outside the loop should already be handled
+  // earlier.
+  if (!CondI || !L->contains(CondI))
+    return {};
+
+  SmallVector<Instruction *> InstToDuplicate;
+  InstToDuplicate.push_back(CondI);
+
+  SmallVector<Value *, 4> WorkList;
+  WorkList.append(CondI->op_begin(), CondI->op_end());
+
+  SmallVector<MemoryAccess *, 4> AccessesToCheck;
+  SmallVector<MemoryLocation, 4> AccessedLocs;
+  while (!WorkList.empty()) {
+    Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val());
+    if (!I || !L->contains(I))
+      continue;
+
+    // TODO: support additional instructions.
+    if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I))
+      return {};
+
+    // Do not duplicate volatile and atomic loads.
+    if (auto *LI = dyn_cast<LoadInst>(I))
+      if (LI->isVolatile() || LI->isAtomic())
+        return {};
+
+    InstToDuplicate.push_back(I);
+    if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) {
+      if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) {
+        // Queue the defining access to check for alias checks.
+        AccessesToCheck.push_back(MemUse->getDefiningAccess());
+        AccessedLocs.push_back(MemoryLocation::get(I));
+      } else {
+        // MemoryDefs may clobber the location or may be atomic memory
+        // operations. Bail out.
+        return {};
+      }
+    }
+    WorkList.append(I->op_begin(), I->op_end());
+  }
+
+  if (InstToDuplicate.empty())
+    return {};
+
+  SmallVector<BasicBlock *, 4> ExitingBlocks;
+  L->getExitingBlocks(ExitingBlocks);
+  auto HasNoClobbersOnPath =
+      [L, AA, &AccessedLocs, &ExitingBlocks, &InstToDuplicate,
+       MSSAThreshold](BasicBlock *Succ, BasicBlock *Header,
+                      SmallVector<MemoryAccess *, 4> AccessesToCheck)
+      -> Optional<IVConditionInfo> {
+    IVConditionInfo Info;
+    // First, collect all blocks in the loop that are on a path from Succ
+    // to the header.
+    SmallVector<BasicBlock *, 4> WorkList;
+    WorkList.push_back(Succ);
+    WorkList.push_back(Header);
+    SmallPtrSet<BasicBlock *, 4> Seen;
+    Seen.insert(Header);
+    Info.PathIsNoop &=
+        all_of(*Header, [](Instruction &I) { return !I.mayHaveSideEffects(); });
+
+    while (!WorkList.empty()) {
+      BasicBlock *Current = WorkList.pop_back_val();
+      if (!L->contains(Current))
+        continue;
+      const auto &SeenIns = Seen.insert(Current);
+      if (!SeenIns.second)
+        continue;
+
+      Info.PathIsNoop &= all_of(
+          *Current, [](Instruction &I) { return !I.mayHaveSideEffects(); });
+      WorkList.append(succ_begin(Current), succ_end(Current));
+    }
+
+    // Require at least 2 blocks on a path through the loop. This skips
+    // paths that directly exit the loop.
+    if (Seen.size() < 2)
+      return {};
+
+    // Next, check if there are any MemoryDefs that are on the path through
+    // the loop (in the Seen set) and they may-alias any of the locations in
+    // AccessedLocs. If that is the case, they may modify the condition and
+    // partial unswitching is not possible.
+    SmallPtrSet<MemoryAccess *, 4> SeenAccesses;
+    while (!AccessesToCheck.empty()) {
+      MemoryAccess *Current = AccessesToCheck.pop_back_val();
+      auto SeenI = SeenAccesses.insert(Current);
+      if (!SeenI.second || !Seen.contains(Current->getBlock()))
+        continue;
+
+      // Bail out if we exceeded the threshold.
+      if (SeenAccesses.size() >= MSSAThreshold)
+        return {};
+
+      // MemoryUses are read-only accesses.
+      if (isa<MemoryUse>(Current))
+        continue;
+
+      // For a MemoryDef, check if it aliases any of the locations feeding
+      // the original condition.
+      if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
+        if (any_of(AccessedLocs, [AA, CurrentDef](MemoryLocation &Loc) {
+              return isModSet(
+                  AA->getModRefInfo(CurrentDef->getMemoryInst(), Loc));
+            }))
+          return {};
+      }
+
+      for (Use &U : Current->uses())
+        AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));
+    }
+
+    // We could also allow loops with known trip counts without mustprogress,
+    // but ScalarEvolution may not be available.
+    Info.PathIsNoop &=
+        L->getHeader()->getParent()->mustProgress() || hasMustProgress(L);
+
+    // If the path is considered a no-op so far, check if it reaches a
+    // single exit block without any phis. This ensures no values from the
+    // loop are used outside of the loop.
+    if (Info.PathIsNoop) {
+      for (auto *Exiting : ExitingBlocks) {
+        if (!Seen.contains(Exiting))
+          continue;
+        for (auto *Succ : successors(Exiting)) {
+          if (L->contains(Succ))
+            continue;
+
+          Info.PathIsNoop &= llvm::empty(Succ->phis()) &&
+                             (!Info.ExitForPath || Info.ExitForPath == Succ);
+          if (!Info.PathIsNoop)
+            break;
+          assert((!Info.ExitForPath || Info.ExitForPath == Succ) &&
+                 "cannot have multiple exit blocks");
+          Info.ExitForPath = Succ;
+        }
+      }
+    }
+    if (!Info.ExitForPath)
+      Info.PathIsNoop = false;
+
+    Info.InstToDuplicate = InstToDuplicate;
+    return Info;
+  };
+
+  // If we branch to the same successor, partial unswitching will not be
+  // beneficial.
+  if (TI->getSuccessor(0) == TI->getSuccessor(1))
+    return {};
+
+  if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(0), L->getHeader(),
+                                      AccessesToCheck)) {
+    Info->KnownValue = ConstantInt::getTrue(TI->getContext());
+    return Info;
+  }
+  if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(1), L->getHeader(),
+                                      AccessesToCheck)) {
+    Info->KnownValue = ConstantInt::getFalse(TI->getContext());
+    return Info;
+  }
+
+  return {};
+}
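
For context, a minimal sketch of how a client pass might call the relocated helper after this change; the wrapper name tryPartialUnswitch and the threshold value of 100 are illustrative placeholders, not part of this patch:

// Sketch only: assumes the calling pass already has Loop *L, MemorySSA *MSSA
// and AAResults *AA available; 100 is an arbitrary example MemorySSA threshold.
#include "llvm/Transforms/Utils/LoopUtils.h"

using namespace llvm;

static bool tryPartialUnswitch(Loop *L, MemorySSA *MSSA, AAResults *AA) {
  if (Optional<IVConditionInfo> Info =
          hasPartialIVCondition(L, /*MSSAThreshold=*/100, MSSA, AA)) {
    // Info->InstToDuplicate holds the instructions to clone into the
    // pre-header; Info->KnownValue is the constant the condition takes on the
    // partially invariant path.
    return !Info->InstToDuplicate.empty();
  }
  return false;
}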