Index: llvm/include/llvm/Transforms/Scalar/LNICM.h
===================================================================
--- /dev/null
+++ llvm/include/llvm/Transforms/Scalar/LNICM.h
@@ -0,0 +1,62 @@
+//===- LNICM.h - Loop Invariant Code Motion Pass ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs loop invariant code motion, attempting to remove as much
+// code from the body of a loop as possible. It does this by either hoisting
+// code into the preheader block, or by sinking code to the exit blocks if it is
+// safe. This pass also promotes must-aliased memory locations in the loop to
+// live in registers, thus hoisting and sinking "invariant" loads and stores.
+//
+// This pass uses alias analysis for two purposes:
+//
+//  1. Moving loop invariant loads and calls out of loops. If we can determine
+//     that a load or call inside of a loop never aliases anything stored to,
+//     we can hoist it or sink it like any other instruction.
+//  2. Scalar Promotion of Memory - If there is a store instruction inside of
+//     the loop, we try to move the store to happen AFTER the loop instead of
+//     inside of the loop. This can only happen if a few conditions are true:
+//       A. The pointer stored through is loop invariant
+//       B. There are no stores or loads in the loop which _may_ alias the
+//          pointer. There are no calls in the loop which mod/ref the pointer.
+//     If these conditions are true, we can promote the loads and stores in the
+//     loop of the pointer to use a temporary alloca'd variable. We then use
+//     the SSAUpdater to construct the appropriate SSA form for the value.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_LNICM_H
+#define LLVM_TRANSFORMS_SCALAR_LNICM_H
+
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
+
+namespace llvm {
+
+// Command-line caps shared with the pass constructor below; defined in
+// LNICM.cpp. NOTE(review): the stripped template arguments are restored here
+// as cl::opt<unsigned> to match the definitions in LNICM.cpp.
+extern cl::opt<unsigned> SetLnicmMssaOptCap;
+extern cl::opt<unsigned> SetLnicmMssaNoAccForPromotionCap;
+
+/// Performs Loop Invariant Code Motion Pass.
+class LNICMPass : public PassInfoMixin<LNICMPass> {
+  unsigned LicmMssaOptCap;
+  unsigned LicmMssaNoAccForPromotionCap;
+
+public:
+  LNICMPass()
+      : LicmMssaOptCap(SetLnicmMssaOptCap),
+        LicmMssaNoAccForPromotionCap(SetLnicmMssaNoAccForPromotionCap) {}
+  LNICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap)
+      : LicmMssaOptCap(LicmMssaOptCap),
+        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
+  PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM,
+                        LoopStandardAnalysisResults &AR, LPMUpdater &U);
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_LNICM_H
Index: llvm/lib/Passes/PassBuilder.cpp
===================================================================
--- llvm/lib/Passes/PassBuilder.cpp
+++ llvm/lib/Passes/PassBuilder.cpp
@@ -158,6 +158,7 @@
 #include "llvm/Transforms/Scalar/InstSimplifyPass.h"
 #include "llvm/Transforms/Scalar/JumpThreading.h"
 #include "llvm/Transforms/Scalar/LICM.h"
+#include "llvm/Transforms/Scalar/LNICM.h"
 #include "llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h"
 #include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
 #include "llvm/Transforms/Scalar/LoopDeletion.h"
Index: llvm/lib/Passes/PassRegistry.def
===================================================================
--- llvm/lib/Passes/PassRegistry.def
+++ llvm/lib/Passes/PassRegistry.def
@@ -387,6 +387,7 @@
 LOOP_PASS("dot-ddg", DDGDotPrinterPass())
 LOOP_PASS("invalidate", InvalidateAllAnalysesPass())
 LOOP_PASS("licm", LICMPass())
+LOOP_PASS("lnicm",
LNICMPass())
 LOOP_PASS("loop-flatten", LoopFlattenPass())
 LOOP_PASS("loop-idiom", LoopIdiomRecognizePass())
 LOOP_PASS("loop-instsimplify", LoopInstSimplifyPass())
Index: llvm/lib/Transforms/Scalar/CMakeLists.txt
===================================================================
--- llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -24,6 +24,7 @@
   InstSimplifyPass.cpp
   JumpThreading.cpp
   LICM.cpp
+  LNICM.cpp
   LoopAccessAnalysisPrinter.cpp
   LoopSink.cpp
   LoopDeletion.cpp
Index: llvm/lib/Transforms/Scalar/LNICM.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Transforms/Scalar/LNICM.cpp
@@ -0,0 +1,465 @@
+//===-- LNICM.cpp - Loop Nest Invariant Code Motion Pass ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs loop invariant code motion, attempting to remove as much
+// code from the body of a loop as possible. It does this by either hoisting
+// code into the preheader block, or by sinking code to the exit blocks if it is
+// safe. This pass also promotes must-aliased memory locations in the loop to
+// live in registers, thus hoisting and sinking "invariant" loads and stores.
+//
+// Hoisting operations out of loops is a canonicalization transform. It
+// enables and simplifies subsequent optimizations in the middle-end.
+// Rematerialization of hoisted instructions to reduce register pressure is the
+// responsibility of the back-end, which has more accurate information about
+// register pressure and also handles other optimizations than LNICM that
+// increase live-ranges.
+//
+// This pass uses alias analysis for two purposes:
+//
+//  1.
Moving loop invariant loads and calls out of loops. If we can determine
+//     that a load or call inside of a loop never aliases anything stored to,
+//     we can hoist it or sink it like any other instruction.
+//  2. Scalar Promotion of Memory - If there is a store instruction inside of
+//     the loop, we try to move the store to happen AFTER the loop instead of
+//     inside of the loop. This can only happen if a few conditions are true:
+//       A. The pointer stored through is loop invariant
+//       B. There are no stores or loads in the loop which _may_ alias the
+//          pointer. There are no calls in the loop which mod/ref the pointer.
+//     If these conditions are true, we can promote the loads and stores in the
+//     loop of the pointer to use a temporary alloca'd variable. We then use
+//     the SSAUpdater to construct the appropriate SSA form for the value.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/LNICM.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/GuardUtils.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/MustExecute.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/PredIteratorCache.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
+#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include <algorithm>
+#include <utility>
+using namespace llvm;
+
+#define DEBUG_TYPE "lnicm"
+
+/// Memory promotion is enabled by default.
+static cl::opt<bool>
+    DisablePromotion("disable-lnicm-promotion", cl::Hidden, cl::init(false),
+                     cl::desc("Disable memory promotion in LNICM pass"));
+
+static cl::opt<bool> ControlFlowHoisting(
+    "lnicm-control-flow-hoisting", cl::Hidden, cl::init(false),
+    cl::desc("Enable control flow (and PHI) hoisting in LNICM"));
+
+static cl::opt<unsigned> HoistSinkColdnessThreshold(
+    "lnicm-coldness-threshold", cl::Hidden, cl::init(4),
+    cl::desc("Relative coldness Threshold of hoisting/sinking destination "
+             "block for LNICM to be considered beneficial"));
+
+static cl::opt<uint32_t> MaxNumUsesTraversed(
+    "lnicm-max-num-uses-traversed", cl::Hidden, cl::init(8),
+    cl::desc("Max num uses visited for identifying load "
+             "invariance in loop using invariant start (default = 8)"));
+
+// Default value of zero implies we use the regular alias set tracker mechanism
+// instead of the cross product using AA to identify aliasing of the memory
+// location we are interested in.
+static cl::opt<unsigned>
+    LNICMN2Theshold("lnicm-n2-threshold", cl::Hidden, cl::init(0),
+                    cl::desc("How many instruction to cross product using AA"));
+
+// Experimental option to allow imprecision in LNICM in pathological cases, in
+// exchange for faster compile. This is to be removed if MemorySSA starts to
+// address the same issue. This flag applies only when LNICM uses MemorySSA
+// instead on AliasSetTracker. LNICM calls MemorySSAWalker's
+// getClobberingMemoryAccess, up to the value of the Cap, getting perfect
+// accuracy. Afterwards, LNICM will call into MemorySSA's getDefiningAccess,
+// which may not be precise, since optimizeUses is capped. The result is
+// correct, but we may not get as "far up" as possible to get which access is
+// clobbering the one queried.
+cl::opt<unsigned> llvm::SetLnicmMssaOptCap(
+    "lnicm-mssa-optimization-cap", cl::init(100), cl::Hidden,
+    cl::desc("Enable imprecision in LNICM in pathological cases, in exchange "
+             "for faster compile. Caps the MemorySSA clobbering calls."));
+
+// Experimentally, memory promotion carries less importance than sinking and
+// hoisting. Limit when we do promotion when using MemorySSA, in order to save
+// compile time.
+cl::opt<unsigned> llvm::SetLnicmMssaNoAccForPromotionCap(
+    "lnicm-mssa-max-acc-promotion", cl::init(250), cl::Hidden,
+    cl::desc("[LNICM & MemorySSA] When MSSA in LNICM is disabled, this has no "
+             "effect. When MSSA in LNICM is enabled, then this is the maximum "
+             "number of accesses allowed to be present in a loop in order to "
+             "enable memory promotion."));
+
+// Visit every memory instruction (MemoryUseOrDef) in the loop, in block order.
+static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
+                                function_ref<void(Instruction *)> Fn);
+// Collect must-alias sets of loop-invariant pointers that are candidates for
+// scalar promotion.
+static SmallVector<SmallSetVector<Value *, 8>, 0>
+collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L);
+
+namespace {
+struct LoopInvariantCodeMotion {
+  bool runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
+                 BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI,
+                 TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA,
+                 OptimizationRemarkEmitter *ORE);
+
+  LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
+                          unsigned LicmMssaNoAccForPromotionCap)
+      : LicmMssaOptCap(LicmMssaOptCap),
+        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
+
+private:
+  unsigned LicmMssaOptCap;
+  unsigned LicmMssaNoAccForPromotionCap;
+
+  std::unique_ptr<AliasSetTracker>
+  collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AAResults *AA);
+};
+} // namespace
+
+PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,
+                                 LoopStandardAnalysisResults &AR,
+                                 LPMUpdater &) {
+  // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
+  // pass. Function analyses need to be preserved across loop transformations
+  // but ORE cannot be preserved (see comment before the pass definition).
+  OptimizationRemarkEmitter ORE(LN.getParent());
+
+  LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
+
+  ArrayRef<Loop *> Loops = LN.getLoops();
+
+  // Run the LICM logic over every loop of the nest, innermost loops first (the
+  // worklist pops in reverse of the append order).
+  bool Changed = false;
+  SmallPriorityWorklist<Loop *, 4> Worklist;
+  appendLoopsToWorklist(Loops, Worklist);
+  while (!Worklist.empty()) {
+    Loop *L = Worklist.pop_back_val();
+    Changed |= LICM.runOnLoop(L, &AR.AA, &AR.LI, &AR.DT, AR.BFI, &AR.TLI,
+                              &AR.TTI, &AR.SE, AR.MSSA, &ORE);
+  }
+
+  if (!Changed)
+    return PreservedAnalyses::all();
+
+  auto PA = getLoopPassPreservedAnalyses();
+
+  PA.preserve<DominatorTreeAnalysis>();
+  PA.preserve<LoopAnalysis>();
+  if (AR.MSSA)
+    PA.preserve<MemorySSAAnalysis>();
+
+  return PA;
+}
+
+/// Hoist expressions out of the specified loop. Note, alias info for inner
+/// loop is not preserved so it is not a good idea to run LICM multiple
+/// times on one loop.
+bool LoopInvariantCodeMotion::runOnLoop(
+    Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
+    BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
+    ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE) {
+  bool Changed = false;
+
+  assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
+
+  // If this loop has metadata indicating that LICM is not to be performed then
+  // just exit.
+  if (hasDisableLICMTransformsHint(L)) {
+    return false;
+  }
+
+  std::unique_ptr<AliasSetTracker> CurAST;
+  std::unique_ptr<MemorySSAUpdater> MSSAU;
+  std::unique_ptr<SinkAndHoistLICMFlags> Flags;
+
+  // Don't sink stores from loops with coroutine suspend instructions.
+  // LICM would sink instructions into the default destination of
+  // the coroutine switch. The default destination of the switch is to
+  // handle the case where the coroutine is suspended, by which point the
+  // coroutine frame may have been destroyed. No instruction can be sunk there.
+  // FIXME: This would unfortunately hurt the performance of coroutines, however
+  // there is currently no general solution for this. Similar issues could also
+  // potentially happen in other passes where instructions are being moved
+  // across that edge.
+  bool HasCoroSuspendInst = llvm::any_of(L->getBlocks(), [](BasicBlock *BB) {
+    return llvm::any_of(*BB, [](Instruction &I) {
+      IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
+      return II && II->getIntrinsicID() == Intrinsic::coro_suspend;
+    });
+  });
+
+  if (!MSSA) {
+    LLVM_DEBUG(dbgs() << "LNICM: Using Alias Set Tracker.\n");
+    CurAST = collectAliasInfoForLoop(L, LI, AA);
+    Flags = std::make_unique<SinkAndHoistLICMFlags>(
+        LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*IsSink=*/true);
+  } else {
+    LLVM_DEBUG(dbgs() << "LNICM: Using MemorySSA.\n");
+    MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
+    Flags = std::make_unique<SinkAndHoistLICMFlags>(
+        LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*IsSink=*/true, L, MSSA);
+  }
+
+  // Get the preheader block to move instructions into...
+  BasicBlock *Preheader = L->getLoopPreheader();
+
+  // Compute loop safety information.
+  ICFLoopSafetyInfo SafetyInfo;
+  SafetyInfo.computeLoopSafetyInfo(L);
+
+  // We want to visit all of the instructions in this loop... that are not parts
+  // of our subloops (they have already had their invariants hoisted out of
+  // their loop, into this loop, so there is no need to process the BODIES of
+  // the subloops).
+  //
+  // Traverse the body of the loop in depth first order on the dominator tree so
+  // that we are guaranteed to see definitions before we see uses. This allows
+  // us to sink instructions in one pass, without iteration. After sinking
+  // instructions, we perform another pass to hoist them out of the loop.
+  if (L->hasDedicatedExits())
+    Changed |=
+        sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L,
+                   CurAST.get(), MSSAU.get(), &SafetyInfo, *Flags.get(), ORE);
+  Flags->setIsSink(false);
+  if (Preheader)
+    Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L,
+                           CurAST.get(), MSSAU.get(), SE, &SafetyInfo,
+                           *Flags.get(), ORE);
+
+  // Now that all loop invariants have been removed from the loop, promote any
+  // memory references to scalars that we can.
+  // Don't sink stores from loops without dedicated block exits. Exits
+  // containing indirect branches are not transformed by loop simplify,
+  // make sure we catch that. An additional load may be generated in the
+  // preheader for SSA updater, so also avoid sinking when no preheader
+  // is available.
+  if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
+      !Flags->tooManyMemoryAccesses() && !HasCoroSuspendInst) {
+    // Figure out the loop exits and their insertion points
+    SmallVector<BasicBlock *, 8> ExitBlocks;
+    L->getUniqueExitBlocks(ExitBlocks);
+
+    // We can't insert into a catchswitch.
+    bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
+      return isa<CatchSwitchInst>(Exit->getTerminator());
+    });
+
+    if (!HasCatchSwitch) {
+      SmallVector<Instruction *, 8> InsertPts;
+      SmallVector<MemoryAccess *, 8> MSSAInsertPts;
+      InsertPts.reserve(ExitBlocks.size());
+      if (MSSAU)
+        MSSAInsertPts.reserve(ExitBlocks.size());
+      for (BasicBlock *ExitBlock : ExitBlocks) {
+        InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
+        if (MSSAU)
+          MSSAInsertPts.push_back(nullptr);
+      }
+
+      PredIteratorCache PIC;
+
+      bool Promoted = false;
+      if (CurAST.get()) {
+        // Loop over all of the alias sets in the tracker object.
+        for (AliasSet &AS : *CurAST) {
+          // We can promote this alias set if it has a store, if it is a "Must"
+          // alias set, if the pointer is loop invariant, and if we are not
+          // eliminating any volatile loads or stores.
+          if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
+              !L->isLoopInvariant(AS.begin()->getValue()))
+            continue;
+
+          assert(
+              !AS.empty() &&
+              "Must alias set should have at least one pointer element in it!");
+
+          SmallSetVector<Value *, 8> PointerMustAliases;
+          for (const auto &ASI : AS)
+            PointerMustAliases.insert(ASI.getValue());
+
+          Promoted |= promoteLoopAccessesToScalars(
+              PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
+              DT, TLI, L, CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
+        }
+      } else {
+        // Promoting one set of accesses may make the pointers for another set
+        // loop invariant, so run this in a loop (with the MaybePromotable set
+        // decreasing in size over time).
+        bool LocalPromoted;
+        do {
+          LocalPromoted = false;
+          for (const SmallSetVector<Value *, 8> &PointerMustAliases :
+               collectPromotionCandidates(MSSA, AA, L)) {
+            LocalPromoted |= promoteLoopAccessesToScalars(
+                PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC,
+                LI, DT, TLI, L, /*AST*/ nullptr, MSSAU.get(), &SafetyInfo, ORE);
+          }
+          Promoted |= LocalPromoted;
+        } while (LocalPromoted);
+      }
+
+      // Once we have promoted values across the loop body we have to
+      // recursively reform LCSSA as any nested loop may now have values defined
+      // within the loop used in the outer loop.
+      // FIXME: This is really heavy handed. It would be a bit better to use an
+      // SSAUpdater strategy during promotion that was LCSSA aware and reformed
+      // it as it went.
+      if (Promoted)
+        formLCSSARecursively(*L, *DT, LI, SE);
+
+      Changed |= Promoted;
+    }
+  }
+
+  // Check that neither this loop nor its parent have had LCSSA broken. LICM is
+  // specifically moving instructions across the loop boundary and so it is
+  // especially in need of sanity checking here.
+  assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
+  assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) &&
+         "Parent loop not left in LCSSA form after LICM!");
+
+  if (MSSAU.get() && VerifyMemorySSA)
+    MSSAU->getMemorySSA()->verifyMemorySSA();
+
+  if (Changed && SE)
+    SE->forgetLoopDispositions(L);
+  return Changed;
+}
+
+static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
+                                function_ref<void(Instruction *)> Fn) {
+  for (const BasicBlock *BB : L->blocks())
+    if (const auto *Accesses = MSSA->getBlockAccesses(BB))
+      for (const auto &Access : *Accesses)
+        if (const auto *MUD = dyn_cast<MemoryUseOrDef>(&Access))
+          Fn(MUD->getMemoryInst());
+}
+
+static SmallVector<SmallSetVector<Value *, 8>, 0>
+collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
+  AliasSetTracker AST(*AA);
+
+  auto IsPotentiallyPromotable = [L](const Instruction *I) {
+    if (const auto *SI = dyn_cast<StoreInst>(I))
+      return L->isLoopInvariant(SI->getPointerOperand());
+    if (const auto *LI = dyn_cast<LoadInst>(I))
+      return L->isLoopInvariant(LI->getPointerOperand());
+    return false;
+  };
+
+  // Populate AST with potentially promotable accesses and remove them from
+  // MaybePromotable, so they will not be checked again on the next iteration.
+  SmallPtrSet<Instruction *, 16> AttemptingPromotion;
+  foreachMemoryAccess(MSSA, L, [&](Instruction *I) {
+    if (IsPotentiallyPromotable(I)) {
+      AttemptingPromotion.insert(I);
+      AST.add(I);
+    }
+  });
+
+  // We're only interested in must-alias sets that contain a mod.
+  SmallVector<AliasSet *> Sets;
+  for (AliasSet &AS : AST)
+    if (!AS.isForwardingAliasSet() && AS.isMod() && AS.isMustAlias())
+      Sets.push_back(&AS);
+
+  if (Sets.empty())
+    return {}; // Nothing to promote...
+
+  // Discard any sets for which there is an aliasing non-promotable access.
+  foreachMemoryAccess(MSSA, L, [&](Instruction *I) {
+    if (AttemptingPromotion.contains(I))
+      return;
+
+    llvm::erase_if(Sets, [&](const AliasSet *AS) {
+      return AS->aliasesUnknownInst(I, *AA);
+    });
+  });
+
+  SmallVector<SmallSetVector<Value *, 8>, 0> Result;
+  for (const AliasSet *Set : Sets) {
+    SmallSetVector<Value *, 8> PointerMustAliases;
+    for (const auto &ASI : *Set)
+      PointerMustAliases.insert(ASI.getValue());
+    Result.push_back(std::move(PointerMustAliases));
+  }
+
+  return Result;
+}
+
+/// Returns an owning pointer to an alias set which incorporates aliasing info
+/// from L and all subloops of L.
+std::unique_ptr<AliasSetTracker>
+LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
+                                                 AAResults *AA) {
+  auto CurAST = std::make_unique<AliasSetTracker>(*AA);
+
+  // Add everything from all the sub loops.
+  for (Loop *InnerL : L->getSubLoops())
+    for (BasicBlock *BB : InnerL->blocks())
+      CurAST->add(*BB);
+
+  // And merge in this loop (without anything from inner loops).
+  for (BasicBlock *BB : L->blocks())
+    if (LI->getLoopFor(BB) == L)
+      CurAST->add(*BB);
+
+  return CurAST;
+}
Index: llvm/test/Transforms/LICM/call-hoisting.ll
===================================================================
--- llvm/test/Transforms/LICM/call-hoisting.ll
+++ llvm/test/Transforms/LICM/call-hoisting.ll
@@ -1,5 +1,6 @@
 ; RUN: opt -S -basic-aa -licm %s | FileCheck %s
 ; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(lnicm)' < %s -S | FileCheck %s
 
 declare i32 @load(i32* %p) argmemonly readonly nounwind
Index: llvm/test/Transforms/LICM/hoist-deref-load.ll
===================================================================
--- llvm/test/Transforms/LICM/hoist-deref-load.ll
+++ llvm/test/Transforms/LICM/hoist-deref-load.ll
@@ -1,8 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -basic-aa -licm < %s | FileCheck %s
 ; RUN: opt -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(loop-simplifycfg,licm)' -S < %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(loop-simplifycfg,lnicm)' -S < %s | FileCheck %s
 ; RUN: opt -S -basic-aa -licm -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s
 ; RUN: opt -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop-mssa(loop-simplifycfg,licm)' -verify-memoryssa -S < %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop-mssa(loop-simplifycfg,lnicm)' -verify-memoryssa -S < %s | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"