Index: llvm/include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -151,7 +151,14 @@ BlockFrequencyInfo *, TargetLibraryInfo *, TargetTransformInfo *, Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *, - SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *); + SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, Loop *); + +bool sinkRegionForLoopNest(DomTreeNode *, AAResults *, LoopInfo *, + DominatorTree *, BlockFrequencyInfo *, + TargetLibraryInfo *, TargetTransformInfo *, Loop *, + AliasSetTracker *, MemorySSAUpdater *, + ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, + OptimizationRemarkEmitter *); /// Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in depth Index: llvm/lib/Transforms/Scalar/LICM.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LICM.cpp +++ llvm/lib/Transforms/Scalar/LICM.cpp @@ -151,7 +151,8 @@ static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, - TargetTransformInfo *TTI, bool &FreeInLoop); + TargetTransformInfo *TTI, bool &FreeInLoop, + bool LoopNestMode); static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, @@ -436,8 +437,13 @@ // instructions, we perform another pass to hoist them out of the loop. if (L->hasDedicatedExits()) Changed |= - sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L, - CurAST.get(), MSSAU.get(), &SafetyInfo, *Flags.get(), ORE); + LoopNestMode + ? sinkRegionForLoopNest(DT->getNode(L->getHeader()), AA, LI, DT, + BFI, TLI, TTI, L, CurAST.get(), MSSAU.get(), + &SafetyInfo, *Flags.get(), ORE) + : sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, + L, CurAST.get(), MSSAU.get(), &SafetyInfo, + *Flags.get(), ORE, L); Flags->setIsSink(false); if (Preheader) Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L, @@ -555,7 +561,7 @@ Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo, SinkAndHoistLICMFlags &Flags, - OptimizationRemarkEmitter *ORE) { + OptimizationRemarkEmitter *ORE, Loop *OutermostLoop) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && @@ -598,8 +604,10 @@ // operands of the instruction are loop invariant. // bool FreeInLoop = false; + bool LoopNestMode = OutermostLoop != nullptr; if (!I.mayHaveSideEffects() && - isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) && + isNotUsedOrFreeInLoop(I, LoopNestMode ? OutermostLoop : CurLoop, + SafetyInfo, TTI, FreeInLoop, LoopNestMode) && canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags, ORE)) { if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) { @@ -618,6 +626,26 @@ return Changed; } +bool llvm::sinkRegionForLoopNest( + DomTreeNode *N, AAResults *AA, LoopInfo *LI, DominatorTree *DT, + BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, + Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, + ICFLoopSafetyInfo *SafetyInfo, SinkAndHoistLICMFlags &Flags, + OptimizationRemarkEmitter *ORE) { + + bool Changed = false; + SmallPriorityWorklist Worklist; + Worklist.insert(CurLoop); + appendLoopsToWorklist(*CurLoop, Worklist); + while (!Worklist.empty()) { + Loop *L = Worklist.pop_back_val(); + Changed |= + sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L, + CurAST, MSSAU, SafetyInfo, Flags, ORE, CurLoop); + } + return Changed; +} + namespace { // This is a helper class for hoistRegion to make it able to hoist control flow // in order to be able to hoist phis. The way this works is that we initially @@ -1443,7 +1471,8 @@ /// (e.g., a GEP can be folded into a load as an addressing mode in the loop). static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, - TargetTransformInfo *TTI, bool &FreeInLoop) { + TargetTransformInfo *TTI, bool &FreeInLoop, + bool LoopNestMode) { const auto &BlockColors = SafetyInfo->getBlockColors(); bool IsFree = isFreeInLoop(I, CurLoop, TTI); for (const User *U : I.users()) { @@ -1460,6 +1489,15 @@ if (!BlockColors.empty() && BlockColors.find(const_cast(BB))->second.size() != 1) return false; + + if (LoopNestMode) { + while (isa(UI) && UI->hasOneUser() && + UI->getNumOperands() == 1) { + if (!CurLoop->contains(UI)) + break; + UI = cast(U->user_back()); + } + } } if (CurLoop->contains(UI)) { Index: llvm/test/Transforms/LICM/lnicm-sink.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LICM/lnicm-sink.ll @@ -0,0 +1,70 @@ +; RUN: opt -passes='loop(licm)' -S %s | FileCheck %s --check-prefixes CHECK,LICM +; RUN: opt -passes='loop(lnicm)' -S %s | FileCheck %s --check-prefixes CHECK,LNICM + +; This test represents the following function: +; +; double sin(double); +; int abs(int); +; double test(double x, int y[10]) { +; double t = 0; int s = 0; +; for (int i = 0; i < 10; i++) { +; for (int j = 0; j < 10; j++) { +; t = sin(x); +; s = abs(i); +; } +; y[i] = s; +; } +; return t; +; } +; +; We only want to sink the call of sin out of the loop nest. +; LICM also sinks the call of abs out of j-loop, but LNICM doesn't do so +; to try to make a perfect loop nest. (though y[i] = s; still prevents the +; loop nest from being a perfect loop nest in this test case) + +define dso_local double @test(double %0, i32* noalias %1) { + br label %3 + +3: ; preds = %2, %9 + %.016 = phi i32 [ 0, %2 ], [ %12, %9 ] + br label %4 + +; CHECK: 4: +; LNICM: call i32 @abs(i32 %.016) +; LICM-NOT: call i32 @abs(i32 %.016) +4: ; preds = %3, %4 + %.05 = phi i32 [ 0, %3 ], [ %7, %4 ] + %5 = call double @sin(double %0) + %6 = call i32 @abs(i32 %.016) + %7 = add nsw i32 %.05, 1 + %8 = icmp slt i32 %7, 10 + br i1 %8, label %4, label %9 + +; LICM: 7: +; LICM: call i32 @abs(i32 %.016) +; LNICM: 8: +; LNICM-NOT: call i32 @abs(i32 %.016) +9: ; preds = %4 + %.14.lcssa = phi i32 [ %6, %4 ] + %.1.lcssa = phi double [ %5, %4 ] + %10 = sext i32 %.016 to i64 + %11 = getelementptr inbounds i32, i32* %1, i64 %10 + store i32 %.14.lcssa, i32* %11, align 4 + %12 = add nsw i32 %.016, 1 + %13 = icmp slt i32 %12, 10 + br i1 %13, label %3, label %14 + +; CHECK: 13: +; CHECK: call double @sin(double %0) +14: ; preds = %9 + %.02.lcssa = phi double [ %.1.lcssa, %9 ] + ret double %.02.lcssa +} + +; Function Attrs: nounwind readnone +declare dso_local double @sin(double) #0 + +; Function Attrs: nounwind readnone +declare dso_local i32 @abs(i32) #0 + +attributes #0 = { nounwind readnone }