Index: llvm/include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -151,7 +151,17 @@ BlockFrequencyInfo *, TargetLibraryInfo *, TargetTransformInfo *, Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *, - SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *); + SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, + Loop * = nullptr); + +/// Call sinkRegion on loops contained within the specified loop +/// in order from innermost to outermost. +bool sinkRegionForLoopNest(DomTreeNode *, AAResults *, LoopInfo *, + DominatorTree *, BlockFrequencyInfo *, + TargetLibraryInfo *, TargetTransformInfo *, Loop *, + AliasSetTracker *, MemorySSAUpdater *, + ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, + OptimizationRemarkEmitter *); /// Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in depth Index: llvm/lib/Transforms/Scalar/LICM.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LICM.cpp +++ llvm/lib/Transforms/Scalar/LICM.cpp @@ -151,7 +151,8 @@ static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, - TargetTransformInfo *TTI, bool &FreeInLoop); + TargetTransformInfo *TTI, bool &FreeInLoop, + bool LoopNestMode); static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, @@ -436,8 +437,13 @@ // instructions, we perform another pass to hoist them out of the loop. if (L->hasDedicatedExits()) Changed |= - sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L, - CurAST.get(), MSSAU.get(), &SafetyInfo, *Flags.get(), ORE); + LoopNestMode + ? sinkRegionForLoopNest(DT->getNode(L->getHeader()), AA, LI, DT, + BFI, TLI, TTI, L, CurAST.get(), MSSAU.get(), + &SafetyInfo, *Flags.get(), ORE) + : sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, + L, CurAST.get(), MSSAU.get(), &SafetyInfo, + *Flags.get(), ORE); Flags->setIsSink(false); if (Preheader) Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L, @@ -555,7 +561,7 @@ Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo, SinkAndHoistLICMFlags &Flags, - OptimizationRemarkEmitter *ORE) { + OptimizationRemarkEmitter *ORE, Loop *OutermostLoop) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && @@ -598,8 +604,10 @@ // operands of the instruction are loop invariant. // bool FreeInLoop = false; + bool LoopNestMode = OutermostLoop != nullptr; if (!I.mayHaveSideEffects() && - isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) && + isNotUsedOrFreeInLoop(I, LoopNestMode ? OutermostLoop : CurLoop, + SafetyInfo, TTI, FreeInLoop, LoopNestMode) && canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags, ORE)) { if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) { @@ -618,6 +626,26 @@ return Changed; } +bool llvm::sinkRegionForLoopNest( + DomTreeNode *N, AAResults *AA, LoopInfo *LI, DominatorTree *DT, + BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, + Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, + ICFLoopSafetyInfo *SafetyInfo, SinkAndHoistLICMFlags &Flags, + OptimizationRemarkEmitter *ORE) { + + bool Changed = false; + SmallPriorityWorklist Worklist; + Worklist.insert(CurLoop); + appendLoopsToWorklist(*CurLoop, Worklist); + while (!Worklist.empty()) { + Loop *L = Worklist.pop_back_val(); + Changed |= + sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L, + CurAST, MSSAU, SafetyInfo, Flags, ORE, CurLoop); + } + return Changed; +} + namespace { // This is a helper class for hoistRegion to make it able to hoist control flow // in order to be able to hoist phis. The way this works is that we initially @@ -1443,7 +1471,8 @@ /// (e.g., a GEP can be folded into a load as an addressing mode in the loop). static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, - TargetTransformInfo *TTI, bool &FreeInLoop) { + TargetTransformInfo *TTI, bool &FreeInLoop, + bool LoopNestMode) { const auto &BlockColors = SafetyInfo->getBlockColors(); bool IsFree = isFreeInLoop(I, CurLoop, TTI); for (const User *U : I.users()) { @@ -1460,6 +1489,15 @@ if (!BlockColors.empty() && BlockColors.find(const_cast(BB))->second.size() != 1) return false; + + if (LoopNestMode) { + while (isa(UI) && UI->hasOneUser() && + UI->getNumOperands() == 1) { + if (!CurLoop->contains(UI)) + break; + UI = cast(U->user_back()); + } + } } if (CurLoop->contains(UI)) { Index: llvm/test/Transforms/LICM/lnicm-sink.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LICM/lnicm-sink.ll @@ -0,0 +1,68 @@ +; RUN: opt -passes='loop(licm)' -S %s | FileCheck %s --check-prefixes CHECK,LICM +; RUN: opt -passes='loop(lnicm)' -S %s | FileCheck %s --check-prefixes CHECK,LNICM + +; This test represents the following function: +; +; double sin(double); +; int abs(int); +; double test(double x, int y[10]) { +; double t = 0; int s = 0; +; for (int i = 0; i < 10; i++) { +; for (int j = 0; j < 10; j++) { +; t = sin(x); +; s = abs(i); +; } +; y[i] = s; +; } +; return t; +; } +; +; We only want to sink the call of sin out of the loop nest. +; LICM also sinks the call of abs out of j-loop, but LNICM doesn't do so +; to try to make a perfect loop nest. (though y[i] = s; still prevents the +; loop nest from being a perfect loop nest in this test case) + +define dso_local double @test(double %x, i32* noalias %y) { +entry: + br label %for.body + +for.body: + %i.02 = phi i32 [ 0, %entry ], [ %inc6, %for.end ] + br label %for.body3 + +; CHECK: for.body3: +; LNICM: call i32 @abs(i32 %i.02) +; LICM-NOT: call i32 @abs(i32 %i.02) +for.body3: + %j.01 = phi i32 [ 0, %for.body ], [ %inc, %for.body3 ] + %call = call double @sin(double %x) + %call4 = call i32 @abs(i32 %i.02) + %inc = add nsw i32 %j.01, 1 + %cmp2 = icmp slt i32 %inc, 10 + br i1 %cmp2, label %for.body3, label %for.end + +; CHECK: for.end: +; LICM: call i32 @abs(i32 %i.02) +; LNICM-NOT: call i32 @abs(i32 %i.02) +for.end: + %s.1.lcssa = phi i32 [ %call4, %for.body3 ] + %t.1.lcssa = phi double [ %call, %for.body3 ] + %idxprom = sext i32 %i.02 to i64 + %arrayidx = getelementptr inbounds i32, i32* %y, i64 %idxprom + store i32 %s.1.lcssa, i32* %arrayidx, align 4 + %inc6 = add nsw i32 %i.02, 1 + %cmp = icmp slt i32 %inc6, 10 + br i1 %cmp, label %for.body, label %for.end7 + +; CHECK: for.end7: +; CHECK: call double @sin(double %x) +for.end7: + %t.0.lcssa = phi double [ %t.1.lcssa, %for.end ] + ret double %t.0.lcssa +} + +declare dso_local double @sin(double) #0 + +declare dso_local i32 @abs(i32) #0 + +attributes #0 = { nounwind readnone willreturn }