diff --git a/llvm/include/llvm/Analysis/CodeMetrics.h b/llvm/include/llvm/Analysis/CodeMetrics.h --- a/llvm/include/llvm/Analysis/CodeMetrics.h +++ b/llvm/include/llvm/Analysis/CodeMetrics.h @@ -75,7 +75,8 @@ /// Add information about a block to the current state. void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, - const SmallPtrSetImpl<const Value *> &EphValues); + const SmallPtrSetImpl<const Value *> &EphValues, + bool PrepareForLTO = false); /// Collect a loop's ephemeral values (those used only by an assume /// or similar intrinsics in the loop). diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -210,7 +210,7 @@ // // LoopRotate - This pass is a simple loop rotating pass. // -Pass *createLoopRotatePass(int MaxHeaderSize = -1); +Pass *createLoopRotatePass(int MaxHeaderSize = -1, bool PrepareForLTO = false); //===----------------------------------------------------------------------===// // diff --git a/llvm/include/llvm/Transforms/Scalar/LoopRotation.h b/llvm/include/llvm/Transforms/Scalar/LoopRotation.h --- a/llvm/include/llvm/Transforms/Scalar/LoopRotation.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopRotation.h @@ -22,12 +22,14 @@ /// A simple loop rotation transformation.
class LoopRotatePass : public PassInfoMixin<LoopRotatePass> { public: - LoopRotatePass(bool EnableHeaderDuplication = true); + LoopRotatePass(bool EnableHeaderDuplication = true, + bool PrepareForLTO = false); PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); private: const bool EnableHeaderDuplication; + const bool PrepareForLTO; }; } diff --git a/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h b/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h --- a/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h @@ -33,7 +33,8 @@ bool LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC, DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, const SimplifyQuery &SQ, - bool RotationOnly, unsigned Threshold, bool IsUtilMode); + bool RotationOnly, unsigned Threshold, bool IsUtilMode, + bool PrepareForLTO = false); } // namespace llvm diff --git a/llvm/lib/Analysis/CodeMetrics.cpp b/llvm/lib/Analysis/CodeMetrics.cpp --- a/llvm/lib/Analysis/CodeMetrics.cpp +++ b/llvm/lib/Analysis/CodeMetrics.cpp @@ -112,9 +112,9 @@ /// Fill in the current structure with information gleaned from the specified /// block. -void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, - const TargetTransformInfo &TTI, - const SmallPtrSetImpl<const Value *> &EphValues) { +void CodeMetrics::analyzeBasicBlock( + const BasicBlock *BB, const TargetTransformInfo &TTI, + const SmallPtrSetImpl<const Value *> &EphValues, bool PrepareForLTO) { ++NumBlocks; unsigned NumInstsBeforeThisBB = NumInsts; for (const Instruction &I : *BB) { @@ -128,8 +128,12 @@ // If a function is both internal and has a single use, then it is // extremely likely to get inlined in the future (it was probably // exposed by an interleaved devirtualization pass). - if (!Call->isNoInline() && F->hasInternalLinkage() && F->hasOneUse()) + // When preparing for LTO, liberally consider calls as inline + // candidates.
+ if (!Call->isNoInline() && + ((F->hasInternalLinkage() && F->hasOneUse()) || PrepareForLTO)) { ++NumInlineCandidates; + } // If this call is to function itself, then the function is recursive. // Inlining it into other functions is a bad idea, because this is diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1230,7 +1230,8 @@ // First rotate loops that may have been un-rotated by prior passes. // Disable header duplication at -Oz. OptimizePM.addPass(createFunctionToLoopPassAdaptor( - LoopRotatePass(Level != OptimizationLevel::Oz), EnableMSSALoopDependency, + LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink), + EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/false, DebugLogging)); // Distribute loops to allow partial vectorization. I.e. isolate dependences diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -433,7 +433,7 @@ MPM.add(createLoopSimplifyCFGPass()); } // Rotate Loop - disable header duplication at -Oz - MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO)); // TODO: Investigate promotion cap for O1. MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); if (EnableSimpleLoopUnswitch) @@ -777,7 +777,7 @@ // Re-rotate loops in all our loop nests. These may have fallout out of // rotated form due to GVN or other transformations, and the vectorizer relies // on the rotated form. Disable header duplication at -Oz. - MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO)); // Distribute loops to allow partial vectorization. I.e. isolate dependences // into separate loop that would otherwise inhibit vectorization. 
This is diff --git a/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/llvm/lib/Transforms/Scalar/LoopRotation.cpp --- a/llvm/lib/Transforms/Scalar/LoopRotation.cpp +++ b/llvm/lib/Transforms/Scalar/LoopRotation.cpp @@ -34,8 +34,14 @@ "rotation-max-header-size", cl::init(16), cl::Hidden, cl::desc("The default maximum header size for automatic loop rotation")); -LoopRotatePass::LoopRotatePass(bool EnableHeaderDuplication) - : EnableHeaderDuplication(EnableHeaderDuplication) {} +static cl::opt<bool> PrepareForLTOOption( + "rotation-prepare-for-lto", cl::init(false), cl::Hidden, + cl::desc("Run loop-rotation in the prepare-for-lto stage. This option " + "should be used for testing only.")); + +LoopRotatePass::LoopRotatePass(bool EnableHeaderDuplication, bool PrepareForLTO) + : EnableHeaderDuplication(EnableHeaderDuplication), + PrepareForLTO(PrepareForLTO) {} PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, @@ -53,9 +59,10 @@ Optional<MemorySSAUpdater> MSSAU; if (AR.MSSA) MSSAU = MemorySSAUpdater(AR.MSSA); - bool Changed = LoopRotation(&L, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE, - MSSAU.hasValue() ? MSSAU.getPointer() : nullptr, - SQ, false, Threshold, false); + bool Changed = + LoopRotation(&L, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE, + MSSAU.hasValue() ?
MSSAU.getPointer() : nullptr, SQ, false, + Threshold, false, PrepareForLTO || PrepareForLTOOption); if (!Changed) return PreservedAnalyses::all(); @@ -73,10 +80,13 @@ class LoopRotateLegacyPass : public LoopPass { unsigned MaxHeaderSize; + bool PrepareForLTO; public: static char ID; // Pass ID, replacement for typeid - LoopRotateLegacyPass(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) { + LoopRotateLegacyPass(int SpecifiedMaxHeaderSize = -1, + bool PrepareForLTO = false) + : LoopPass(ID), PrepareForLTO(PrepareForLTO) { initializeLoopRotateLegacyPassPass(*PassRegistry::getPassRegistry()); if (SpecifiedMaxHeaderSize == -1) MaxHeaderSize = DefaultRotationThreshold; @@ -121,7 +131,8 @@ return LoopRotation(L, LI, TTI, AC, &DT, &SE, MSSAU.hasValue() ? MSSAU.getPointer() : nullptr, SQ, - false, Threshold, false); + false, Threshold, false, + PrepareForLTO || PrepareForLTOOption); } }; } // end namespace @@ -136,6 +147,6 @@ INITIALIZE_PASS_END(LoopRotateLegacyPass, "loop-rotate", "Rotate Loops", false, false) -Pass *llvm::createLoopRotatePass(int MaxHeaderSize) { - return new LoopRotateLegacyPass(MaxHeaderSize); +Pass *llvm::createLoopRotatePass(int MaxHeaderSize, bool PrepareForLTO) { + return new LoopRotateLegacyPass(MaxHeaderSize, PrepareForLTO); } diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp --- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp @@ -65,15 +65,17 @@ const SimplifyQuery &SQ; bool RotationOnly; bool IsUtilMode; + bool PrepareForLTO; public: LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC, DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, - const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode) + const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode, + bool PrepareForLTO) : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE), MSSAU(MSSAU), 
SQ(SQ), RotationOnly(RotationOnly), - IsUtilMode(IsUtilMode) {} + IsUtilMode(IsUtilMode), PrepareForLTO(PrepareForLTO) {} bool processLoop(Loop *L); private: @@ -301,7 +303,7 @@ CodeMetrics::collectEphemeralValues(L, AC, EphValues); CodeMetrics Metrics; - Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues); + Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues, PrepareForLTO); if (Metrics.notDuplicatable) { LLVM_DEBUG( dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable" @@ -324,6 +326,11 @@ ++NumNotRotatedDueToHeaderSize; return Rotated; } + + // When preparing for LTO, avoid rotating loops with calls that could be + // inlined during the LTO stage. + if (PrepareForLTO && Metrics.NumInlineCandidates > 0) + return Rotated; } // Now, this loop is suitable for rotation. @@ -745,8 +752,8 @@ ScalarEvolution *SE, MemorySSAUpdater *MSSAU, const SimplifyQuery &SQ, bool RotationOnly = true, unsigned Threshold = unsigned(-1), - bool IsUtilMode = true) { + bool IsUtilMode = true, bool PrepareForLTO) { LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, SQ, RotationOnly, - IsUtilMode); + IsUtilMode, PrepareForLTO); return LR.processLoop(L); } diff --git a/llvm/test/Transforms/LoopRotate/call-prepare-for-lto.ll b/llvm/test/Transforms/LoopRotate/call-prepare-for-lto.ll --- a/llvm/test/Transforms/LoopRotate/call-prepare-for-lto.ll +++ b/llvm/test/Transforms/LoopRotate/call-prepare-for-lto.ll @@ -1,5 +1,7 @@ ; RUN: opt -S -loop-rotate < %s | FileCheck --check-prefix=FULL %s +; RUN: opt -S -loop-rotate -rotation-prepare-for-lto < %s | FileCheck --check-prefix=PREPARE %s ; RUN: opt -S -passes='require<targetir>,require<assumptions>,loop(loop-rotate)' < %s | FileCheck --check-prefix=FULL %s +; RUN: opt -S -passes='require<targetir>,require<assumptions>,loop(loop-rotate)' -rotation-prepare-for-lto < %s | FileCheck --check-prefix=PREPARE %s ; Test case to make sure loop-rotate avoids rotating during the prepare-for-lto ; stage, when the header contains a call which may be inlined during the LTO stage.
@@ -11,6 +13,11 @@ ; FULL-NEXT: call void @may_be_inlined() ; FULL-NEXT: br label %for.body ; +; PREPARE-LABEL: @test_prepare_for_lto( +; PREPARE-NEXT: entry: +; PREPARE-NEXT: %array = alloca [20 x i32], align 16 +; PREPARE-NEXT: br label %for.cond +; entry: %array = alloca [20 x i32], align 16 br label %for.cond