Index: lib/Transform/ScopInliner.cpp
===================================================================
--- lib/Transform/ScopInliner.cpp
+++ lib/Transform/ScopInliner.cpp
@@ -22,6 +22,7 @@
 #include "llvm/IR/PassManager.h"
 #include "llvm/Passes/PassBuilder.h"
 #include "llvm/Transforms/IPO/AlwaysInliner.h"
+#include "llvm/Transforms/IPO/Inliner.h"

 #define DEBUG_TYPE "polly-scop-inliner"

@@ -29,11 +30,15 @@
 extern bool polly::PollyAllowFullFunction;

 namespace {
-class ScopInliner : public CallGraphSCCPass {
+class ScopInliner : public LegacyInlinerBase {
+private:
+  std::map<Function *, InlineCost> InlineCostCache;
 public:
   static char ID;

-  ScopInliner() : CallGraphSCCPass(ID) {}
+  ScopInliner() : LegacyInlinerBase(ID, /*InsertLifetime=*/true) {
+    // initializeAlwaysInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
+  }

   bool doInitialization(CallGraph &CG) override {
     if (!polly::PollyAllowFullFunction) {
@@ -45,24 +50,26 @@
           " enabled. "
           " If not, the entry block is not included in the Scop");
     }
-    return true;
+    return LegacyInlinerBase::doInitialization(CG);
+  }
+
+  bool doFinalization(CallGraph &CG) override {
+    InlineCostCache.clear();
+    return LegacyInlinerBase::doFinalization(CG);
   }

-  bool runOnSCC(CallGraphSCC &SCC) override {
-    // We do not try to inline non-trivial SCCs because this would lead to
-    // "infinite" inlining if we are not careful.
-    if (SCC.size() > 1)
-      return false;
-    assert(SCC.size() == 1 && "found empty SCC");
-    Function *F = (*SCC.begin())->getFunction();
-
-    // If the function is a nullptr, or the function is a declaration.
-    if (!F)
-      return false;
-    if (F->isDeclaration()) {
-      DEBUG(dbgs() << "Skipping " << F->getName()
-                   << "because it is a declaration.\n");
-      return false;
+  InlineCost getInlineCost(CallSite CS) override {
+    Function *F = CS.getCalledFunction();
+
+    if (!F || F->isDeclaration())
+      return InlineCost::getNever();
+
+    DEBUG(dbgs() << "Scop inliner running on: " << F->getName() << " | ");
+
+    auto It = InlineCostCache.find(F);
+    if (It != InlineCostCache.end()) {
+      DEBUG(dbgs() << "(cached) will inline? " << (bool)It->second << ".\n");
+      return It->second;
     }

     PassBuilder PB;
@@ -73,32 +80,47 @@
     RegionInfo &RI = FAM.getResult<RegionInfoAnalysis>(*F);
     ScopDetection &SD = FAM.getResult<ScopAnalysis>(*F);

-    const bool HasScopAsTopLevelRegion =
-        SD.ValidRegions.count(RI.getTopLevelRegion()) > 0;
-
-    if (HasScopAsTopLevelRegion) {
-      DEBUG(dbgs() << "Skipping " << F->getName()
-                   << " has scop as top level region");
-      F->addFnAttr(llvm::Attribute::AlwaysInline);
-
-      ModuleAnalysisManager MAM;
-      PB.registerModuleAnalyses(MAM);
-      ModulePassManager MPM;
-      MPM.addPass(AlwaysInlinerPass());
-      Module *M = F->getParent();
-      assert(M && "Function has illegal module");
-      MPM.run(*M, MAM);
-    } else {
-      DEBUG(dbgs() << F->getName()
-                   << " does NOT have scop as top level region\n");
+    const auto *TopLevelRegion = RI.getTopLevelRegion();
+
+    // Whether the entire function can be modeled as a Scop.
+    const bool IsFullyModeledAsScop =
+        SD.ValidRegions.count(TopLevelRegion) > 0;
+
+    // Whether some direct child of the top-level region is modeled as a Scop.
+    const bool IsModeledByTopLevelChildren = [&] {
+      for (const auto *ScopRegion : SD.ValidRegions)
+        if (ScopRegion->getParent() == TopLevelRegion)
+          return true;
+      return false;
+    }();
+
+    const InlineCost AnalyzedInlineCost = [&] {
+      if (IsFullyModeledAsScop || IsModeledByTopLevelChildren)
+        return InlineCost::getAlways();
+      return InlineCost::getNever();
+    }();
+
+    assert(InlineCostCache.find(F) == InlineCostCache.end() &&
+           "Cached inlining analysis was not used.");
+    // Can't use InlineCostCache[F] = AnalyzedInlineCost because the
+    // copy-ctor of InlineCost has been deleted.
+    InlineCostCache.insert(std::make_pair(F, AnalyzedInlineCost));
+    DEBUG(dbgs() << "will inline? " << (bool)(AnalyzedInlineCost) << ".\n");
+
+    // Inlining changes the caller's body, so drop its cached cost.
+    if (AnalyzedInlineCost) {
+      Function *Caller = CS.getCaller();
+      assert(Caller && "Callsite has invalid caller");
+
+      InlineCostCache.erase(Caller);
     }

-    return false;
-  };
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    CallGraphSCCPass::getAnalysisUsage(AU);
+    return AnalyzedInlineCost;
   }
+
+  // Let LegacyInlinerBase carry out the actual inlining, as AlwaysInliner does.
+  bool runOnSCC(CallGraphSCC &SCC) override { return inlineCalls(SCC); }
+
 };
 } // namespace
Index: test/ScopInliner/scop-from-entry-successor.ll
===================================================================
--- /dev/null
+++ test/ScopInliner/scop-from-entry-successor.ll
@@ -0,0 +1,66 @@
+; RUN: opt %loadPolly -polly-detect-full-functions -polly-scop-inliner \
+; RUN: -polly-scops -analyze < %s | FileCheck %s
+
+; Check that we get the two nested loops by inlining `to_be_inlined` into
+; `inline_site` when `to_be_inlined` has a `Scop` that spans from the
+; *successor* of the entry block to the exit node.
+
+; CHECK: Max Loop Depth: 2
+
+; static const int N = 1000;
+;
+; void to_be_inlined(int A[]) {
+;     int B; // Dead on purpose: we run no dead code elimination, so the function keeps an alloca.
+;     for(int i = 0; i < N; i++)
+;         A[i] *= 10;
+; }
+;
+; void inline_site(int A[]) {
+;     for(int i = 0; i < N; i++)
+;        to_be_inlined(A);
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.12.0"
+
+
+define void @to_be_inlined(i32* %A) {
+entry:
+  %alloc_to_block_modelling = alloca i32
+  br label %entry.split
+
+entry.split:                                      ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %entry.split, %for.body
+  %indvars.iv1 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv1
+  %tmp = load i32, i32* %arrayidx, align 4
+  %mul = mul nsw i32 %tmp, 10
+  store i32 %mul, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+define void @inline_site(i32* %A) {
+entry:
+  br label %entry.split
+
+entry.split:                                      ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %entry.split, %for.body
+  %i.01 = phi i32 [ 0, %entry.split ], [ %inc, %for.body ]
+  tail call void @to_be_inlined(i32* %A)
+  %inc = add nuw nsw i32 %i.01, 1
+  %exitcond = icmp eq i32 %inc, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}