Index: include/polly/LinkAllPasses.h
===================================================================
--- include/polly/LinkAllPasses.h
+++ include/polly/LinkAllPasses.h
@@ -32,6 +32,7 @@
 
 namespace polly {
 llvm::Pass *createCodePreparationPass();
+llvm::Pass *createScopInlinerPass();
 llvm::Pass *createDeadCodeElimPass();
 llvm::Pass *createDependenceInfoPass();
 llvm::Pass *createDependenceInfoWrapperPassPass();
@@ -108,6 +109,7 @@
 namespace llvm {
 class PassRegistry;
 void initializeCodePreparationPass(llvm::PassRegistry &);
+void initializeScopInlinerPass(llvm::PassRegistry &);
 void initializeDeadCodeElimPass(llvm::PassRegistry &);
 void initializeJSONExporterPass(llvm::PassRegistry &);
 void initializeJSONImporterPass(llvm::PassRegistry &);
Index: include/polly/ScopDetection.h
===================================================================
--- include/polly/ScopDetection.h
+++ include/polly/ScopDetection.h
@@ -113,6 +113,7 @@
 extern bool PollyProcessUnprofitable;
 extern bool PollyInvariantLoadHoisting;
 extern bool PollyAllowUnsignedOperations;
+extern bool PollyAllowFullFunction;
 
 /// A function attribute which will cause Polly to skip the function
 extern llvm::StringRef PollySkipFnAttr;
Index: lib/Analysis/ScopDetection.cpp
===================================================================
--- lib/Analysis/ScopDetection.cpp
+++ lib/Analysis/ScopDetection.cpp
@@ -107,9 +107,11 @@
              "ANY of the regexes provided."),
     cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));
 
-static cl::opt<bool>
-    AllowFullFunction("polly-detect-full-functions",
+bool polly::PollyAllowFullFunction;
+static cl::opt<bool, true>
+    XAllowFullFunction("polly-detect-full-functions",
                       cl::desc("Allow the detection of full functions"),
+                      cl::location(polly::PollyAllowFullFunction),
                       cl::init(false), cl::cat(PollyCategory));
 
 static cl::opt<std::string> OnlyRegion(
@@ -1541,7 +1543,7 @@
 
   DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t");
 
-  if (!AllowFullFunction && CurRegion.isTopLevelRegion()) {
+  if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) {
     DEBUG(dbgs() << "Top level region is invalid\n");
     return false;
   }
@@ -1564,7 +1566,7 @@
 
   // SCoP cannot contain the entry block of the function, because we need
   // to insert alloca instruction there when translate scalar to array.
-  if (!AllowFullFunction &&
+  if (!PollyAllowFullFunction &&
       CurRegion.getEntry() ==
           &(CurRegion.getEntry()->getParent()->getEntryBlock()))
     return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry());
Index: lib/CMakeLists.txt
===================================================================
--- lib/CMakeLists.txt
+++ lib/CMakeLists.txt
@@ -65,6 +65,7 @@
   Transform/Simplify.cpp
   Transform/MaximalStaticExpansion.cpp
   Transform/RewriteByReferenceParameters.cpp
+  Transform/ScopInliner.cpp
   ${POLLY_HEADER_FILES}
   )
 set_target_properties(PollyCore PROPERTIES FOLDER "Polly")
Index: lib/Support/RegisterPasses.cpp
===================================================================
--- lib/Support/RegisterPasses.cpp
+++ lib/Support/RegisterPasses.cpp
@@ -264,6 +264,7 @@
   initializePollyCanonicalizePass(Registry);
   initializePolyhedralInfoPass(Registry);
   initializeScopDetectionWrapperPassPass(Registry);
+  initializeScopInlinerPass(Registry);
   initializeScopInfoRegionPassPass(Registry);
   initializeScopInfoWrapperPassPass(Registry);
   initializeRewriteByrefParamsPass(Registry);
Index: lib/Transform/ScopInliner.cpp
===================================================================
--- /dev/null
+++ lib/Transform/ScopInliner.cpp
@@ -0,0 +1,121 @@
+//===---- ScopInliner.cpp - Polyhedral based inliner ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Take a SCC and:
+// 1. If it has more than one component, bail out (contains cycles)
+// 2. If it has just one component, and if the function is entirely a scop,
+//    inline it.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "polly-scop-inliner"
+
+#include "llvm/IR/LLVMContext.h"
+#include "polly/ScopDetection.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Passes/PassBuilder.h"
+#include "polly/RegisterPasses.h"
+#include "polly/LinkAllPasses.h"
+#include "llvm/Transforms/IPO/AlwaysInliner.h"
+
+using namespace polly;
+extern bool polly::PollyAllowFullFunction;
+
+namespace {
+/// Call-graph SCC pass that marks a function always-inline, and runs the
+/// always-inliner over its module, when Scop detection reports the function's
+/// top-level region as a valid Scop.
+class ScopInliner : public CallGraphSCCPass {
+public:
+  static char ID;
+
+  ScopInliner() : CallGraphSCCPass(ID) {}
+
+  /// Abort with a fatal error unless full-function detection is enabled.
+  bool doInitialization(CallGraph &CG) override {
+    if (!polly::PollyAllowFullFunction) {
+      report_fatal_error(
+          "Aborting from ScopInliner because it only makes sense to run "
+          "this with -polly-detect-full-functions. The heuristic for "
+          "ScopInliner checks that the full function is a Scop, which only "
+          "happens if -polly-detect-full-functions is enabled. If not, the "
+          "entry block is not included in the Scop");
+    }
+    return true;
+  }
+
+  /// Inspect the single function of \p SCC; if its top-level region is a valid
+  /// Scop, mark it always-inline and run the always-inliner on its module.
+  bool runOnSCC(CallGraphSCC &SCC) override {
+    // We do not try to inline non-trivial SCCs because this would lead to
+    // "infinite" inlining if we are not careful.
+    if (SCC.size() > 1)
+      return false;
+    assert(SCC.size() == 1 && "found empty SCC");
+    Function *F = (*SCC.begin())->getFunction();
+
+    // Nothing to analyze if the node has no function or only a declaration.
+    if (!F)
+      return false;
+    if (F->isDeclaration()) {
+      DEBUG(dbgs() << "Skipping " << F->getName()
+                   << " because it is a declaration.\n");
+      return false;
+    }
+
+    // Build the analysis managers only after the cheap early exits above.
+    PassBuilder PB;
+    FunctionAnalysisManager FAM;
+    FAM.registerPass([] { return ScopAnalysis(); });
+    PB.registerFunctionAnalyses(FAM);
+
+    RegionInfo &RI = FAM.getResult<RegionInfoAnalysis>(*F);
+    ScopDetection &SD = FAM.getResult<ScopAnalysis>(*F);
+
+    const bool HasScopAsTopLevelRegion =
+        SD.ValidRegions.count(RI.getTopLevelRegion()) > 0;
+
+    if (HasScopAsTopLevelRegion) {
+      F->addFnAttr(llvm::Attribute::AlwaysInline);
+
+      ModuleAnalysisManager MAM;
+      PB.registerModuleAnalyses(MAM);
+      ModulePassManager MPM;
+      MPM.addPass(AlwaysInlinerPass());
+      Module *M = F->getParent();
+      assert(M && "Function has illegal module");
+      MPM.run(*M, MAM);
+    }
+
+    DEBUG(dbgs() << F->getName() << " has scop as top level region: "
+                 << HasScopAsTopLevelRegion << "\n");
+    // NOTE(review): false is returned even when the inliner ran above;
+    // confirm this is intended for the legacy call-graph pass manager.
+    return false;
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    CallGraphSCCPass::getAnalysisUsage(AU);
+  }
+};
+
+} // namespace
+char ScopInliner::ID;
+
+Pass *polly::createScopInlinerPass() { return new ScopInliner(); }
+
+INITIALIZE_PASS_BEGIN(
+    ScopInliner, "polly-scop-inliner",
+    "inline functions based on how much of the function is a scop.",
+    false, false)
+INITIALIZE_PASS_END(
+    ScopInliner, "polly-scop-inliner",
+    "inline functions based on how much of the function is a scop.",
+    false, false)
Index: test/ScopInliner/inline-fn-call-in-loop.ll
===================================================================
--- /dev/null
+++ test/ScopInliner/inline-fn-call-in-loop.ll
@@ -0,0 +1,59 @@
+; RUN: opt %loadPolly -polly-detect-full-functions -polly-scop-inliner \
+; RUN: -polly-scops -analyze < %s | FileCheck %s
+
+; CHECK: Max Loop Depth: 2
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.12.0"
+
+define void @to_be_inlined(i32* %A) {
+entry:
+  br label %entry.split
+
+entry.split:                                      ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %entry.split, %for.body
+  %indvars.iv1 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv1
+  %tmp = load i32, i32* %arrayidx, align 4, !tbaa !3
+  %mul = mul nsw i32 %tmp, 10
+  store i32 %mul, i32* %arrayidx, align 4, !tbaa !3
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+define void @inline_site(i32* %A) {
+entry:
+  br label %entry.split
+
+entry.split:                                      ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %entry.split, %for.body
+  %i.01 = phi i32 [ 0, %entry.split ], [ %inc, %for.body ]
+  tail call void @to_be_inlined(i32* %A)
+  %inc = add nuw nsw i32 %i.01, 1
+  %exitcond = icmp eq i32 %inc, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+attributes #0 = { argmemonly nounwind }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"PIC Level", i32 2}
+!2 = !{!"clang version 6.0.0 (http://llvm.org/git/clang.git 6660f0d30ef23b3142a6b08f9f41aad3d47c084f) (http://llvm.org/git/llvm.git 052dd78cb30f77a05dc8bb06b851402c4b6c6587)"}
+!3 = !{!4, !4, i64 0}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}