Index: polly/trunk/include/polly/LinkAllPasses.h
===================================================================
--- polly/trunk/include/polly/LinkAllPasses.h
+++ polly/trunk/include/polly/LinkAllPasses.h
@@ -32,6 +32,7 @@
 
 namespace polly {
 llvm::Pass *createCodePreparationPass();
+llvm::Pass *createScopInlinerPass();
 llvm::Pass *createDeadCodeElimPass();
 llvm::Pass *createDependenceInfoPass();
 llvm::Pass *createDependenceInfoWrapperPassPass();
@@ -108,6 +109,7 @@
 namespace llvm {
 class PassRegistry;
 void initializeCodePreparationPass(llvm::PassRegistry &);
+void initializeScopInlinerPass(llvm::PassRegistry &);
 void initializeDeadCodeElimPass(llvm::PassRegistry &);
 void initializeJSONExporterPass(llvm::PassRegistry &);
 void initializeJSONImporterPass(llvm::PassRegistry &);
Index: polly/trunk/include/polly/ScopDetection.h
===================================================================
--- polly/trunk/include/polly/ScopDetection.h
+++ polly/trunk/include/polly/ScopDetection.h
@@ -113,6 +113,7 @@
 extern bool PollyProcessUnprofitable;
 extern bool PollyInvariantLoadHoisting;
 extern bool PollyAllowUnsignedOperations;
+extern bool PollyAllowFullFunction;
 
 /// A function attribute which will cause Polly to skip the function
 extern llvm::StringRef PollySkipFnAttr;
Index: polly/trunk/lib/Analysis/ScopDetection.cpp
===================================================================
--- polly/trunk/lib/Analysis/ScopDetection.cpp
+++ polly/trunk/lib/Analysis/ScopDetection.cpp
@@ -107,10 +107,12 @@
              "ANY of the regexes provided."),
     cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));
 
-static cl::opt<bool>
-    AllowFullFunction("polly-detect-full-functions",
-                      cl::desc("Allow the detection of full functions"),
-                      cl::init(false), cl::cat(PollyCategory));
+bool polly::PollyAllowFullFunction;
+static cl::opt<bool, true>
+    XAllowFullFunction("polly-detect-full-functions",
+                       cl::desc("Allow the detection of full functions"),
+                       cl::location(polly::PollyAllowFullFunction),
+                       cl::init(false), cl::cat(PollyCategory));
 
 static cl::opt<std::string> OnlyRegion(
     "polly-only-region",
@@ -1541,7 +1543,7 @@
 
   DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t");
 
-  if (!AllowFullFunction && CurRegion.isTopLevelRegion()) {
+  if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) {
     DEBUG(dbgs() << "Top level region is invalid\n");
     return false;
   }
@@ -1564,7 +1566,7 @@
 
   // SCoP cannot contain the entry block of the function, because we need
   // to insert alloca instruction there when translate scalar to array.
-  if (!AllowFullFunction &&
+  if (!PollyAllowFullFunction &&
       CurRegion.getEntry() ==
           &(CurRegion.getEntry()->getParent()->getEntryBlock()))
     return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry());
Index: polly/trunk/lib/CMakeLists.txt
===================================================================
--- polly/trunk/lib/CMakeLists.txt
+++ polly/trunk/lib/CMakeLists.txt
@@ -65,6 +65,7 @@
   Transform/Simplify.cpp
   Transform/MaximalStaticExpansion.cpp
   Transform/RewriteByReferenceParameters.cpp
+  Transform/ScopInliner.cpp
   ${POLLY_HEADER_FILES}
   )
 set_target_properties(PollyCore PROPERTIES FOLDER "Polly")
Index: polly/trunk/lib/Support/RegisterPasses.cpp
===================================================================
--- polly/trunk/lib/Support/RegisterPasses.cpp
+++ polly/trunk/lib/Support/RegisterPasses.cpp
@@ -264,6 +264,7 @@
   initializePollyCanonicalizePass(Registry);
   initializePolyhedralInfoPass(Registry);
   initializeScopDetectionWrapperPassPass(Registry);
+  initializeScopInlinerPass(Registry);
   initializeScopInfoRegionPassPass(Registry);
   initializeScopInfoWrapperPassPass(Registry);
   initializeRewriteByrefParamsPass(Registry);
Index: polly/trunk/lib/Transform/ScopInliner.cpp
===================================================================
--- polly/trunk/lib/Transform/ScopInliner.cpp
+++ polly/trunk/lib/Transform/ScopInliner.cpp
@@ -0,0 +1,139 @@
+//===---- ScopInliner.cpp - Polyhedral based inliner ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Take a SCC and:
+// 1. If it has more than one component, bail out (contains cycles)
+// 2. If it has just one component, and if the function is entirely a scop,
+//    inline it.
+//
+//===----------------------------------------------------------------------===//
+
+#include "polly/LinkAllPasses.h"
+#include "polly/RegisterPasses.h"
+#include "polly/ScopDetection.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Transforms/IPO/AlwaysInliner.h"
+
+#define DEBUG_TYPE "polly-scop-inliner"
+
+using namespace llvm;
+using namespace polly;
+
+namespace {
+/// Call-graph SCC pass that force-inlines functions that are entirely a scop.
+///
+/// For every SCC consisting of a single function definition, scop detection
+/// is run on that function. If the function's top-level region is a valid
+/// scop, the function is marked always-inline and the always-inliner is run
+/// over the enclosing module.
+class ScopInliner : public CallGraphSCCPass {
+public:
+  static char ID;
+
+  ScopInliner() : CallGraphSCCPass(ID) {}
+
+  /// Abort unless full-function scop detection is enabled.
+  ///
+  /// runOnSCC only inlines functions whose top-level region is a scop, and
+  /// scop detection rejects top-level regions unless
+  /// -polly-detect-full-functions is set.
+  ///
+  /// \returns false, since no IR is modified here.
+  bool doInitialization(CallGraph &CG) override {
+    if (!polly::PollyAllowFullFunction) {
+      report_fatal_error(
+          "Aborting from ScopInliner because it only makes sense to run with "
+          "-polly-detect-full-functions. "
+          "The heuristic for ScopInliner checks that the full function is a "
+          "Scop, which happens if and only if -polly-detect-full-functions is "
+          "enabled. "
+          "If not, the entry block is not included in the Scop");
+    }
+    return false;
+  }
+
+  /// Inline the function of a single-node \p SCC if it is entirely a scop.
+  ///
+  /// \returns true if the function was marked always-inline and the
+  ///          always-inliner was run over its module, false otherwise.
+  bool runOnSCC(CallGraphSCC &SCC) override {
+    // We do not try to inline non-trivial SCCs because this would lead to
+    // "infinite" inlining if we are not careful.
+    if (SCC.size() > 1)
+      return false;
+    assert(SCC.size() == 1 && "found empty SCC");
+    Function *F = (*SCC.begin())->getFunction();
+
+    // Skip nodes without a function, and declarations: there is no body to
+    // analyze.
+    if (!F)
+      return false;
+    if (F->isDeclaration()) {
+      DEBUG(dbgs() << "Skipping " << F->getName()
+                   << " because it is a declaration.\n");
+      return false;
+    }
+
+    // Run scop detection on F through a local new-pass-manager analysis
+    // manager.
+    PassBuilder PB;
+    FunctionAnalysisManager FAM;
+    FAM.registerPass([] { return ScopAnalysis(); });
+    PB.registerFunctionAnalyses(FAM);
+
+    RegionInfo &RI = FAM.getResult<RegionInfoAnalysis>(*F);
+    ScopDetection &SD = FAM.getResult<ScopAnalysis>(*F);
+
+    const bool HasScopAsTopLevelRegion =
+        SD.ValidRegions.count(RI.getTopLevelRegion()) > 0;
+
+    if (!HasScopAsTopLevelRegion) {
+      DEBUG(dbgs() << F->getName()
+                   << " does NOT have scop as top level region\n");
+      return false;
+    }
+
+    DEBUG(dbgs() << "Inlining " << F->getName()
+                 << " because it has a scop as top level region\n");
+    F->addFnAttr(llvm::Attribute::AlwaysInline);
+
+    ModuleAnalysisManager MAM;
+    PB.registerModuleAnalyses(MAM);
+    ModulePassManager MPM;
+    MPM.addPass(AlwaysInlinerPass());
+    Module *M = F->getParent();
+    assert(M && "Function has illegal module");
+    MPM.run(*M, MAM);
+
+    // The attribute was added and the inliner ran over the module, so report
+    // the IR as modified.
+    return true;
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    CallGraphSCCPass::getAnalysisUsage(AU);
+  }
+};
+} // namespace
+
+char ScopInliner::ID = 0;
+
+Pass *polly::createScopInlinerPass() { return new ScopInliner(); }
+
+INITIALIZE_PASS_BEGIN(
+    ScopInliner, "polly-scop-inliner",
+    "inline functions based on how much of the function is a scop.", false,
+    false)
+INITIALIZE_PASS_END(
+    ScopInliner, "polly-scop-inliner",
+    "inline functions based on how much of the function is a scop.", false,
+    false)
Index: polly/trunk/test/ScopInliner/ignore-declares.ll
===================================================================
--- polly/trunk/test/ScopInliner/ignore-declares.ll
+++ polly/trunk/test/ScopInliner/ignore-declares.ll
@@ -0,0 +1,8 @@
+; RUN: opt %loadPolly -polly-detect-full-functions -polly-scop-inliner \
+; RUN: -polly-scops -analyze < %s
+
+; Check that we do not crash if there are declares. We should skip function
+; declarations and not try to query for domtree.
+
+declare void @foo()
+
Index: polly/trunk/test/ScopInliner/invariant-load-func.ll
===================================================================
--- polly/trunk/test/ScopInliner/invariant-load-func.ll
+++ polly/trunk/test/ScopInliner/invariant-load-func.ll
@@ -0,0 +1,76 @@
+; RUN: opt %loadPolly -polly-detect-full-functions -polly-scop-inliner \
+; RUN: -polly-scops -analyze -polly-invariant-load-hoisting < %s | FileCheck %s
+
+; Check that we inline a function that requires invariant load hoisting
+; correctly.
+; CHECK: Max Loop Depth: 2
+
+; REQUIRES: pollyacc
+
+
+; void to_be_inlined(int A[], int *begin, int *end) {
+;     for(int i = *begin; i < *end; i++) {
+;         A[i] = 10;
+;     }
+; }
+;
+; static const int N = 1000;
+;
+; void inline_site(int A[], int *begin, int *end) {
+;     for(int i = 0; i < N; i++)
+;         to_be_inlined(A, begin, end);
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.12.0"
+
+define void @to_be_inlined(i32* %A, i32* %begin, i32* %end) {
+entry:
+  br label %entry.split
+
+entry.split:                                      ; preds = %entry
+  %tmp = load i32, i32* %begin, align 4
+  %tmp21 = load i32, i32* %end, align 4
+  %cmp3 = icmp slt i32 %tmp, %tmp21
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry.split
+  %tmp1 = sext i32 %tmp to i64
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %indvars.iv4 = phi i64 [ %tmp1, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv4
+  store i32 10, i32* %arrayidx, align 4
+  %indvars.iv.next = add i64 %indvars.iv4, 1
+  %tmp2 = load i32, i32* %end, align 4
+  %tmp3 = sext i32 %tmp2 to i64
+  %cmp = icmp slt i64 %indvars.iv.next, %tmp3
+  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge:                       ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry.split
+  ret void
+}
+
+
+define void @inline_site(i32* %A, i32* %begin, i32 *%end) {
+entry:
+  br label %entry.split
+
+entry.split:                                      ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %entry.split, %for.body
+  %i.01 = phi i32 [ 0, %entry.split ], [ %inc, %for.body ]
+  tail call void @to_be_inlined(i32* %A, i32* %begin, i32* %end)
+  %inc = add nuw nsw i32 %i.01, 1
+  %exitcond = icmp eq i32 %inc, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
Index: polly/trunk/test/ScopInliner/simple-inline-loop.ll
===================================================================
--- polly/trunk/test/ScopInliner/simple-inline-loop.ll
+++ polly/trunk/test/ScopInliner/simple-inline-loop.ll
@@ -0,0 +1,62 @@
+; RUN: opt %loadPolly -polly-detect-full-functions -polly-scop-inliner \
+; RUN: -polly-scops -analyze < %s | FileCheck %s
+
+; Check that we get the 2 nested loops by inlining `to_be_inlined` into
+; `inline_site`.
+; CHECK: Max Loop Depth: 2
+
+; static const int N = 1000;
+;
+; void to_be_inlined(int A[]) {
+;     for(int i = 0; i < N; i++)
+;         A[i] *= 10;
+; }
+;
+; void inline_site(int A[]) {
+;     for(int i = 0; i < N; i++)
+;         to_be_inlined(A);
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.12.0"
+
+
+define void @to_be_inlined(i32* %A) {
+entry:
+  br label %entry.split
+
+entry.split:                                      ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %entry.split, %for.body
+  %indvars.iv1 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv1
+  %tmp = load i32, i32* %arrayidx, align 4
+  %mul = mul nsw i32 %tmp, 10
+  store i32 %mul, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+define void @inline_site(i32* %A) {
+entry:
+  br label %entry.split
+
+entry.split:                                      ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %entry.split, %for.body
+  %i.01 = phi i32 [ 0, %entry.split ], [ %inc, %for.body ]
+  tail call void @to_be_inlined(i32* %A)
+  %inc = add nuw nsw i32 %i.01, 1
+  %exitcond = icmp eq i32 %inc, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+