Index: include/polly/LinkAllPasses.h =================================================================== --- include/polly/LinkAllPasses.h +++ include/polly/LinkAllPasses.h @@ -32,6 +32,7 @@ namespace polly { llvm::Pass *createCodePreparationPass(); +llvm::Pass *createScopInlinerPass(); llvm::Pass *createDeadCodeElimPass(); llvm::Pass *createDependenceInfoPass(); llvm::Pass *createDependenceInfoWrapperPassPass(); @@ -108,6 +109,7 @@ namespace llvm { class PassRegistry; void initializeCodePreparationPass(llvm::PassRegistry &); +void initializeScopInlinerPass(llvm::PassRegistry &); void initializeDeadCodeElimPass(llvm::PassRegistry &); void initializeJSONExporterPass(llvm::PassRegistry &); void initializeJSONImporterPass(llvm::PassRegistry &); Index: lib/CMakeLists.txt =================================================================== --- lib/CMakeLists.txt +++ lib/CMakeLists.txt @@ -65,6 +65,7 @@ Transform/Simplify.cpp Transform/MaximalStaticExpansion.cpp Transform/RewriteByReferenceParameters.cpp + Transform/ScopInliner.cpp ${POLLY_HEADER_FILES} ) set_target_properties(PollyCore PROPERTIES FOLDER "Polly") Index: lib/Support/RegisterPasses.cpp =================================================================== --- lib/Support/RegisterPasses.cpp +++ lib/Support/RegisterPasses.cpp @@ -264,6 +264,7 @@ initializePollyCanonicalizePass(Registry); initializePolyhedralInfoPass(Registry); initializeScopDetectionWrapperPassPass(Registry); + initializeScopInlinerPass(Registry); initializeScopInfoRegionPassPass(Registry); initializeScopInfoWrapperPassPass(Registry); initializeRewriteByrefParamsPass(Registry); Index: lib/Transform/ScopInliner.cpp =================================================================== --- /dev/null +++ lib/Transform/ScopInliner.cpp @@ -0,0 +1,126 @@ +//===---- ScopInliner.cpp - Polyhedral based inliner ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source 
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Take a SCC and:
+// 1. If it has more than one component, bail out (contains cycles)
+// 2. If it has just one component, and if the function is entirely a scop,
+//    inline it.
+//
+//===----------------------------------------------------------------------===//
+
+#include "polly/CodeGen/CodeGeneration.h"
+#include "polly/CodeGen/IslAst.h"
+#include "polly/CodeGen/IslNodeBuilder.h"
+#include "polly/CodeGen/PPCGCodeGeneration.h"
+#include "polly/CodeGen/Utils.h"
+#include "polly/DependenceInfo.h"
+#include "polly/LinkAllPasses.h"
+#include "polly/Options.h"
+#include "polly/ScopDetection.h"
+#include "polly/ScopInfo.h"
+#include "polly/Support/SCEVValidator.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/Linker/Linker.h"
+#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/IPO/AlwaysInliner.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+using namespace polly;
+
+#define DEBUG_TYPE "polly-scop-inliner"
+namespace {
+/// Call graph SCC pass that marks a function always_inline, and runs the
+/// always-inliner, when scop detection accepts its top-level region.
+class ScopInliner : public CallGraphSCCPass {
+public:
+  static char ID;
+
+  ScopInliner() : CallGraphSCCPass(ID) {}
+
+  /// Inspect the single function of \p SCC and, if its top-level region is a
+  /// valid scop, mark it always_inline and run the always-inliner over its
+  /// module. SCCs with more than one node are skipped.
+  /// \returns true if the function was marked for inlining (IR was modified).
+  bool runOnSCC(CallGraphSCC &SCC) override {
+    // We do not try to inline non-trivial SCCs because this would lead to
+    // "infinite" inlining if we are not careful.
+    if (SCC.size() > 1)
+      return false;
+    assert(SCC.size() == 1 && "found empty SCC");
+    Function *F = (*SCC.begin())->getFunction();
+
+    // Without a function body there is nothing to analyze or inline.
+    if (!F || F->isDeclaration())
+      return false;
+
+    // Set up the analysis managers only after the cheap bail-outs above.
+    PassBuilder PB;
+    FunctionAnalysisManager FAM;
+    FAM.registerPass([] { return ScopAnalysis(); });
+    PB.registerFunctionAnalyses(FAM);
+
+    RegionInfo &RI = FAM.getResult<RegionInfoAnalysis>(*F);
+    ScopDetection &SD = FAM.getResult<ScopAnalysis>(*F);
+    const bool IsFullScop = SD.ValidRegions.count(RI.getTopLevelRegion()) > 0;
+    DEBUG(dbgs() << "-" << F->getName() << " | " << IsFullScop << "\n");
+    if (!IsFullScop)
+      return false;
+
+    F->addFnAttr(llvm::Attribute::AlwaysInline);
+
+    // NOTE(review): a module-level inliner is run from inside an SCC pass and the
+    // call graph is not updated here - confirm the pass manager tolerates this.
+    ModuleAnalysisManager MAM;
+    PB.registerModuleAnalyses(MAM);
+    ModulePassManager MPM;
+    MPM.addPass(AlwaysInlinerPass());
+    Module *M = F->getParent();
+    assert(M && "Function has illegal module");
+    MPM.run(*M, MAM);
+    return true;
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    CallGraphSCCPass::getAnalysisUsage(AU);
+  }
+};
+} // namespace
+
+char ScopInliner::ID;
+
+Pass *polly::createScopInlinerPass() { return new ScopInliner(); }
+
+INITIALIZE_PASS_BEGIN(
+    ScopInliner, "polly-scop-inliner",
+    "inline functions based on how much of the function is a scop.",
+    false, false)
+INITIALIZE_PASS_END(
+    ScopInliner, "polly-scop-inliner",
+    "inline functions based on how much of the function is a scop.",
+    false, false)
+
Index: test/ScopInliner/inline-fn-call-in-loop.ll
===================================================================
--- /dev/null
+++ test/ScopInliner/inline-fn-call-in-loop.ll
@@ -0,0 +1,59 @@
+; RUN: opt %loadPolly -polly-detect-full-functions -polly-scop-inliner \
+; RUN: -polly-scops -analyze < %s | FileCheck %s
+; After inlining @to_be_inlined, @inline_site should hold a 2-deep loop nest.
+; CHECK: Max Loop Depth: 2
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.12.0"
+; Callee: one loop that multiplies A[0..999] by 10 in place.
+define void @to_be_inlined(i32* %A) {
+entry:
+  br label %entry.split
+
+entry.split:                                      ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %entry.split, %for.body
+  %indvars.iv1 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv1
+  %tmp = load i32, i32* %arrayidx, align 4, !tbaa !3
+  %mul = mul nsw i32 %tmp, 10
+  store i32 %mul, i32* %arrayidx, align 4, !tbaa !3
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+; Caller: invokes @to_be_inlined from inside a 1000-iteration loop.
+define void @inline_site(i32* %A) {
+entry:
+  br label %entry.split
+
+entry.split:                                      ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %entry.split, %for.body
+  %i.01 = phi i32 [ 0, %entry.split ], [ %inc, %for.body ]
+  tail call void @to_be_inlined(i32* %A)
+  %inc = add nuw nsw i32 %i.01, 1
+  %exitcond = icmp eq i32 %inc, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+attributes #0 = { argmemonly nounwind }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"PIC Level", i32 2}
+!2 = !{!"clang version 6.0.0 (http://llvm.org/git/clang.git 6660f0d30ef23b3142a6b08f9f41aad3d47c084f) (http://llvm.org/git/llvm.git 052dd78cb30f77a05dc8bb06b851402c4b6c6587)"}
+!3 = !{!4, !4, i64 0}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}