Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -71,6 +71,7 @@ void initializeAliasDebuggerPass(PassRegistry&); void initializeAliasSetPrinterPass(PassRegistry&); void initializeAlwaysInlinerPass(PassRegistry&); +void initializePrepareEliminateCallsPass(PassRegistry&); void initializeArgPromotionPass(PassRegistry&); void initializeAtomicExpandPass(PassRegistry&); void initializeSampleProfileLoaderPass(PassRegistry&); Index: include/llvm/LinkAllPasses.h =================================================================== --- include/llvm/LinkAllPasses.h +++ include/llvm/LinkAllPasses.h @@ -85,6 +85,7 @@ (void) llvm::createInstrProfilingPass(); (void) llvm::createFunctionInliningPass(); (void) llvm::createAlwaysInlinerPass(); + (void) llvm::createPrepareEliminateCallsPass(); (void) llvm::createGlobalDCEPass(); (void) llvm::createGlobalOptimizerPass(); (void) llvm::createGlobalsModRefPass(); Index: include/llvm/Transforms/IPO.h =================================================================== --- include/llvm/Transforms/IPO.h +++ include/llvm/Transforms/IPO.h @@ -103,6 +103,10 @@ Pass *createAlwaysInlinerPass(); Pass *createAlwaysInlinerPass(bool InsertLifetime); +/// Prepare to eliminate function calls by marking functions with always_inline +/// and cloning functions that cannot be marked always_inline. +ModulePass *createPrepareEliminateCallsPass(); + //===----------------------------------------------------------------------===// /// createPruneEHPass - Return a new pass object which transforms invoke /// instructions into calls, if the callee can _not_ unwind the stack. 
Index: lib/Target/AMDGPU/AMDGPU.h =================================================================== --- lib/Target/AMDGPU/AMDGPU.h +++ lib/Target/AMDGPU/AMDGPU.h @@ -63,7 +63,6 @@ FunctionPass *createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST); Pass *createAMDGPUStructurizeCFGPass(); FunctionPass *createAMDGPUISelDag(TargetMachine &tm); -ModulePass *createAMDGPUAlwaysInlinePass(); void initializeSIFixControlFlowLiveIntervalsPass(PassRegistry&); extern char &SIFixControlFlowLiveIntervalsID; Index: lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp +++ /dev/null @@ -1,64 +0,0 @@ -//===-- AMDGPUAlwaysInlinePass.cpp - Promote Allocas ----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -/// \file -/// This pass marks all internal functions as always_inline and creates -/// duplicates of all other functions a marks the duplicates as always_inline. 
-// -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "llvm/IR/Module.h" -#include "llvm/Transforms/Utils/Cloning.h" - -using namespace llvm; - -namespace { - -class AMDGPUAlwaysInline : public ModulePass { - static char ID; - -public: - AMDGPUAlwaysInline() : ModulePass(ID) { } - bool runOnModule(Module &M) override; - const char *getPassName() const override { return "AMDGPU Always Inline Pass"; } -}; - -} // End anonymous namespace - -char AMDGPUAlwaysInline::ID = 0; - -bool AMDGPUAlwaysInline::runOnModule(Module &M) { - std::vector FuncsToClone; - - for (Function &F : M) { - if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty() && - !F.hasFnAttribute(Attribute::NoInline)) - FuncsToClone.push_back(&F); - } - - for (Function *F : FuncsToClone) { - ValueToValueMapTy VMap; - Function *NewFunc = CloneFunction(F, VMap, false); - NewFunc->setLinkage(GlobalValue::InternalLinkage); - M.getFunctionList().push_back(NewFunc); - F->replaceAllUsesWith(NewFunc); - } - - for (Function &F : M) { - if (F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::NoInline)) { - F.addFnAttr(Attribute::AlwaysInline); - } - } - return false; -} - -ModulePass *llvm::createAMDGPUAlwaysInlinePass() { - return new AMDGPUAlwaysInline(); -} Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -164,7 +164,7 @@ void AMDGPUPassConfig::addIRPasses() { // Function calls are not supported, so make sure we inline everything. 
- addPass(createAMDGPUAlwaysInlinePass()); + addPass(createPrepareEliminateCallsPass()); addPass(createAlwaysInlinerPass()); // We need to add the barrier noop pass, otherwise adding the function // inlining pass will cause all of the PassConfigs passes to be run Index: lib/Target/AMDGPU/CMakeLists.txt =================================================================== --- lib/Target/AMDGPU/CMakeLists.txt +++ lib/Target/AMDGPU/CMakeLists.txt @@ -14,7 +14,6 @@ add_llvm_target(AMDGPUCodeGen AMDILCFGStructurizer.cpp - AMDGPUAlwaysInlinePass.cpp AMDGPUAsmPrinter.cpp AMDGPUFrameLowering.cpp AMDGPUIntrinsicInfo.cpp Index: lib/Transforms/IPO/CMakeLists.txt =================================================================== --- lib/Transforms/IPO/CMakeLists.txt +++ lib/Transforms/IPO/CMakeLists.txt @@ -19,6 +19,7 @@ MergeFunctions.cpp PartialInlining.cpp PassManagerBuilder.cpp + PrepareEliminateCalls.cpp PruneEH.cpp StripDeadPrototypes.cpp StripSymbols.cpp Index: lib/Transforms/IPO/IPO.cpp =================================================================== --- lib/Transforms/IPO/IPO.cpp +++ lib/Transforms/IPO/IPO.cpp @@ -31,6 +31,7 @@ initializeGlobalOptPass(Registry); initializeIPCPPass(Registry); initializeAlwaysInlinerPass(Registry); + initializePrepareEliminateCallsPass(Registry); initializeSimpleInlinerPass(Registry); initializeInternalizePassPass(Registry); initializeLoopExtractorPass(Registry); Index: lib/Transforms/IPO/PrepareEliminateCalls.cpp =================================================================== --- /dev/null +++ lib/Transforms/IPO/PrepareEliminateCalls.cpp @@ -0,0 +1,75 @@ +//===-- PrepareEliminateCalls.cpp - Prepare to remove calls ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +/// \file +/// This pass marks all internal functions as always_inline and creates +/// duplicates of all other functions a marks the duplicates as always_inline. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" + +#include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/Cloning.h" + +#define DEBUG_TYPE "prepare-eliminate-calls" + +using namespace llvm; + +namespace { + +class PrepareEliminateCalls : public ModulePass { +public: + static char ID; + + PrepareEliminateCalls() : ModulePass(ID) { + initializePrepareEliminateCallsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override; + const char *getPassName() const override { + return "Prepare Eliminate Calls"; + } +}; + +} // End anonymous namespace + +char PrepareEliminateCalls::ID = 0; + +INITIALIZE_PASS(PrepareEliminateCalls, "prepare-eliminate-calls", + "Prepare Eliminate Calls", false, false) + +bool PrepareEliminateCalls::runOnModule(Module &M) { + std::vector FuncsToClone; + + for (Function &F : M) { + if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty() && + !F.hasFnAttribute(Attribute::NoInline)) + FuncsToClone.push_back(&F); + } + + for (Function *F : FuncsToClone) { + ValueToValueMapTy VMap; + Function *NewFunc = CloneFunction(F, VMap, false); + NewFunc->setLinkage(GlobalValue::InternalLinkage); + M.getFunctionList().push_back(NewFunc); + F->replaceAllUsesWith(NewFunc); + } + + for (Function &F : M) { + if (F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::NoInline)) { + F.addFnAttr(Attribute::AlwaysInline); + } + } + return false; +} + +ModulePass *llvm::createPrepareEliminateCallsPass() { + return new PrepareEliminateCalls(); +} Index: test/Transforms/PrepareEliminateCalls/prepare-eliminate-calls.ll =================================================================== --- /dev/null +++ 
test/Transforms/PrepareEliminateCalls/prepare-eliminate-calls.ll
@@ -0,0 +1,43 @@
+; RUN: opt -S -prepare-eliminate-calls < %s | FileCheck %s
+
+; Internal functions gain alwaysinline unless they are marked noinline.
+
+; CHECK: define internal fastcc i32 @func(i32 %a) #0 {
+define internal fastcc i32 @func(i32 %a) {
+entry:
+  %tmp0 = add i32 %a, 1
+  ret i32 %tmp0
+}
+
+; CHECK: define void @kernel(i32 addrspace(1)* %out) {
+define void @kernel(i32 addrspace(1)* %out) {
+entry:
+  ; The call site must use the callee's calling convention (fastcc).
+  %tmp0 = call fastcc i32 @func(i32 1)
+  store i32 %tmp0, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK: define void @kernel2(i32 addrspace(1)* %out) {
+define void @kernel2(i32 addrspace(1)* %out) {
+entry:
+  call void @kernel(i32 addrspace(1)* %out)
+  ret void
+}
+
+; CHECK: define internal void @noinline(i32 addrspace(1)* %ptr) #1 {
+define internal void @noinline(i32 addrspace(1)* %ptr) noinline {
+  store i32 123, i32 addrspace(1)* %ptr
+  ret void
+}
+
+; CHECK: define void @kernel3(i32 addrspace(1)* %out) {
+; CHECK: call void @noinline(i32 addrspace(1)* %out)
+define void @kernel3(i32 addrspace(1)* %out) {
+entry:
+  call void @noinline(i32 addrspace(1)* %out)
+  ret void
+}
+
+; CHECK: attributes #0 = { alwaysinline }
+; CHECK: attributes #1 = { noinline }