Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -95,6 +95,13 @@ cl::init(false), cl::Hidden); +// Option to inline all early. +static cl::opt EarlyInlineAll( + "amdgpu-early-inline-all", + cl::desc("Inline all functions early"), + cl::init(false), + cl::Hidden); + static cl::opt EnableSDWAPeephole( "amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), @@ -273,12 +280,14 @@ bool Internalize = InternalizeSymbols && (getOptLevel() > CodeGenOpt::None) && (getTargetTriple().getArch() == Triple::amdgcn); + bool EarlyInline = EarlyInlineAll && + (getOptLevel() > CodeGenOpt::None); bool AMDGPUAA = EnableAMDGPUAliasAnalysis && getOptLevel() > CodeGenOpt::None; Builder.addExtension( PassManagerBuilder::EP_ModuleOptimizerEarly, - [Internalize, AMDGPUAA](const PassManagerBuilder &, - legacy::PassManagerBase &PM) { + [Internalize, EarlyInline, AMDGPUAA](const PassManagerBuilder &, + legacy::PassManagerBase &PM) { if (AMDGPUAA) { PM.add(createAMDGPUAAWrapperPass()); PM.add(createAMDGPUExternalAAWrapperPass()); @@ -304,8 +313,9 @@ return !GV.use_empty(); })); PM.add(createGlobalDCEPass()); - PM.add(createAMDGPUAlwaysInlinePass()); } + if (EarlyInline) + PM.add(createAMDGPUAlwaysInlinePass()); }); Builder.addExtension( Index: test/CodeGen/AMDGPU/early-inline.ll =================================================================== --- test/CodeGen/AMDGPU/early-inline.ll +++ test/CodeGen/AMDGPU/early-inline.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=amdgcn-- -O1 -S -inline-threshold=1 -amdgpu-internalize-symbols %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-- -O1 -S -inline-threshold=1 -amdgpu-early-inline-all %s | FileCheck %s define i32 @callee(i32 %x) { entry: