Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -94,6 +94,13 @@
   cl::init(false),
   cl::Hidden);
 
+// Off by default: add the AMDGPU always-inline pass at ModuleOptimizerEarly.
+static cl::opt<bool> EnableEarlyInline(
+  "amdgpu-early-inline",
+  cl::desc("Enable early inlining"),
+  cl::init(false),
+  cl::Hidden);
+
 // Enable address space based alias analysis
 static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
   cl::desc("Enable AMDGPU Alias Analysis"),
@@ -228,9 +235,13 @@
   bool Internalize = InternalizeSymbols &&
                      (getOptLevel() > CodeGenOpt::None) &&
                      (getTargetTriple().getArch() == Triple::amdgcn);
+  bool EarlyInline = EnableEarlyInline &&
+                     (getOptLevel() > CodeGenOpt::None) &&
+                     (getTargetTriple().getArch() == Triple::amdgcn);
   Builder.addExtension(
     PassManagerBuilder::EP_ModuleOptimizerEarly,
-    [Internalize](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
+    [Internalize, EarlyInline](const PassManagerBuilder &,
+                               legacy::PassManagerBase &PM) {
       PM.add(createAMDGPUUnifyMetadataPass());
       if (Internalize) {
         PM.add(createInternalizePass([=](const GlobalValue &GV) -> bool {
@@ -253,6 +264,9 @@
         }));
         PM.add(createGlobalDCEPass());
       }
+
+      if (EarlyInline)
+        PM.add(createAMDGPUAlwaysInlinePass());
   });
 }
 
Index: test/CodeGen/AMDGPU/early-inline-alias.ll
===================================================================
--- test/CodeGen/AMDGPU/early-inline-alias.ll
+++ test/CodeGen/AMDGPU/early-inline-alias.ll
@@ -0,0 +1,12 @@
+; RUN: opt -mtriple=amdgcn-- -O1 -S -inline-threshold=1 %s | FileCheck %s
+
+; CHECK: @add1alias = alias i32 (i32), i32 (i32)* @add1
+; CHECK: @add1alias2 = alias i32 (i32), i32 (i32)* @add1
+
+@add1alias = alias i32 (i32), i32 (i32)* @add1
+@add1alias2 = alias i32 (i32), i32 (i32)* @add1
+
+define i32 @add1(i32) {
+  %2 = add nsw i32 %0, 1
+  ret i32 %2
+}
Index: test/CodeGen/AMDGPU/early-inline.ll
===================================================================
--- test/CodeGen/AMDGPU/early-inline.ll
+++ test/CodeGen/AMDGPU/early-inline.ll
@@ -0,0 +1,21 @@
+; RUN: opt -mtriple=amdgcn-- -O1 -S -inline-threshold=1 -amdgpu-early-inline %s | FileCheck %s
+
+define i32 @callee(i32 %x) {
+entry:
+  %mul1 = mul i32 %x, %x
+  %mul2 = mul i32 %mul1, %x
+  %mul3 = mul i32 %mul1, %mul2
+  %mul4 = mul i32 %mul3, %mul2
+  %mul5 = mul i32 %mul4, %mul3
+  ret i32 %mul5
+}
+
+; CHECK-LABEL: @caller
+; CHECK: mul i32
+; CHECK-NOT: call i32
+
+define i32 @caller(i32 %x) {
+entry:
+  %res = call i32 @callee(i32 %x)
+  ret i32 %res
+}