Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -319,16 +319,36 @@ }); } +/// Internalize predicate: keep declarations, entry functions, used globals. +static bool mustPreserveGV(const GlobalValue &GV) { + if (const Function *F = dyn_cast<Function>(&GV)) { + if (F->isDeclaration() || AMDGPU::isEntryFunctionCC(F->getCallingConv())) + return true; + } + + return !GV.use_empty(); +} + void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) { Builder.DivergentTarget = true; bool EnableOpt = getOptLevel() > CodeGenOpt::None; - bool Internalize = InternalizeSymbols && EnableOpt && - (getTargetTriple().getArch() == Triple::amdgcn); + bool Internalize = InternalizeSymbols; bool EarlyInline = EarlyInlineAll && EnableOpt; bool AMDGPUAA = EnableAMDGPUAliasAnalysis && EnableOpt; bool LibCallSimplify = EnableLibCallSimplify && EnableOpt; + if (Internalize) { + // If we're generating code, we always have the whole program available. The + // relocations expected for externally visible functions aren't supported, + // so make sure every non-entry function is hidden. 
+ Builder.addExtension( + PassManagerBuilder::EP_EnabledOnOptLevel0, + [](const PassManagerBuilder &, legacy::PassManagerBase &PM) { + PM.add(createInternalizePass(mustPreserveGV)); + }); + } + Builder.addExtension( PassManagerBuilder::EP_ModuleOptimizerEarly, [Internalize, EarlyInline, AMDGPUAA](const PassManagerBuilder &, @@ -339,25 +359,7 @@ } PM.add(createAMDGPUUnifyMetadataPass()); if (Internalize) { - PM.add(createInternalizePass([=](const GlobalValue &GV) -> bool { - if (const Function *F = dyn_cast<Function>(&GV)) { - if (F->isDeclaration()) - return true; - switch (F->getCallingConv()) { - default: - return false; - case CallingConv::AMDGPU_VS: - case CallingConv::AMDGPU_HS: - case CallingConv::AMDGPU_GS: - case CallingConv::AMDGPU_PS: - case CallingConv::AMDGPU_CS: - case CallingConv::AMDGPU_KERNEL: - case CallingConv::SPIR_KERNEL: - return true; - } - } - return !GV.use_empty(); - })); + PM.add(createInternalizePass(mustPreserveGV)); PM.add(createGlobalDCEPass()); } if (EarlyInline) Index: test/CodeGen/AMDGPU/internalize.ll =================================================================== --- test/CodeGen/AMDGPU/internalize.ll +++ test/CodeGen/AMDGPU/internalize.ll @@ -1,35 +1,46 @@ -; RUN: opt -O1 -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-internalize-symbols < %s | FileCheck %s -; CHECK-NOT: unused -; CHECK-NOT: foo_used -; CHECK: gvar_used -; CHECK: main_kernel +; RUN: opt -O1 -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-internalize-symbols < %s | FileCheck -check-prefix=ALL -check-prefix=OPT %s +; RUN: opt -O0 -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-internalize-symbols < %s | FileCheck -check-prefix=ALL -check-prefix=OPTNONE %s + +; OPT-NOT: gvar_unused +; OPTNONE: gvar_unused @gvar_unused = addrspace(1) global i32 undef, align 4 + +; ALL: gvar_used @gvar_used = addrspace(1) global i32 undef, align 4 -; Function Attrs: alwaysinline nounwind +; ALL: define internal {{(fastcc )?}}void @func_used( +define void @func_used(i32 addrspace(1)* %out, i32 %tid) #2 { +entry: + 
store i32 %tid, i32 addrspace(1)* %out + ret void +} + +; ALL: define amdgpu_kernel void @foo_unused( define amdgpu_kernel void @foo_unused(i32 addrspace(1)* %out) local_unnamed_addr #1 { entry: store i32 1, i32 addrspace(1)* %out ret void } -; Function Attrs: alwaysinline nounwind +; ALL: define amdgpu_kernel void @foo_used( define amdgpu_kernel void @foo_used(i32 addrspace(1)* %out, i32 %tid) local_unnamed_addr #1 { entry: store i32 %tid, i32 addrspace(1)* %out ret void } +; ALL: define amdgpu_kernel void @main_kernel() define amdgpu_kernel void @main_kernel() { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() - tail call void @foo_used(i32 addrspace(1)* @gvar_used, i32 %tid) nounwind + tail call void @foo_used(i32 addrspace(1)* @gvar_used, i32 %tid) + tail call void @func_used(i32 addrspace(1)* @gvar_used, i32 %tid) ret void } declare i32 @llvm.amdgcn.workitem.id.x() #0 attributes #0 = { nounwind readnone } - attributes #1 = { alwaysinline nounwind } +attributes #2 = { noinline nounwind }