diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5107,9 +5107,9 @@ } // Enable -mconstructor-aliases except on darwin, where we have to work around - // a linker bug (see ), and CUDA/AMDGPU device code, - // where aliases aren't supported. - if (!RawTriple.isOSDarwin() && !RawTriple.isNVPTX() && !RawTriple.isAMDGPU()) + // a linker bug (see ), and CUDA device code, where + // aliases aren't supported. + if (!RawTriple.isOSDarwin() && !RawTriple.isNVPTX()) CmdArgs.push_back("-mconstructor-aliases"); // Darwin's kernel doesn't support guard variables; just die if we diff --git a/clang/test/CodeGenCUDA/amdgpu-alias-undef-symbols.cu b/clang/test/CodeGenCUDA/amdgpu-alias-undef-symbols.cu new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCUDA/amdgpu-alias-undef-symbols.cu @@ -0,0 +1,17 @@ +// REQUIRES: amdgpu-registered-target, clang-driver + +// RUN: %clang --offload-arch=gfx906 --cuda-device-only -x hip -emit-llvm -S -o - %s \ +// RUN: -fgpu-rdc -O3 -mllvm -amdgpu-early-inline-all=true -mllvm -amdgpu-function-calls=false | \ +// RUN: FileCheck %s + +#include "Inputs/cuda.h" + +// CHECK: %struct.B = type { i8 } +struct B { + + // CHECK: @_ZN1BC1Ei = hidden unnamed_addr alias void (%struct.B*, i32), void (%struct.B*, i32)* @_ZN1BC2Ei + __device__ B(int x); +}; + +__device__ B::B(int x) { +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp @@ -93,6 +93,8 @@ for (GlobalAlias &A : M.aliases()) { if (Function* F = dyn_cast(A.getAliasee())) { + if (A.getLinkage() != GlobalValue::InternalLinkage) + continue; A.replaceAllUsesWith(F); AliasesToRemove.push_back(&A); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -913,14 +913,19 @@ if (Info.Callee.isReg()) { CallInst.addReg(Info.Callee.getReg()); CallInst.addImm(0); - } else if (Info.Callee.isGlobal() && Info.Callee.getOffset() == 0) { - // The call lowering lightly assumed we can directly encode a call target in - // the instruction, which is not the case. Materialize the address here. + } else if (Info.Callee.isGlobal()) { const GlobalValue *GV = Info.Callee.getGlobal(); - auto Ptr = MIRBuilder.buildGlobalValue( - LLT::pointer(GV->getAddressSpace(), 64), GV); - CallInst.addReg(Ptr.getReg(0)); - CallInst.add(Info.Callee); + if (!isa(GV)) + return false; + if (Info.Callee.getOffset() == 0) { + // The call lowering lightly assumed we can directly encode a call target + // in the instruction, which is not the case. Materialize the address + // here. + auto Ptr = MIRBuilder.buildGlobalValue( + LLT::pointer(GV->getAddressSpace(), 64), GV); + CallInst.addReg(Ptr.getReg(0)); + CallInst.add(Info.Callee); + } } else return false; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -3007,6 +3007,13 @@ bool IsSibCall = false; bool IsThisReturn = false; MachineFunction &MF = DAG.getMachineFunction(); + GlobalAddressSDNode *GSD = dyn_cast(Callee); + + if (GSD) { + const GlobalValue *GV = GSD->getGlobal(); + if (!isa(GV)) + return lowerUnhandledCall(CLI, InVals, "callee is not a function "); + } if (Callee.isUndef() || isNullConstant(Callee)) { if (!CLI.IsTailCall) { @@ -3264,7 +3271,7 @@ Ops.push_back(Callee); // Add a redundant copy of the callee global which will not be legalized, as // we need direct access to the callee later. - if (GlobalAddressSDNode *GSD = dyn_cast(Callee)) { + if (GSD) { const GlobalValue *GV = GSD->getGlobal(); Ops.push_back(DAG.getTargetGlobalAddress(GV, DL, MVT::i64)); } else { diff --git a/llvm/test/CodeGen/AMDGPU/inline-calls.ll b/llvm/test/CodeGen/AMDGPU/inline-calls.ll --- a/llvm/test/CodeGen/AMDGPU/inline-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/inline-calls.ll @@ -1,6 +1,4 @@ ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s ; ALL-NOT: {{^}}func: define internal i32 @func(i32 %a) { @@ -18,8 +16,8 @@ ret void } -; CHECK-NOT: func_alias -; ALL-NOT: func_alias +; CHECK: func_alias +; ALL: func_alias @func_alias = alias i32 (i32), i32 (i32)* @func ; ALL: {{^}}kernel3: