diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -5107,9 +5107,9 @@
   }
 
   // Enable -mconstructor-aliases except on darwin, where we have to work around
-  // a linker bug (see <rdar://problem/7651567>), and CUDA/AMDGPU device code,
-  // where aliases aren't supported.
-  if (!RawTriple.isOSDarwin() && !RawTriple.isNVPTX() && !RawTriple.isAMDGPU())
+  // a linker bug (see <rdar://problem/7651567>), and CUDA device code, where
+  // aliases aren't supported.
+  if (!RawTriple.isOSDarwin() && !RawTriple.isNVPTX())
     CmdArgs.push_back("-mconstructor-aliases");
 
   // Darwin's kernel doesn't support guard variables; just die if we
diff --git a/clang/test/CodeGenCUDA/amdgpu-alias-undef-symbols.cu b/clang/test/CodeGenCUDA/amdgpu-alias-undef-symbols.cu
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGenCUDA/amdgpu-alias-undef-symbols.cu
@@ -0,0 +1,17 @@
+// REQUIRES: amdgpu-registered-target, clang-driver
+
+// RUN: %clang --offload-arch=gfx906 --cuda-device-only -x hip -emit-llvm -S -o - %s \
+// RUN:   -fgpu-rdc -O3 -mllvm -amdgpu-early-inline-all=true -mllvm -amdgpu-function-calls=false | \
+// RUN:   FileCheck %s
+
+#include "Inputs/cuda.h"
+
+// CHECK: %struct.B = type { i8 }
+struct B {
+
+  // CHECK: @_ZN1BC1Ei = hidden unnamed_addr alias void (%struct.B*, i32), void (%struct.B*, i32)* @_ZN1BC2Ei
+  __device__ B(int x);
+};
+
+__device__ B::B(int x) {
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -93,6 +93,8 @@
 
   for (GlobalAlias &A : M.aliases()) {
     if (Function* F = dyn_cast<Function>(A.getAliasee())) {
+      if (A.getLinkage() != GlobalValue::InternalLinkage)
+        continue;
       A.replaceAllUsesWith(F);
       AliasesToRemove.push_back(&A);
     }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -913,14 +913,19 @@
   if (Info.Callee.isReg()) {
     CallInst.addReg(Info.Callee.getReg());
     CallInst.addImm(0);
-  } else if (Info.Callee.isGlobal() && Info.Callee.getOffset() == 0) {
-    // The call lowering lightly assumed we can directly encode a call target in
-    // the instruction, which is not the case. Materialize the address here.
+  } else if (Info.Callee.isGlobal()) {
     const GlobalValue *GV = Info.Callee.getGlobal();
-    auto Ptr = MIRBuilder.buildGlobalValue(
-      LLT::pointer(GV->getAddressSpace(), 64), GV);
-    CallInst.addReg(Ptr.getReg(0));
-    CallInst.add(Info.Callee);
+    if (!isa<Function>(GV))
+      return false;
+    if (Info.Callee.getOffset() == 0) {
+      // The call lowering lightly assumed we can directly encode a call target
+      // in the instruction, which is not the case. Materialize the address
+      // here.
+      auto Ptr = MIRBuilder.buildGlobalValue(
+          LLT::pointer(GV->getAddressSpace(), 64), GV);
+      CallInst.addReg(Ptr.getReg(0));
+      CallInst.add(Info.Callee);
+    }
   } else
     return false;
 
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3007,6 +3007,13 @@
   bool IsSibCall = false;
   bool IsThisReturn = false;
   MachineFunction &MF = DAG.getMachineFunction();
+  GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(Callee);
+
+  if (GSD) {
+    const GlobalValue *GV = GSD->getGlobal();
+    if (!isa<Function>(GV))
+      return lowerUnhandledCall(CLI, InVals, "callee is not a function ");
+  }
 
   if (Callee.isUndef() || isNullConstant(Callee)) {
     if (!CLI.IsTailCall) {
@@ -3264,7 +3271,7 @@
   Ops.push_back(Callee);
   // Add a redundant copy of the callee global which will not be legalized, as
   // we need direct access to the callee later.
-  if (GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(Callee)) {
+  if (GSD) {
     const GlobalValue *GV = GSD->getGlobal();
     Ops.push_back(DAG.getTargetGlobalAddress(GV, DL, MVT::i64));
   } else {
diff --git a/llvm/test/CodeGen/AMDGPU/inline-calls.ll b/llvm/test/CodeGen/AMDGPU/inline-calls.ll
--- a/llvm/test/CodeGen/AMDGPU/inline-calls.ll
+++ b/llvm/test/CodeGen/AMDGPU/inline-calls.ll
@@ -1,6 +1,4 @@
 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck  %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck  %s
-; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s
 
 ; ALL-NOT: {{^}}func:
 define internal i32 @func(i32 %a) {
@@ -18,8 +16,8 @@
   ret void
 }
 
-; CHECK-NOT: func_alias
-; ALL-NOT: func_alias
+; CHECK: func_alias
+; ALL: func_alias
 @func_alias = alias i32 (i32), i32 (i32)* @func
 
 ; ALL: {{^}}kernel3: