Index: clang/lib/CodeGen/CodeGenModule.h =================================================================== --- clang/lib/CodeGen/CodeGenModule.h +++ clang/lib/CodeGen/CodeGenModule.h @@ -371,6 +371,14 @@ llvm::SmallVector, 8> GlobalValReplacements; + /// Potentially unused address space casts of global variables to be cleaned + /// up. In CUDA/HIP, global variables are emitted as global variables in + /// device or constant address space which are then casted to default address + /// space. If the global variables are not used, the address space casts + /// become invisible LLVM constants, causing spurious use of the global + /// variables which prevents them from being erased. + llvm::DenseSet GlobalVarCasts; + /// Variables for which we've emitted globals containing their constant /// values along with the corresponding globals, for opportunistic reuse. llvm::DenseMap InitializerConstants; Index: clang/lib/CodeGen/CodeGenModule.cpp =================================================================== --- clang/lib/CodeGen/CodeGenModule.cpp +++ clang/lib/CodeGen/CodeGenModule.cpp @@ -768,6 +768,11 @@ // that might affect the DLL storage class or the visibility, and // before anything that might act on these. setVisibilityFromDLLStorageClass(LangOpts, getModule()); + + // Remove unused address space casts of global variables. + for (auto *Cast : GlobalVarCasts) + if (Cast->use_empty()) + Cast->destroyConstant(); } void CodeGenModule::EmitOpenCLMetadata() { @@ -3938,9 +3943,13 @@ : (LangOpts.OpenCL ? LangAS::opencl_global : LangAS::Default); assert(getContext().getTargetAddressSpace(ExpectedAS) == Ty->getPointerAddressSpace()); - if (AddrSpace != ExpectedAS) - return getTargetCodeGenInfo().performAddrSpaceCast(*this, GV, AddrSpace, - ExpectedAS, Ty); + if (AddrSpace != ExpectedAS) { + auto *Cast = getTargetCodeGenInfo().performAddrSpaceCast( + *this, GV, AddrSpace, ExpectedAS, Ty); + // Record address space casts of global variables for cleaning up if unused. + GlobalVarCasts.insert(Cast); + return Cast; + } return GV; } Index: clang/test/CodeGenCUDA/unused-global-var.cu =================================================================== --- /dev/null +++ clang/test/CodeGenCUDA/unused-global-var.cu @@ -0,0 +1,50 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \ +// RUN: -std=c++11 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \ +// RUN: | FileCheck %s + +#include "Inputs/cuda.h" + +// AMDGPU internalize unused global variables for whole-program compilation +// (-fno-gpu-rdc for each TU, or -fgpu-rdc for LTO), which are then +// eliminated by global DCE. If there are invisible unused address space casts +// for global variables, the internalization and elimination of unused global +// variales will be hindered. This test makes sure no such address space +// casts. + +// Check unused device/constant variables are eliminated. + +// CHECK-NOT: @v1 +__device__ int v1; + +// CHECK-NOT: @v2 +__constant__ int v2; + +// CHECK-NOT: @_ZL2v3 +constexpr int v3 = 1; + +// Check managed variables are always kept. + +// CHECK: @v4 +__managed__ int v4; + +// Check used device/constant variables are not eliminated. +// CHECK: @u1 +__device__ int u1; + +// CHECK: @u2 +__constant__ int u2; + +// Check u3 is kept because its address is taken. +// CHECK: @_ZL2u3 +constexpr int u3 = 2; + +// Check u4 is not kept because it is not ODR-use. +// CHECK-NOT: @_ZL2u4 +constexpr int u4 = 3; + +__device__ int fun1(const int& x); + +__global__ void kern1(int *x) { + *x = u1 + u2 + fun1(u3) + u4; +}