Index: lib/AST/ItaniumMangle.cpp
===================================================================
--- lib/AST/ItaniumMangle.cpp
+++ lib/AST/ItaniumMangle.cpp
@@ -484,6 +484,14 @@
   if (!Context.shouldMangleDeclName(FD))
     return;
 
+  // CUDA __host__ __device__ functions co-exist with both __host__ and
+  // __device__ functions, so they need a different mangled name. We sort
+  // "device", "host", and "enable_if" attrs alphabetically.
+  bool IsCudaHostDevice =
+      FD->hasAttr<CUDAHostAttr>() && FD->hasAttr<CUDADeviceAttr>();
+  if (IsCudaHostDevice)
+    Out << "Ua6device";
+
   if (FD->hasAttr<EnableIfAttr>()) {
     FunctionTypeDepthState Saved = FunctionTypeDepth.push();
     Out << "Ua9enable_ifI";
@@ -503,6 +511,9 @@
     FunctionTypeDepth.pop(Saved);
   }
 
+  if (IsCudaHostDevice)
+    Out << "Ua4host";
+
   // Whether the mangling of a function type includes the return type depends on
   // the context and the nature of the function. The rules for deciding whether
   // the return type is included are:
Index: test/CodeGenCUDA/convergent.cu
===================================================================
--- test/CodeGenCUDA/convergent.cu
+++ test/CodeGenCUDA/convergent.cu
@@ -17,23 +17,23 @@
 
 // HOST: Function Attrs:
 // HOST-NOT: convergent
-// HOST-NEXT: define void @_Z3barv
+// HOST-NEXT: define void @_Z3barUa6deviceUa4hostv
 // DEVICE: Function Attrs:
 // DEVICE-SAME: convergent
-// DEVICE-NEXT: define void @_Z3barv
+// DEVICE-NEXT: define void @_Z3barUa6deviceUa4hostv
 __host__ __device__ void baz();
 __host__ __device__ void bar() {
-  // DEVICE: call void @_Z3bazv() [[CALL_ATTR:#[0-9]+]]
+  // DEVICE: call void @_Z3bazUa6deviceUa4hostv() [[CALL_ATTR:#[0-9]+]]
   baz();
 }
 
-// DEVICE: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
+// DEVICE: declare void @_Z3bazUa6deviceUa4hostv() [[BAZ_ATTR:#[0-9]+]]
 // DEVICE: attributes [[BAZ_ATTR]] = {
 // DEVICE-SAME: convergent
 // DEVICE-SAME: }
 // DEVICE: attributes [[CALL_ATTR]] = { convergent }
 
-// HOST: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
+// HOST: declare void @_Z3bazUa6deviceUa4hostv() [[BAZ_ATTR:#[0-9]+]]
 // HOST: attributes [[BAZ_ATTR]] = {
 // HOST-NOT: convergent
 // NOST-SAME: }
Index: test/CodeGenCUDA/device-var-init.cu
===================================================================
--- test/CodeGenCUDA/device-var-init.cu
+++ test/CodeGenCUDA/device-var-init.cu
@@ -375,14 +375,14 @@
 // CHECK: call void @_ZN2UCC1Ev(%struct.UC* %uc)
 // CHECK: call void @_ZN3ECIC1Ev(%struct.ECI* %eci)
 // CHECK: call void @_ZN3NECC1Ev(%struct.NEC* %nec)
-// CHECK: call void @_ZN3NCVC1Ev(%struct.NCV* %ncv)
-// CHECK: call void @_ZN3NCFC1Ev(%struct.NCF* %ncf)
-// CHECK: call void @_ZN4NCFSC1Ev(%struct.NCFS* %ncfs)
+// CHECK: call void @_ZN3NCVC1EUa6deviceUa4hostv(%struct.NCV* %ncv)
+// CHECK: call void @_ZN3NCFC1EUa6deviceUa4hostv(%struct.NCF* %ncf)
+// CHECK: call void @_ZN4NCFSC1EUa6deviceUa4hostv(%struct.NCFS* %ncfs)
 // CHECK: call void @_ZN3UTCC1IJEEEDpT_(%struct.UTC* %utc)
 // CHECK: call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC* %netc)
 // CHECK: call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC* %ec_i_ec)
 // CHECK: call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1* %ec_i_ec1)
-// CHECK: call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %t_v_t)
+// CHECK: call void @_ZN5T_V_TC1EUa6deviceUa4hostv(%struct.T_V_T* %t_v_t)
 // CHECK: call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* %t_b_nec)
 // CHECK: call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* %t_f_nec)
 // CHECK: call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* %t_fa_nec)
Index: test/CodeGenCUDA/function-overload.cu
===================================================================
--- test/CodeGenCUDA/function-overload.cu
+++ test/CodeGenCUDA/function-overload.cu
@@ -35,9 +35,9 @@
   s_cd_dh scddh;
   // CHECK-BOTH: call void @_ZN7s_cd_dhC1Ev(
   s_cd_hd scdhd;
-  // CHECK-BOTH: call void @_ZN7s_cd_hdC1Ev
+  // CHECK-BOTH: call void @_ZN7s_cd_hdC1EUa6deviceUa4hostv
 
-  // CHECK-BOTH: call void @_ZN7s_cd_hdD1Ev(
+  // CHECK-BOTH: call void @_ZN7s_cd_hdD1EUa6deviceUa4hostv(
   // CHECK-BOTH: call void @_ZN7s_cd_dhD1Ev(
 }
 // CHECK-BOTH: ret void
@@ -49,11 +49,11 @@
 // CHECK-DEVICE: store i32 12,
 // CHECK-BOTH: ret void
 
-// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_hdC2Ev(
+// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_hdC2EUa6deviceUa4hostv(
 // CHECK-BOTH: store i32 31,
 // CHECK-BOTH: ret void
 
-// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_hdD2Ev(
+// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_hdD2EUa6deviceUa4hostv(
 // CHECK-BOTH: store i32 32,
 // CHECK-BOTH: ret void
 
Index: test/CodeGenCUDA/mangling.cu
===================================================================
--- /dev/null
+++ test/CodeGenCUDA/mangling.cu
@@ -0,0 +1,20 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang_cc1 -triple nvptx-nvidia-cuda -fcuda-is-device -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s
+
+#include "Inputs/cuda.h"
+
+// Check that __host__ __device__ function mangled names explicitly contain
+// "host" and "device" attributes. This is important because HD overloads may
+// coexist with H and D overloads.
+
+// CHECK: define i32 @_Z11host_deviceUa6deviceUa4hostv()
+__host__ __device__ int host_device() { return 0; }
+
+// The enable_if attribute should appear in-between the device and host attrs
+// in the mangled name.
+// CHECK: define i32 @_Z8enableifUa6deviceUa9enable_if{{.*}}Ua4hostv
+__attribute__((enable_if(1, "")))
+__host__ __device__ int enableif() { return 0; }
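
For reference, a minimal sketch of the name layout the hunks above imply, assuming the "Ua6device"/"Ua4host" scheme added in ItaniumMangle.cpp; the identifiers below are hypothetical and the expected manglings are transcribed from the new CHECK lines rather than verified against a build:

// Illustrative only; not part of the patch. Host-only and device-only
// functions keep their plain Itanium manglings; only __host__ __device__
// functions pick up the vendor-extended qualifiers.
#include "Inputs/cuda.h"

__device__ int dev_only() { return 1; }      // unchanged: _Z8dev_onlyv

// Under this change: _Z2hdUa6deviceUa4hostv -- "Ua6device" is emitted first,
// "Ua4host" last, both before the bare function type ("v": no parameters).
__host__ __device__ int hd() { return 2; }

// Under this change: _Z3hd2Ua6deviceUa9enable_if{{.*}}Ua4hostv, with the
// enable_if condition mangling elided as in mangling.cu; "enable_if" sorts
// alphabetically between "device" and "host".
__attribute__((enable_if(1, "")))
__host__ __device__ int hd2() { return 3; }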