Index: cfe/trunk/lib/CodeGen/CGDecl.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CGDecl.cpp +++ cfe/trunk/lib/CodeGen/CGDecl.cpp @@ -229,12 +229,14 @@ LangAS AS = GetGlobalVarAddressSpace(&D); unsigned TargetAS = getContext().getTargetAddressSpace(AS); - // Local address space cannot have an initializer. + // OpenCL variables in local address space and CUDA shared + // variables cannot have an initializer. llvm::Constant *Init = nullptr; - if (Ty.getAddressSpace() != LangAS::opencl_local) - Init = EmitNullConstant(Ty); - else + if (Ty.getAddressSpace() == LangAS::opencl_local || + D.hasAttr()) Init = llvm::UndefValue::get(LTy); + else + Init = EmitNullConstant(Ty); llvm::GlobalVariable *GV = new llvm::GlobalVariable( getModule(), LTy, Ty.isConstant(getContext()), Linkage, Init, Name, Index: cfe/trunk/test/CodeGenCUDA/address-spaces.cu =================================================================== --- cfe/trunk/test/CodeGenCUDA/address-spaces.cu +++ cfe/trunk/test/CodeGenCUDA/address-spaces.cu @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple nvptx-unknown-unknown | FileCheck %s +// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple amdgcn | FileCheck %s // Verifies Clang emits correct address spaces and addrspacecast instructions // for CUDA code. @@ -19,11 +20,11 @@ int data2; }; -// CHECK: @_ZZ5func0vE1a = internal addrspace(3) global %struct.MyStruct zeroinitializer -// CHECK: @_ZZ5func1vE1a = internal addrspace(3) global float 0.000000e+00 -// CHECK: @_ZZ5func2vE1a = internal addrspace(3) global [256 x float] zeroinitializer -// CHECK: @_ZZ5func3vE1a = internal addrspace(3) global float 0.000000e+00 -// CHECK: @_ZZ5func4vE1a = internal addrspace(3) global float 0.000000e+00 +// CHECK: @_ZZ5func0vE1a = internal addrspace(3) global %struct.MyStruct undef +// CHECK: @_ZZ5func1vE1a = internal addrspace(3) global float undef +// CHECK: @_ZZ5func2vE1a = internal addrspace(3) global [256 x float] undef +// CHECK: @_ZZ5func3vE1a = internal addrspace(3) global float undef +// CHECK: @_ZZ5func4vE1a = internal addrspace(3) global float undef // CHECK: @b = addrspace(3) global float undef __device__ void foo() { @@ -48,7 +49,7 @@ ap->data2 = 2; } // CHECK: define void @_Z5func0v() -// CHECK: store %struct.MyStruct* addrspacecast (%struct.MyStruct addrspace(3)* @_ZZ5func0vE1a to %struct.MyStruct*), %struct.MyStruct** %ap +// CHECK: store %struct.MyStruct* addrspacecast (%struct.MyStruct addrspace(3)* @_ZZ5func0vE1a to %struct.MyStruct*), %struct.MyStruct** %{{.*}} __device__ void callee(float *ap) { *ap = 1.0f; @@ -67,7 +68,7 @@ *ap = 1.0f; } // CHECK: define void @_Z5func2v() -// CHECK: store float* getelementptr inbounds ([256 x float], [256 x float]* addrspacecast ([256 x float] addrspace(3)* @_ZZ5func2vE1a to [256 x float]*), i32 0, i32 128), float** %ap +// CHECK: store float* getelementptr inbounds ([256 x float], [256 x float]* addrspacecast ([256 x float] addrspace(3)* @_ZZ5func2vE1a to [256 x float]*), i{{32|64}} 0, i{{32|64}} 128), float** %{{.*}} __device__ void func3() { __shared__ float a; @@ -75,7 +76,7 @@ *ap = 1.0f; } // CHECK: define void @_Z5func3v() -// CHECK: store float* addrspacecast (float addrspace(3)* @_ZZ5func3vE1a to float*), float** %ap +// CHECK: store float* addrspacecast (float addrspace(3)* @_ZZ5func3vE1a to float*), float** %{{.*}} __device__ void func4() { __shared__ float a; @@ -83,7 +84,7 @@ *ap = 1.0f; } // CHECK: define void @_Z5func4v() -// CHECK: store float* addrspacecast (float addrspace(3)* @_ZZ5func4vE1a to float*), float** %ap +// CHECK: store float* addrspacecast (float addrspace(3)* @_ZZ5func4vE1a to float*), float** %{{.*}} __shared__ float b; Index: cfe/trunk/test/CodeGenCUDA/device-var-init.cu =================================================================== --- cfe/trunk/test/CodeGenCUDA/device-var-init.cu +++ cfe/trunk/test/CodeGenCUDA/device-var-init.cu @@ -1,10 +1,14 @@ // REQUIRES: nvptx-registered-target +// REQUIRES: amdgpu-registered-target // Make sure we don't allow dynamic initialization for device // variables, but accept empty constructors allowed by CUDA. // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -std=c++11 \ -// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck %s +// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,NVPTX %s + +// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -std=c++11 \ +// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,AMDGCN %s #ifdef __clang__ #include "Inputs/cuda.h" @@ -105,50 +109,102 @@ __constant__ EC_I_EC c_ec_i_ec; // CHECK: @c_ec_i_ec = addrspace(4) externally_initialized global %struct.EC_I_EC zeroinitializer, +// CHECK: @_ZZ2dfvE4s_ec = internal addrspace(3) global %struct.EC undef +// CHECK: @_ZZ2dfvE5s_etc = internal addrspace(3) global %struct.ETC undef + // We should not emit global initializers for device-side variables. // CHECK-NOT: @__cxx_global_var_init // Make sure that initialization restrictions do not apply to local // variables. __device__ void df() { + // NVPTX: %[[ec:.*]] = alloca %struct.EC + // NVPTX: %[[ed:.*]] = alloca %struct.ED + // NVPTX: %[[ecd:.*]] = alloca %struct.ECD + // NVPTX: %[[etc:.*]] = alloca %struct.ETC + // NVPTX: %[[uc:.*]] = alloca %struct.UC + // NVPTX: %[[ud:.*]] = alloca %struct.UD + // NVPTX: %[[eci:.*]] = alloca %struct.ECI + // NVPTX: %[[nec:.*]] = alloca %struct.NEC + // NVPTX: %[[ned:.*]] = alloca %struct.NED + // NVPTX: %[[ncv:.*]] = alloca %struct.NCV + // NVPTX: %[[vd:.*]] = alloca %struct.VD + // NVPTX: %[[ncf:.*]] = alloca %struct.NCF + // NVPTX: %[[ncfs:.*]] = alloca %struct.NCFS + // NVPTX: %[[utc:.*]] = alloca %struct.UTC + // NVPTX: %[[netc:.*]] = alloca %struct.NETC + // NVPTX: %[[ec_i_ec:.*]] = alloca %struct.EC_I_EC + // NVPTX: %[[ec_i_ec1:.*]] = alloca %struct.EC_I_EC1 + // NVPTX: %[[t_v_t:.*]] = alloca %struct.T_V_T + // NVPTX: %[[t_b_nec:.*]] = alloca %struct.T_B_NEC + // NVPTX: %[[t_f_nec:.*]] = alloca %struct.T_F_NEC + // NVPTX: %[[t_fa_nec:.*]] = alloca %struct.T_FA_NEC + // NVPTX: %[[t_b_ned:.*]] = alloca %struct.T_B_NED + // NVPTX: %[[t_f_ned:.*]] = alloca %struct.T_F_NED + // NVPTX: %[[t_fa_ned:.*]] = alloca %struct.T_FA_NED + // AMDGCN: %[[ec:.*]] = addrspacecast %struct.EC addrspace(5)* %ec to %struct.EC* + // AMDGCN: %[[ed:.*]] = addrspacecast %struct.ED addrspace(5)* %ed to %struct.ED* + // AMDGCN: %[[ecd:.*]] = addrspacecast %struct.ECD addrspace(5)* %ecd to %struct.ECD* + // AMDGCN: %[[etc:.*]] = addrspacecast %struct.ETC addrspace(5)* %etc to %struct.ETC* + // AMDGCN: %[[uc:.*]] = addrspacecast %struct.UC addrspace(5)* %uc to %struct.UC* + // AMDGCN: %[[ud:.*]] = addrspacecast %struct.UD addrspace(5)* %ud to %struct.UD* + // AMDGCN: %[[eci:.*]] = addrspacecast %struct.ECI addrspace(5)* %eci to %struct.ECI* + // AMDGCN: %[[nec:.*]] = addrspacecast %struct.NEC addrspace(5)* %nec to %struct.NEC* + // AMDGCN: %[[ned:.*]] = addrspacecast %struct.NED addrspace(5)* %ned to %struct.NED* + // AMDGCN: %[[ncv:.*]] = addrspacecast %struct.NCV addrspace(5)* %ncv to %struct.NCV* + // AMDGCN: %[[vd:.*]] = addrspacecast %struct.VD addrspace(5)* %vd to %struct.VD* + // AMDGCN: %[[ncf:.*]] = addrspacecast %struct.NCF addrspace(5)* %ncf to %struct.NCF* + // AMDGCN: %[[ncfs:.*]] = addrspacecast %struct.NCFS addrspace(5)* %ncfs to %struct.NCFS* + // AMDGCN: %[[utc:.*]] = addrspacecast %struct.UTC addrspace(5)* %utc to %struct.UTC* + // AMDGCN: %[[netc:.*]] = addrspacecast %struct.NETC addrspace(5)* %netc to %struct.NETC* + // AMDGCN: %[[ec_i_ec:.*]] = addrspacecast %struct.EC_I_EC addrspace(5)* %ec_i_ec to %struct.EC_I_EC* + // AMDGCN: %[[ec_i_ec1:.*]] = addrspacecast %struct.EC_I_EC1 addrspace(5)* %ec_i_ec1 to %struct.EC_I_EC1* + // AMDGCN: %[[t_v_t:.*]] = addrspacecast %struct.T_V_T addrspace(5)* %t_v_t to %struct.T_V_T* + // AMDGCN: %[[t_b_nec:.*]] = addrspacecast %struct.T_B_NEC addrspace(5)* %t_b_nec to %struct.T_B_NEC* + // AMDGCN: %[[t_f_nec:.*]] = addrspacecast %struct.T_F_NEC addrspace(5)* %t_f_nec to %struct.T_F_NEC* + // AMDGCN: %[[t_fa_nec:.*]] = addrspacecast %struct.T_FA_NEC addrspace(5)* %t_fa_nec to %struct.T_FA_NEC* + // AMDGCN: %[[t_b_ned:.*]] = addrspacecast %struct.T_B_NED addrspace(5)* %t_b_ned to %struct.T_B_NED* + // AMDGCN: %[[t_f_ned:.*]] = addrspacecast %struct.T_F_NED addrspace(5)* %t_f_ned to %struct.T_F_NED* + // AMDGCN: %[[t_fa_ned:.*]] = addrspacecast %struct.T_FA_NED addrspace(5)* %t_fa_ned to %struct.T_FA_NED* + T t; // CHECK-NOT: call EC ec; - // CHECK: call void @_ZN2ECC1Ev(%struct.EC* %ec) + // CHECK: call void @_ZN2ECC1Ev(%struct.EC* %[[ec]]) ED ed; // CHECK-NOT: call ECD ecd; - // CHECK: call void @_ZN3ECDC1Ev(%struct.ECD* %ecd) + // CHECK: call void @_ZN3ECDC1Ev(%struct.ECD* %[[ecd]]) ETC etc; - // CHECK: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* %etc) + // CHECK: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* %[[etc]]) UC uc; // undefined constructor -- not allowed - // CHECK: call void @_ZN2UCC1Ev(%struct.UC* %uc) + // CHECK: call void @_ZN2UCC1Ev(%struct.UC* %[[uc]]) UD ud; // undefined destructor -- not allowed // CHECK-NOT: call ECI eci; // empty constructor w/ initializer list -- not allowed - // CHECK: call void @_ZN3ECIC1Ev(%struct.ECI* %eci) + // CHECK: call void @_ZN3ECIC1Ev(%struct.ECI* %[[eci]]) NEC nec; // non-empty constructor -- not allowed - // CHECK: call void @_ZN3NECC1Ev(%struct.NEC* %nec) + // CHECK: call void @_ZN3NECC1Ev(%struct.NEC* %[[nec]]) // non-empty destructor -- not allowed NED ned; // no-constructor, virtual method -- not allowed - // CHECK: call void @_ZN3NCVC1Ev(%struct.NCV* %ncv) + // CHECK: call void @_ZN3NCVC1Ev(%struct.NCV* %[[ncv]]) NCV ncv; // CHECK-NOT: call VD vd; - // CHECK: call void @_ZN2VDC1Ev(%struct.VD* %vd) + // CHECK: call void @_ZN2VDC1Ev(%struct.VD* %[[vd]]) NCF ncf; - // CHECK: call void @_ZN3NCFC1Ev(%struct.NCF* %ncf) + // CHECK: call void @_ZN3NCFC1Ev(%struct.NCF* %[[ncf]]) NCFS ncfs; - // CHECK: call void @_ZN4NCFSC1Ev(%struct.NCFS* %ncfs) + // CHECK: call void @_ZN4NCFSC1Ev(%struct.NCFS* %[[ncfs]]) UTC utc; - // CHECK: call void @_ZN3UTCC1IJEEEDpT_(%struct.UTC* %utc) + // CHECK: call void @_ZN3UTCC1IJEEEDpT_(%struct.UTC* %[[utc]]) NETC netc; - // CHECK: call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC* %netc) + // CHECK: call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC* %[[netc]]) T_B_T t_b_t; // CHECK-NOT: call T_F_T t_f_t; @@ -156,17 +212,17 @@ T_FA_T t_fa_t; // CHECK-NOT: call EC_I_EC ec_i_ec; - // CHECK: call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC* %ec_i_ec) + // CHECK: call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC* %[[ec_i_ec]]) EC_I_EC1 ec_i_ec1; - // CHECK: call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1* %ec_i_ec1) + // CHECK: call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1* %[[ec_i_ec1]]) T_V_T t_v_t; - // CHECK: call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %t_v_t) + // CHECK: call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %[[t_v_t]]) T_B_NEC t_b_nec; - // CHECK: call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* %t_b_nec) + // CHECK: call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* %[[t_b_nec]]) T_F_NEC t_f_nec; - // CHECK: call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* %t_f_nec) + // CHECK: call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* %[[t_f_nec]]) T_FA_NEC t_fa_nec; - // CHECK: call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* %t_fa_nec) + // CHECK: call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* %[[t_fa_nec]]) T_B_NED t_b_ned; // CHECK-NOT: call T_F_NED t_f_ned; @@ -182,14 +238,14 @@ df(); // CHECK: call void @_Z2dfv() // Verify that we only call non-empty destructors - // CHECK-NEXT: call void @_ZN8T_FA_NEDD1Ev(%struct.T_FA_NED* %t_fa_ned) - // CHECK-NEXT: call void @_ZN7T_F_NEDD1Ev(%struct.T_F_NED* %t_f_ned) - // CHECK-NEXT: call void @_ZN7T_B_NEDD1Ev(%struct.T_B_NED* %t_b_ned) - // CHECK-NEXT: call void @_ZN2VDD1Ev(%struct.VD* %vd) - // CHECK-NEXT: call void @_ZN3NEDD1Ev(%struct.NED* %ned) - // CHECK-NEXT: call void @_ZN2UDD1Ev(%struct.UD* %ud) - // CHECK-NEXT: call void @_ZN3ECDD1Ev(%struct.ECD* %ecd) - // CHECK-NEXT: call void @_ZN2EDD1Ev(%struct.ED* %ed) + // CHECK-NEXT: call void @_ZN8T_FA_NEDD1Ev(%struct.T_FA_NED* %[[t_fa_ned]]) + // CHECK-NEXT: call void @_ZN7T_F_NEDD1Ev(%struct.T_F_NED* %[[t_f_ned]]) + // CHECK-NEXT: call void @_ZN7T_B_NEDD1Ev(%struct.T_B_NED* %[[t_b_ned]]) + // CHECK-NEXT: call void @_ZN2VDD1Ev(%struct.VD* %[[vd]]) + // CHECK-NEXT: call void @_ZN3NEDD1Ev(%struct.NED* %[[ned]]) + // CHECK-NEXT: call void @_ZN2UDD1Ev(%struct.UD* %[[ud]]) + // CHECK-NEXT: call void @_ZN3ECDD1Ev(%struct.ECD* %[[ecd]]) + // CHECK-NEXT: call void @_ZN2EDD1Ev(%struct.ED* %[[ed]]) // CHECK-NEXT: ret void }