diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -69,6 +69,8 @@ bool RelocatableDeviceCode; /// Mangle context for device. std::unique_ptr DeviceMC; + /// Some zeros used for GEPs. + llvm::Constant *Zeros[2]; llvm::FunctionCallee getSetupArgumentFn() const; llvm::FunctionCallee getLaunchFn() const; @@ -86,14 +88,25 @@ /// the start of the string. The result of this function can be used anywhere /// where the C code specifies const char*. llvm::Constant *makeConstantString(const std::string &Str, - const std::string &Name = "", - const std::string &SectionName = "", - unsigned Alignment = 0) { - llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0), - llvm::ConstantInt::get(SizeTy, 0)}; + const std::string &Name = "") { auto ConstStr = CGM.GetAddrOfConstantCString(Str, Name.c_str()); - llvm::GlobalVariable *GV = - cast(ConstStr.getPointer()); + return llvm::ConstantExpr::getGetElementPtr(ConstStr.getElementType(), + ConstStr.getPointer(), Zeros); + } + + /// Helper function which generates an initialized constant array from Str, + /// and optionally sets section name and alignment. AddNull specifies whether + /// the array should nave NUL termination. + llvm::Constant *makeConstantArray(StringRef Str, + StringRef Name = "", + StringRef SectionName = "", + unsigned Alignment = 0, + bool AddNull = false) { + llvm::Constant *Value = + llvm::ConstantDataArray::getString(Context, Str, AddNull); + auto *GV = new llvm::GlobalVariable( + TheModule, Value->getType(), /*isConstant=*/true, + llvm::GlobalValue::PrivateLinkage, Value, Name); if (!SectionName.empty()) { GV->setSection(SectionName); // Mark the address as used which make sure that this section isn't @@ -102,9 +115,7 @@ } if (Alignment) GV->setAlignment(llvm::Align(Alignment)); - - return llvm::ConstantExpr::getGetElementPtr(ConstStr.getElementType(), - ConstStr.getPointer(), Zeros); + return llvm::ConstantExpr::getGetElementPtr(GV->getValueType(), GV, Zeros); } /// Helper function that generates an empty dummy function returning void. @@ -220,6 +231,8 @@ IntTy = CGM.IntTy; SizeTy = CGM.SizeTy; VoidTy = CGM.VoidTy; + Zeros[0] = llvm::ConstantInt::get(SizeTy, 0); + Zeros[1] = Zeros[0]; CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy)); VoidPtrTy = cast(Types.ConvertType(Ctx.VoidPtrTy)); @@ -744,9 +757,8 @@ // If fatbin is available from early finalization, create a string // literal containing the fat binary loaded from the given file. const unsigned HIPCodeObjectAlign = 4096; - FatBinStr = - makeConstantString(std::string(CudaGpuBinary->getBuffer()), "", - FatbinConstantName, HIPCodeObjectAlign); + FatBinStr = makeConstantArray(std::string(CudaGpuBinary->getBuffer()), "", + FatbinConstantName, HIPCodeObjectAlign); } else { // If fatbin is not available, create an external symbol // __hip_fatbin in section .hip_fatbin. The external symbol is supposed @@ -780,8 +792,8 @@ // For CUDA, create a string literal containing the fat binary loaded from // the given file. - FatBinStr = makeConstantString(std::string(CudaGpuBinary->getBuffer()), "", - FatbinConstantName, 8); + FatBinStr = makeConstantArray(std::string(CudaGpuBinary->getBuffer()), "", + FatbinConstantName, 8); FatMagic = CudaFatMagic; } @@ -888,8 +900,8 @@ SmallString<64> ModuleID; llvm::raw_svector_ostream OS(ModuleID); OS << ModuleIDPrefix << llvm::format("%" PRIx64, FatbinWrapper->getGUID()); - llvm::Constant *ModuleIDConstant = makeConstantString( - std::string(ModuleID.str()), "", ModuleIDSectionName, 32); + llvm::Constant *ModuleIDConstant = makeConstantArray( + std::string(ModuleID.str()), "", ModuleIDSectionName, 32, /*AddNull=*/true); // Create an alias for the FatbinWrapper that nvcc will look for. llvm::GlobalAlias::create(llvm::GlobalValue::ExternalLinkage, diff --git a/clang/test/CodeGenCUDA/device-stub.cu b/clang/test/CodeGenCUDA/device-stub.cu --- a/clang/test/CodeGenCUDA/device-stub.cu +++ b/clang/test/CodeGenCUDA/device-stub.cu @@ -1,4 +1,4 @@ -// RUN: echo "GPU binary would be here" > %t +// RUN: echo -n "GPU binary would be here." > %t // RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-linux-gnu -emit-llvm %s \ // RUN: -target-sdk-version=8.0 -fcuda-include-gpubinary %t -o - \ // RUN: | FileCheck -allow-deprecated-dag-overlap %s \ @@ -159,8 +159,8 @@ // ALL: @3 = private unnamed_addr constant [19 x i8] c"ext_device_var_def\00" // ALL: @4 = private unnamed_addr constant [21 x i8] c"ext_constant_var_def\00" // * constant unnamed string with GPU binary -// CUDA: @[[FATBIN:.*]] = private constant{{.*GPU binary would be here.*}}\00", -// HIPEF: @[[FATBIN:.*]] = private constant{{.*GPU binary would be here.*}}\00",{{.*}}align 4096 +// CUDA: @[[FATBIN:.*]] = private constant{{.*}} c"GPU binary would be here.", +// HIPEF: @[[FATBIN:.*]] = private constant{{.*}} c"GPU binary would be here.",{{.*}}align 4096 // HIPNEF: @[[FATBIN:__hip_fatbin]] = external constant i8, section ".hip_fatbin" // CUDANORDC-SAME: section ".nv_fatbin", align 8 // CUDARDC-SAME: section "__nv_relfatbin", align 8