+  // A unique ctor/dtor name is necessary for incremental and lazy JIT
+  // compilation of cuda code in which each TU can include more than one llvm
+  // module. Each llvm module has a cuda ctor/dtor (if a fatbinary file exists).
+  // A unique ctor/dtor name is necessary for incremental and lazy JIT
+  // compilation of cuda code in which each TU can include more than one llvm
+  // module. Each llvm module has a cuda ctor/dtor (if a fatbinary file exists).
+  // To enable the generation of cuda host code, the auxTriple needs to be
+  // set up.
+  // Make sure that the cuda ctors and dtors exist:
+ ASSERT_FALSE(CUDActor1->getName() == CUDActor2->getName()); + ASSERT_FALSE(CUDAdtor1->getName() == CUDAdtor2->getName()); + + ASSERT_FALSE((bool)tmpFatbinFile.discard()); +} + } // end anonymous namespace