Index: clang/lib/CodeGen/CGCUDANV.cpp =================================================================== --- clang/lib/CodeGen/CGCUDANV.cpp +++ clang/lib/CodeGen/CGCUDANV.cpp @@ -472,6 +472,18 @@ CtorBuilder.CreateCall(RegisterLinkedBinaryFunc, Args); } + // Create destructor and register it with atexit() the way NVCC does it. Doing + // it during regular destructor phase worked in CUDA before 9.2 but results in + // double-free in 9.2. + llvm::Function *CleanupFn = makeModuleDtorFunction(); + // extern "C" int atexit(void (*f)(void)); + llvm::FunctionType *AtExitTy = + llvm::FunctionType::get(IntTy, CleanupFn->getType(), false); + llvm::Constant *AtExitFunc = + CGM.CreateRuntimeFunction(AtExitTy, "atexit", llvm::AttributeList(), + /*Local=*/true); + CtorBuilder.CreateCall(AtExitFunc, CleanupFn); + CtorBuilder.CreateRetVoid(); return ModuleCtorFunc; } Index: clang/lib/CodeGen/CodeGenModule.cpp =================================================================== --- clang/lib/CodeGen/CodeGenModule.cpp +++ clang/lib/CodeGen/CodeGenModule.cpp @@ -404,10 +404,9 @@ AddGlobalCtor(ObjCInitFunction); if (Context.getLangOpts().CUDA && !Context.getLangOpts().CUDAIsDevice && CUDARuntime) { - if (llvm::Function *CudaCtorFunction = CUDARuntime->makeModuleCtorFunction()) + if (llvm::Function *CudaCtorFunction = + CUDARuntime->makeModuleCtorFunction()) AddGlobalCtor(CudaCtorFunction); - if (llvm::Function *CudaDtorFunction = CUDARuntime->makeModuleDtorFunction()) - AddGlobalDtor(CudaDtorFunction); } if (OpenMPRuntime) { if (llvm::Function *OpenMPRegistrationFunction =