Index: include/clang/Basic/LangOptions.def =================================================================== --- include/clang/Basic/LangOptions.def +++ include/clang/Basic/LangOptions.def @@ -169,7 +169,6 @@ LANGOPT(CUDAAllowHostCallsFromHostDevice, 1, 0, "Allow host device functions to call host functions") LANGOPT(CUDADisableTargetCallChecks, 1, 0, "Disable checks for call targets (host, device, etc.)") LANGOPT(CUDATargetOverloads, 1, 0, "Enable function overloads based on CUDA target attributes") -LANGOPT(CUDAUsesLibDevice , 1, 0, "Selectively link and internalize bitcode.") LANGOPT(AssumeSaneOperatorNew , 1, 1, "implicit __attribute__((malloc)) for C++'s new operators") LANGOPT(SizedDeallocation , 1, 0, "enable sized deallocation functions") Index: include/clang/CodeGen/CodeGenAction.h =================================================================== --- include/clang/CodeGen/CodeGenAction.h +++ include/clang/CodeGen/CodeGenAction.h @@ -25,7 +25,9 @@ private: unsigned Act; std::unique_ptr TheModule; - llvm::Module *LinkModule; + // Vector of {Linker::Flags, Module*} pairs to specify bitcode + // modules to link in using corresponding linker flags. + SmallVector, 4> LinkModules; llvm::LLVMContext *VMContext; bool OwnsVMContext; @@ -50,7 +52,9 @@ /// setLinkModule - Set the link module to be used by this action. If a link /// module is not provided, and CodeGenOptions::LinkBitcodeFile is non-empty, /// the action will load it from the specified file. - void setLinkModule(llvm::Module *Mod) { LinkModule = Mod; } + void addLinkModule(llvm::Module *Mod, unsigned LinkFlags) { + LinkModules.push_back(std::make_pair(LinkFlags, Mod)); + } /// Take the generated LLVM module, for use after the action has been run. /// The result may be null on failure. Index: include/clang/Driver/CC1Options.td =================================================================== --- include/clang/Driver/CC1Options.td +++ include/clang/Driver/CC1Options.td @@ -240,6 +240,9 @@ HelpText<"Emit complete constructors and destructors as aliases when possible">; def mlink_bitcode_file : Separate<["-"], "mlink-bitcode-file">, HelpText<"Link the given bitcode file before performing optimizations.">; +def mlink_cuda_bitcode : Separate<["-"], "mlink-cuda-bitcode">, + HelpText<"Link and internalize needed symbols from the given bitcode file " + "before performing optimizations.">; def vectorize_loops : Flag<["-"], "vectorize-loops">, HelpText<"Run the Loop vectorization passes">; def vectorize_slp : Flag<["-"], "vectorize-slp">, @@ -665,8 +668,6 @@ HelpText<"Incorporate CUDA device-side binary into host object file.">; def fcuda_target_overloads : Flag<["-"], "fcuda-target-overloads">, HelpText<"Enable function overloads based on CUDA target attributes.">; -def fcuda_uses_libdevice : Flag<["-"], "fcuda-uses-libdevice">, - HelpText<"Selectively link and internalize bitcode.">; } // let Flags = [CC1Option] Index: include/clang/Frontend/CodeGenOptions.h =================================================================== --- include/clang/Frontend/CodeGenOptions.h +++ include/clang/Frontend/CodeGenOptions.h @@ -130,7 +130,7 @@ std::string LimitFloatPrecision; /// The name of the bitcode file to link before optzns. - std::string LinkBitcodeFile; + std::vector> LinkBitcodeFiles; /// The user provided name for the "main file", if non-empty. This is useful /// in situations where the input file name does not match the original input Index: lib/CodeGen/CodeGenAction.cpp =================================================================== --- lib/CodeGen/CodeGenAction.cpp +++ lib/CodeGen/CodeGenAction.cpp @@ -53,29 +53,35 @@ std::unique_ptr Gen; - std::unique_ptr TheModule, LinkModule; + std::unique_ptr TheModule; + SmallVector>, 4> + LinkModules; public: - BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags, - const HeaderSearchOptions &HeaderSearchOpts, - const PreprocessorOptions &PPOpts, - const CodeGenOptions &CodeGenOpts, - const TargetOptions &TargetOpts, - const LangOptions &LangOpts, bool TimePasses, - const std::string &InFile, llvm::Module *LinkModule, - raw_pwrite_stream *OS, LLVMContext &C, - CoverageSourceInfo *CoverageInfo = nullptr) + BackendConsumer( + BackendAction Action, DiagnosticsEngine &Diags, + const HeaderSearchOptions &HeaderSearchOpts, + const PreprocessorOptions &PPOpts, const CodeGenOptions &CodeGenOpts, + const TargetOptions &TargetOpts, const LangOptions &LangOpts, + bool TimePasses, const std::string &InFile, + const SmallVectorImpl> &LinkModules, + raw_pwrite_stream *OS, LLVMContext &C, + CoverageSourceInfo *CoverageInfo = nullptr) : Diags(Diags), Action(Action), CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts), AsmOutStream(OS), Context(nullptr), LLVMIRGeneration("LLVM IR Generation Time"), Gen(CreateLLVMCodeGen(Diags, InFile, HeaderSearchOpts, PPOpts, - CodeGenOpts, C, CoverageInfo)), - LinkModule(LinkModule) { + CodeGenOpts, C, CoverageInfo)) { llvm::TimePassesIsEnabled = TimePasses; + for (auto &I : LinkModules) + this->LinkModules.push_back( + std::make_pair(I.first, std::unique_ptr(I.second))); } - std::unique_ptr takeModule() { return std::move(TheModule); } - llvm::Module *takeLinkModule() { return LinkModule.release(); } + void releaseLinkModules() { + for (auto &I : LinkModules) + I.second.release(); + } void HandleCXXStaticMemberVarInstantiation(VarDecl *VD) override { Gen->HandleCXXStaticMemberVarInstantiation(VD); @@ -156,15 +162,14 @@ "Unexpected module change during IR generation"); // Link LinkModule into this module if present, preserving its validity. - if (LinkModule) { - if (Linker::LinkModules( - M, LinkModule.get(), - [=](const DiagnosticInfo &DI) { linkerDiagnosticHandler(DI); }, - (LangOpts.CUDA && LangOpts.CUDAIsDevice && - LangOpts.CUDAUsesLibDevice) - ? (Linker::Flags::LinkOnlyNeeded | - Linker::Flags::InternalizeLinkedSymbols) - : Linker::Flags::None)) + for (auto &I : LinkModules) { + unsigned LinkFlags = I.first; + llvm::Module *LinkModule = I.second.get(); + if (Linker::LinkModules(M, LinkModule, + [=](const DiagnosticInfo &DI) { + linkerDiagnosticHandler(DI, LinkModule); + }, + LinkFlags)) return; } @@ -228,7 +233,8 @@ ((BackendConsumer*)Context)->InlineAsmDiagHandler2(SM, Loc); } - void linkerDiagnosticHandler(const llvm::DiagnosticInfo &DI); + void linkerDiagnosticHandler(const llvm::DiagnosticInfo &DI, + const llvm::Module *LinkModule); static void DiagnosticHandler(const llvm::DiagnosticInfo &DI, void *Context) { @@ -539,7 +545,8 @@ EmitOptimizationMessage(D, diag::warn_fe_backend_optimization_failure); } -void BackendConsumer::linkerDiagnosticHandler(const DiagnosticInfo &DI) { +void BackendConsumer::linkerDiagnosticHandler(const DiagnosticInfo &DI, + const llvm::Module *LinkModule) { if (DI.getSeverity() != DS_Error) return; @@ -623,9 +630,8 @@ #undef ComputeDiagID CodeGenAction::CodeGenAction(unsigned _Act, LLVMContext *_VMContext) - : Act(_Act), LinkModule(nullptr), - VMContext(_VMContext ? _VMContext : new LLVMContext), - OwnsVMContext(!_VMContext) {} + : Act(_Act), VMContext(_VMContext ? _VMContext : new LLVMContext), + OwnsVMContext(!_VMContext) {} CodeGenAction::~CodeGenAction() { TheModule.reset(); @@ -640,9 +646,9 @@ if (!getCompilerInstance().hasASTConsumer()) return; - // If we were given a link module, release consumer's ownership of it. - if (LinkModule) - BEConsumer->takeLinkModule(); + // Take back ownership of link modules we passed to consumer. + if (!LinkModules.empty()) + BEConsumer->releaseLinkModules(); // Steal the module from the consumer. TheModule = BEConsumer->takeModule(); @@ -684,28 +690,29 @@ if (BA != Backend_EmitNothing && !OS) return nullptr; - llvm::Module *LinkModuleToUse = LinkModule; + // Load bitcode modules to link with, if we need to. + if (LinkModules.empty()) + for (auto &I : CI.getCodeGenOpts().LinkBitcodeFiles) { + const std::string &LinkBCFile = I.second; - // If we were not given a link module, and the user requested that one be - // loaded from bitcode, do so now. - const std::string &LinkBCFile = CI.getCodeGenOpts().LinkBitcodeFile; - if (!LinkModuleToUse && !LinkBCFile.empty()) { - auto BCBuf = CI.getFileManager().getBufferForFile(LinkBCFile); - if (!BCBuf) { - CI.getDiagnostics().Report(diag::err_cannot_open_file) - << LinkBCFile << BCBuf.getError().message(); - return nullptr; - } + auto BCBuf = CI.getFileManager().getBufferForFile(LinkBCFile); + if (!BCBuf) { + CI.getDiagnostics().Report(diag::err_cannot_open_file) + << LinkBCFile << BCBuf.getError().message(); + LinkModules.clear(); + return nullptr; + } - ErrorOr> ModuleOrErr = - getLazyBitcodeModule(std::move(*BCBuf), *VMContext); - if (std::error_code EC = ModuleOrErr.getError()) { - CI.getDiagnostics().Report(diag::err_cannot_open_file) - << LinkBCFile << EC.message(); - return nullptr; + ErrorOr> ModuleOrErr = + getLazyBitcodeModule(std::move(*BCBuf), *VMContext); + if (std::error_code EC = ModuleOrErr.getError()) { + CI.getDiagnostics().Report(diag::err_cannot_open_file) << LinkBCFile + << EC.message(); + LinkModules.clear(); + return nullptr; + } + addLinkModule(ModuleOrErr.get().release(), I.first); } - LinkModuleToUse = ModuleOrErr.get().release(); - } CoverageSourceInfo *CoverageInfo = nullptr; // Add the preprocessor callback only when the coverage mapping is generated. @@ -714,11 +721,12 @@ CI.getPreprocessor().addPPCallbacks( std::unique_ptr(CoverageInfo)); } + std::unique_ptr Result(new BackendConsumer( BA, CI.getDiagnostics(), CI.getHeaderSearchOpts(), CI.getPreprocessorOpts(), CI.getCodeGenOpts(), CI.getTargetOpts(), - CI.getLangOpts(), CI.getFrontendOpts().ShowTimers, InFile, - LinkModuleToUse, OS, *VMContext, CoverageInfo)); + CI.getLangOpts(), CI.getFrontendOpts().ShowTimers, InFile, LinkModules, + OS, *VMContext, CoverageInfo)); BEConsumer = Result.get(); return std::move(Result); } Index: lib/Frontend/CompilerInvocation.cpp =================================================================== --- lib/Frontend/CompilerInvocation.cpp +++ lib/Frontend/CompilerInvocation.cpp @@ -25,6 +25,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" +#include "llvm/Linker/Linker.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Option/OptTable.h" @@ -539,7 +540,13 @@ Opts.EmitOpenCLArgMetadata = Args.hasArg(OPT_cl_kernel_arg_info); Opts.CompressDebugSections = Args.hasArg(OPT_compress_debug_sections); Opts.DebugCompilationDir = Args.getLastArgValue(OPT_fdebug_compilation_dir); - Opts.LinkBitcodeFile = Args.getLastArgValue(OPT_mlink_bitcode_file); + for (auto A : Args.filtered(OPT_mlink_bitcode_file, OPT_mlink_cuda_bitcode)) { + unsigned LinkFlags = llvm::Linker::Flags::None; + if (A->getOption().matches(OPT_mlink_cuda_bitcode)) + LinkFlags = llvm::Linker::Flags::LinkOnlyNeeded | + llvm::Linker::Flags::InternalizeLinkedSymbols; + Opts.LinkBitcodeFiles.push_back(std::make_pair(LinkFlags, A->getValue())); + } Opts.SanitizeCoverageType = getLastArgIntValue(Args, OPT_fsanitize_coverage_type, 0, Diags); Opts.SanitizeCoverageIndirectCalls = @@ -1394,9 +1401,6 @@ if (Args.hasArg(OPT_fcuda_is_device)) Opts.CUDAIsDevice = 1; - if (Args.hasArg(OPT_fcuda_uses_libdevice)) - Opts.CUDAUsesLibDevice = 1; - if (Args.hasArg(OPT_fcuda_allow_host_calls_from_host_device)) Opts.CUDAAllowHostCallsFromHostDevice = 1; Index: test/CodeGen/link-bitcode-file.c =================================================================== --- test/CodeGen/link-bitcode-file.c +++ test/CodeGen/link-bitcode-file.c @@ -1,6 +1,12 @@ // RUN: %clang_cc1 -triple i386-pc-linux-gnu -DBITCODE -emit-llvm-bc -o %t.bc %s -// RUN: %clang_cc1 -triple i386-pc-linux-gnu -mlink-bitcode-file %t.bc -O3 -emit-llvm -o - %s | FileCheck -check-prefix=CHECK-NO-BC %s -// RUN: not %clang_cc1 -triple i386-pc-linux-gnu -DBITCODE -mlink-bitcode-file %t.bc -O3 -emit-llvm -o - %s 2>&1 | FileCheck -check-prefix=CHECK-BC %s +// RUN: %clang_cc1 -triple i386-pc-linux-gnu -DBITCODE2 -emit-llvm-bc -o %t-2.bc %s +// RUN: %clang_cc1 -triple i386-pc-linux-gnu -mlink-bitcode-file %t.bc \ +// RUN: -O3 -emit-llvm -o - %s | FileCheck -check-prefix=CHECK-NO-BC %s +// RUN: %clang_cc1 -triple i386-pc-linux-gnu -O3 -emit-llvm -o - \ +// RUN: -mlink-bitcode-file %t.bc -mlink-bitcode-file %t-2.bc %s \ +// RUN: | FileCheck -check-prefix=CHECK-NO-BC -check-prefix=CHECK-NO-BC2 %s +// RUN: not %clang_cc1 -triple i386-pc-linux-gnu -DBITCODE -O3 -emit-llvm -o - \ +// RUN: -mlink-bitcode-file %t.bc %s 2>&1 | FileCheck -check-prefix=CHECK-BC %s // Make sure we deal with failure to load the file. // RUN: not %clang_cc1 -triple i386-pc-linux-gnu -mlink-bitcode-file no-such-file.bc \ // RUN: -emit-llvm -o - %s 2>&1 | FileCheck -check-prefix=CHECK-NO-FILE %s @@ -9,11 +15,15 @@ #ifdef BITCODE +extern int f2(void); // CHECK-BC: fatal error: cannot link module {{.*}}'f': symbol multiply defined int f(void) { + f2(); return 42; } +#elif BITCODE2 +int f2(void) { return 43; } #else // CHECK-NO-BC-LABEL: define i32 @g @@ -23,6 +33,7 @@ } // CHECK-NO-BC-LABEL: define i32 @f +// CHECK-NO-BC2-LABEL: define i32 @f2 #endif Index: test/CodeGenCUDA/Inputs/device-code-2.ll =================================================================== --- /dev/null +++ test/CodeGenCUDA/Inputs/device-code-2.ll @@ -0,0 +1,16 @@ +; Simple bit of IR to mimic CUDA's libdevice. + +target triple = "nvptx-unknown-cuda" + +define double @__nv_sin(double %a) { + ret double 1.0 +} + +define double @__nv_exp(double %a) { + ret double 3.0 +} + +define double @__unused(double %a) { + ret double 2.0 +} + Index: test/CodeGenCUDA/link-device-bitcode.cu =================================================================== --- test/CodeGenCUDA/link-device-bitcode.cu +++ test/CodeGenCUDA/link-device-bitcode.cu @@ -6,13 +6,21 @@ // Prepare bitcode file to link with // RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc -o %t.bc \ // RUN: %S/Inputs/device-code.ll +// RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc -o %t-2.bc \ +// RUN: %S/Inputs/device-code-2.ll // // Make sure function in device-code gets linked in and internalized. // RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \ -// RUN: -mlink-bitcode-file %t.bc -fcuda-uses-libdevice -emit-llvm \ +// RUN: -mlink-cuda-bitcode %t.bc -emit-llvm \ // RUN: -disable-llvm-passes -o - %s \ // RUN: | FileCheck %s -check-prefix CHECK-IR // +// Make sure we can link two bitcode files. +// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \ +// RUN: -mlink-cuda-bitcode %t.bc -mlink-cuda-bitcode %t-2.bc \ +// RUN: -emit-llvm -disable-llvm-passes -o - %s \ +// RUN: | FileCheck %s -check-prefix CHECK-IR -check-prefix CHECK-IR-2 +// // Make sure function in device-code gets linked but is not internalized // without -fcuda-uses-libdevice // RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \ @@ -22,7 +30,7 @@ // // Make sure NVVMReflect pass is enabled in NVPTX back-end. // RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \ -// RUN: -mlink-bitcode-file %t.bc -fcuda-uses-libdevice -S -o /dev/null %s \ +// RUN: -mlink-cuda-bitcode %t.bc -S -o /dev/null %s \ // RUN: -backend-option -debug-pass=Structure 2>&1 \ // RUN: | FileCheck %s -check-prefix CHECK-REFLECT @@ -52,5 +60,11 @@ // CHECK-IR: call i32 @__nvvm_reflect // CHECK-IR: ret float +// Make sure we've linked in and internalized only needed functions +// from the second bitcode file. +// CHECK-IR-2-LABEL: define internal double @__nv_sin +// CHECK-IR-2-LABEL: define internal double @__nv_exp +// CHECK-IR-2-NOT: double @__unused + // Verify that NVVMReflect pass is among the passes run by NVPTX back-end. // CHECK-REFLECT: Replace occurrences of __nvvm_reflect() calls with 0/1