Index: lib/Driver/Tools.cpp
===================================================================
--- lib/Driver/Tools.cpp
+++ lib/Driver/Tools.cpp
@@ -12235,6 +12235,103 @@
   assert(TC.getTriple().isNVPTX() && "Wrong platform");
 
   ArgStringList CmdArgs;
+
+  // OpenMP uses nvlink to link cubin files. The result will be embedded in the
+  // host binary by the host linker.
+  assert(!JA.isHostOffloading(Action::OFK_OpenMP) &&
+         "CUDA toolchain not expected for an OpenMP host device.");
+  if (JA.isDeviceOffloading(Action::OFK_OpenMP)) {
+    if (Output.isFilename()) {
+      CmdArgs.push_back("-o");
+      CmdArgs.push_back(Output.getFilename());
+    } else
+      assert(Output.isNothing() && "Invalid output.");
+
+    if (Args.hasArg(options::OPT_g_Flag))
+      CmdArgs.push_back("-g");
+
+    if (Args.hasArg(options::OPT_v))
+      CmdArgs.push_back("-v");
+
+    // Exactly one -march= value is expected; it is forwarded to nvlink as
+    // -arch.
+    // FIXME: this is only enforced by an assert, so a build without assertions
+    // would index GPUArchs unchecked.
+    std::vector<std::string> GPUArchs =
+        Args.getAllArgValues(options::OPT_march_EQ);
+    assert(GPUArchs.size() == 1 && "Exactly one GPU Arch required for nvlink.");
+    const std::string &GPUArch = GPUArchs[0];
+
+    CmdArgs.push_back("-arch");
+    CmdArgs.push_back(Args.MakeArgString(GPUArch));
+
+    // Add paths specified in LIBRARY_PATH environment variable as -L options.
+    addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
+
+    // Add paths for the default clang library path.
+    SmallString<256> DefaultLibPath =
+        llvm::sys::path::parent_path(TC.getDriver().Dir);
+    llvm::sys::path::append(DefaultLibPath, "lib" CLANG_LIBDIR_SUFFIX);
+    CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));
+
+    // Add linking against library implementing OpenMP calls on NVPTX target.
+    CmdArgs.push_back("-lomptarget-nvptx");
+
+    // nvlink relies on the extension used by the input files
+    // to decide what to do. Given that ptxas produces cubin files
+    // we need to copy the input files to a new file with the right
+    // extension.
+    // FIXME: this can be efficiently done by specifying a new
+    // output type for the assembly action, however this would expose
+    // the target details to the driver and maybe we do not want to do
+    // that.
+    //
+    // The copy tool does not depend on the input, so resolve it once here.
+    // NOTE(review): "copy" is selected by driver mode (IsCLMode), not by host
+    // OS, and is looked up like an executable -- confirm this works on Windows.
+    const char *CopyExec = Args.MakeArgString(getToolChain().GetProgramPath(
+        C.getDriver().IsCLMode() ? "copy" : "cp"));
+
+    for (const auto &II : Inputs) {
+      // Diagnose LLVM IR/bitcode inputs (no linker support) and skip them.
+      if (II.getType() == types::TY_LLVM_IR ||
+          II.getType() == types::TY_LTO_IR ||
+          II.getType() == types::TY_LTO_BC ||
+          II.getType() == types::TY_LLVM_BC) {
+        C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
+            << getToolChain().getTripleString();
+        continue;
+      }
+
+      // Currently, we only pass the input files to the linker, we do not pass
+      // any libraries that may be valid only for the host.
+      if (!II.isFilename())
+        continue;
+
+      // Temporary file named after the input's stem, with a "cubin" suffix.
+      StringRef Name = llvm::sys::path::filename(II.getFilename());
+      std::pair<StringRef, StringRef> Split = Name.rsplit('.');
+      std::string TmpName =
+          C.getDriver().GetTemporaryPath(Split.first, "cubin");
+      const char *CubinF = C.addTempFile(C.getArgs().MakeArgString(TmpName));
+
+      ArgStringList CopyCmdArgs;
+      CopyCmdArgs.push_back(II.getFilename());
+      CopyCmdArgs.push_back(CubinF);
+      C.addCommand(
+          llvm::make_unique<Command>(JA, *this, CopyExec, CopyCmdArgs, Inputs));
+
+      CmdArgs.push_back(CubinF);
+    }
+
+    AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA);
+
+    const char *Exec =
+        Args.MakeArgString(getToolChain().GetProgramPath("nvlink"));
+    C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
+    return;
+  }
+
   CmdArgs.push_back("--cuda");
   CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
   CmdArgs.push_back(Args.MakeArgString("--create"));
Index: test/Driver/openmp-offload.c
===================================================================
--- test/Driver/openmp-offload.c
+++ test/Driver/openmp-offload.c
@@ -590,6 +590,17 @@
 
 /// ###########################################################################
 
+/// Check cubin file generation and usage by nvlink
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps -no-canonical-prefixes %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-CUBIN %s
+
+// CHK-CUBIN: clang{{.*}}" "-o" "{{.*}}-openmp-nvptx64-nvidia-cuda.s"
+// CHK-CUBIN-NEXT: ptxas{{.*}}" "--output-file" "{{.*}}-openmp-nvptx64-nvidia-cuda.o" "{{.*}}-openmp-nvptx64-nvidia-cuda.s"
+// CHK-CUBIN-NEXT: cp{{.*}}-openmp-nvptx64-nvidia-cuda.o" "{{.*}}-openmp-nvptx64-nvidia-cuda-{{.*}}.cubin"
+// CHK-CUBIN-NEXT: nvlink" "-o" "{{.*}}-openmp-nvptx64-nvidia-cuda" "{{.*}}" "{{.*}}-openmp-nvptx64-nvidia-cuda-{{.*}}.cubin"
+
+/// ###########################################################################
+
 /// Check PTXAS is passed -c flag when offloading to an NVIDIA device using OpenMP.
 // RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps -no-canonical-prefixes %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-PTXAS %s