diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -8148,11 +8148,24 @@ const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { + const Driver &D = getToolChain().getDriver(); + const llvm::Triple TheTriple = getToolChain().getTriple(); + auto OpenMPTCRange = C.getOffloadToolChains(); ArgStringList CmdArgs; - if (getToolChain().getDriver().isUsingLTO(/* IsOffload */ true)) { + // Pass the CUDA path to the linker wrapper tool. + for (auto &I : llvm::make_range(OpenMPTCRange.first, OpenMPTCRange.second)) { + const ToolChain *TC = I.second; + if (TC->getTriple().isNVPTX()) { + CudaInstallationDetector CudaInstallation(D, TheTriple, Args); + if (CudaInstallation.isValid()) + CmdArgs.push_back(Args.MakeArgString( + "-cuda-path=" + CudaInstallation.getInstallPath())); + } + } + + if (D.isUsingLTO(/* IsOffload */ true)) { // Pass in target features for each toolchain. - auto OpenMPTCRange = C.getOffloadToolChains(); for (auto &I : llvm::make_range(OpenMPTCRange.first, OpenMPTCRange.second)) { const ToolChain *TC = I.second; @@ -8165,9 +8178,10 @@ } // Pass in the bitcode library to be linked during LTO. 
- for (auto &I : llvm::make_range(OpenMPTCRange.first, OpenMPTCRange.second)) { + for (auto &I : + llvm::make_range(OpenMPTCRange.first, OpenMPTCRange.second)) { const ToolChain *TC = I.second; - const Driver &D = TC->getDriver(); + const Driver &TCDriver = TC->getDriver(); const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP); StringRef Arch = TCArgs.getLastArgValue(options::OPT_march_EQ); @@ -8182,7 +8196,7 @@ BitcodeSuffix += Arch; ArgStringList BitcodeLibrary; - addOpenMPDeviceRTL(D, TCArgs, BitcodeLibrary, BitcodeSuffix, + addOpenMPDeviceRTL(TCDriver, TCArgs, BitcodeLibrary, BitcodeSuffix, TC->getTriple()); if (!BitcodeLibrary.empty()) @@ -8210,12 +8224,8 @@ } } - // Construct the link job so we can wrap around it. - Linker->ConstructJob(C, JA, Output, Inputs, Args, LinkingOutput); - const auto &LinkCommand = C.getJobs().getJobs().back(); - CmdArgs.push_back("-host-triple"); - CmdArgs.push_back(Args.MakeArgString(getToolChain().getTripleString())); + CmdArgs.push_back(Args.MakeArgString(TheTriple.getTriple())); if (Args.hasArg(options::OPT_v)) CmdArgs.push_back("-v"); @@ -8246,6 +8256,10 @@ if (Args.getLastArg(options::OPT_save_temps_EQ)) CmdArgs.push_back("-save-temps"); + // Construct the link job so we can wrap around it. + Linker->ConstructJob(C, JA, Output, Inputs, Args, LinkingOutput); + const auto &LinkCommand = C.getJobs().getJobs().back(); + // Add the linker arguments to be forwarded by the wrapper. 
CmdArgs.push_back("-linker-path"); CmdArgs.push_back(LinkCommand->getExecutable()); diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -99,8 +99,8 @@ static cl::list PtxasArgs("ptxas-args", cl::ZeroOrMore, - cl::desc("Argument to pass to the ptxas invocation"), - cl::cat(ClangLinkerWrapperCategory)); + cl::desc("Argument to pass to the ptxas invocation"), + cl::cat(ClangLinkerWrapperCategory)); static cl::opt Verbose("v", cl::ZeroOrMore, cl::desc("Verbose output from tools"), @@ -118,6 +118,10 @@ cl::desc("Save intermediary results."), cl::cat(ClangLinkerWrapperCategory)); +static cl::opt CudaPath("cuda-path", cl::ZeroOrMore, + cl::desc("Set the system CUDA path"), + cl::cat(ClangLinkerWrapperCategory)); + // Do not parse linker options. static cl::list HostLinkerArgs(cl::Positional, @@ -129,6 +133,9 @@ /// Filename of the executable being created. static StringRef ExecutableName; +/// Binary path for the CUDA installation. +static std::string CudaBinaryPath; + /// Temporary files created by the linker wrapper. static SmallVector TempFiles; @@ -507,9 +514,9 @@ namespace nvptx { Expected assemble(StringRef InputFile, Triple TheTriple, StringRef Arch) { - // NVPTX uses the nvlink binary to link device object files. + // NVPTX uses the ptxas binary to create device object files. ErrorOr PtxasPath = - sys::findProgramByName("ptxas", sys::path::parent_path(LinkerExecutable)); + sys::findProgramByName("ptxas", {CudaBinaryPath}); if (!PtxasPath) PtxasPath = sys::findProgramByName("ptxas"); if (!PtxasPath) @@ -554,7 +561,10 @@ Expected link(ArrayRef InputFiles, Triple TheTriple, StringRef Arch) { // NVPTX uses the nvlink binary to link device object files. 
- ErrorOr NvlinkPath = sys::findProgramByName("nvlink"); + ErrorOr NvlinkPath = + sys::findProgramByName("nvlink", {CudaBinaryPath}); + if (!NvlinkPath) + NvlinkPath = sys::findProgramByName("nvlink"); if (!NvlinkPath) return createStringError(NvlinkPath.getError(), "Unable to find 'nvlink' in path"); @@ -1097,6 +1107,9 @@ return EXIT_FAILURE; }; + if (!CudaPath.empty()) + CudaBinaryPath = CudaPath + "/bin"; + ExecutableName = *(llvm::find(HostLinkerArgs, "-o") + 1); SmallVector LinkerArgs; for (const std::string &Arg : HostLinkerArgs)