Index: include/clang/Basic/DiagnosticDriverKinds.td =================================================================== --- include/clang/Basic/DiagnosticDriverKinds.td +++ include/clang/Basic/DiagnosticDriverKinds.td @@ -69,6 +69,8 @@ "invalid Xarch argument: '%0', options requiring arguments are unsupported">; def err_drv_invalid_Xarch_argument_isdriver : Error< "invalid Xarch argument: '%0', cannot change driver behavior inside Xarch argument">; +def err_drv_Xopenmp_target_missing_triple : Error< + "cannot deduce implicit triple value for -Xopenmp-target, specify triple using -Xopenmp-target=">; def err_drv_argument_only_allowed_with : Error< "invalid argument '%0' only allowed with '%1'">; def err_drv_argument_not_allowed_with : Error< Index: include/clang/Driver/Options.td =================================================================== --- include/clang/Driver/Options.td +++ include/clang/Driver/Options.td @@ -459,6 +459,10 @@ HelpText<"Pass to fatbinary invocation">, MetaVarName<"">; def Xcuda_ptxas : Separate<["-"], "Xcuda-ptxas">, HelpText<"Pass to the ptxas assembler">, MetaVarName<"">; +def Xopenmp_target : Separate<["-"], "Xopenmp-target">, + HelpText<"Pass to the target offloading toolchain.">, MetaVarName<"">; +def Xopenmp_target_EQ : JoinedAndSeparate<["-"], "Xopenmp-target=">, + HelpText<"Pass to the specified target offloading toolchain. The triple that identifies the toolchain must be provided after the equals sign.">, MetaVarName<"">; def z : Separate<["-"], "z">, Flags<[LinkerInput, RenderAsInput]>, HelpText<"Pass -z to the linker">, MetaVarName<"">, Group; Index: lib/Driver/Driver.cpp =================================================================== --- lib/Driver/Driver.cpp +++ lib/Driver/Driver.cpp @@ -525,7 +525,7 @@ auto &CudaTC = ToolChains[CudaTriple.str() + "/" + HostTriple.str()]; if (!CudaTC) { CudaTC = llvm::make_unique( - *this, CudaTriple, *HostTC, C.getInputArgs()); + *this, CudaTriple, *HostTC, C.getInputArgs(), Action::OFK_Cuda); } C.addOffloadDeviceToolChain(CudaTC.get(), Action::OFK_Cuda); } @@ -583,7 +583,7 @@ ToolChains[TT.str() + "/" + HostTC->getTriple().normalize()]; if (!CudaTC) CudaTC = llvm::make_unique( - *this, TT, *HostTC, C.getInputArgs()); + *this, TT, *HostTC, C.getInputArgs(), Action::OFK_OpenMP); TC = CudaTC.get(); } else TC = &getToolChain(C.getInputArgs(), TT); Index: lib/Driver/ToolChains/Clang.cpp =================================================================== --- lib/Driver/ToolChains/Clang.cpp +++ lib/Driver/ToolChains/Clang.cpp @@ -5253,7 +5253,13 @@ for (unsigned I = 0; I < Outputs.size(); ++I) { if (I) UB += ','; - UB += Outputs[I].getFilename(); + SmallString<256> OutputFileName(Outputs[I].getFilename()); + // Change extension of target files for OpenMP offloading + // to NVIDIA GPUs. + if (DepInfo[I].DependentToolChain->getTriple().isNVPTX() && + JA.isOffloading(Action::OFK_OpenMP)) + llvm::sys::path::replace_extension(OutputFileName, "cubin"); + UB += OutputFileName; } CmdArgs.push_back(TCArgs.MakeArgString(UB)); CmdArgs.push_back("-unbundle"); Index: lib/Driver/ToolChains/CommonArgs.h =================================================================== --- lib/Driver/ToolChains/CommonArgs.h +++ lib/Driver/ToolChains/CommonArgs.h @@ -39,6 +39,13 @@ llvm::opt::ArgStringList &CmdArgs, const llvm::opt::ArgList &Args); +void AddOpenMPLinkerScript(const ToolChain &TC, Compilation &C, + const InputInfo &Output, + const InputInfoList &Inputs, + const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs, + const JobAction &JA); + const char *SplitDebugName(const llvm::opt::ArgList &Args, const InputInfo &Input); Index: lib/Driver/ToolChains/CommonArgs.cpp =================================================================== --- lib/Driver/ToolChains/CommonArgs.cpp +++ lib/Driver/ToolChains/CommonArgs.cpp @@ -1023,3 +1023,128 @@ break; } } + +/// Add OpenMP linker script arguments at the end of the argument list so that +/// the fat binary is built by embedding each of the device images into the +/// host. The linker script also defines a few symbols required by the code +/// generation so that the images can be easily retrieved at runtime by the +/// offloading library. This should be used only in tool chains that support +/// linker scripts. +void tools::AddOpenMPLinkerScript(const ToolChain &TC, Compilation &C, + const InputInfo &Output, + const InputInfoList &Inputs, + const ArgList &Args, ArgStringList &CmdArgs, + const JobAction &JA) { + + // If this is not an OpenMP host toolchain, we don't need to do anything. + if (!JA.isHostOffloading(Action::OFK_OpenMP)) + return; + + // Create temporary linker script. Keep it if save-temps is enabled. + const char *LKS; + SmallString<256> Name = llvm::sys::path::filename(Output.getFilename()); + if (C.getDriver().isSaveTempsEnabled()) { + llvm::sys::path::replace_extension(Name, "lk"); + LKS = C.getArgs().MakeArgString(Name.c_str()); + } else { + llvm::sys::path::replace_extension(Name, ""); + Name = C.getDriver().GetTemporaryPath(Name, "lk"); + LKS = C.addTempFile(C.getArgs().MakeArgString(Name.c_str())); + } + + // Add linker script option to the command. + CmdArgs.push_back("-T"); + CmdArgs.push_back(LKS); + + // Create a buffer to write the contents of the linker script. + std::string LksBuffer; + llvm::raw_string_ostream LksStream(LksBuffer); + + // Get the OpenMP offload tool chains so that we can extract the triple + // associated with each device input. + auto OpenMPToolChains = C.getOffloadToolChains(); + assert(OpenMPToolChains.first != OpenMPToolChains.second && + "No OpenMP toolchains??"); + + // Track the input file name and device triple in order to build the script, + // inserting binaries in the designated sections. + SmallVector, 8> InputBinaryInfo; + + // Add commands to embed target binaries. We ensure that each section and + // image is 16-byte aligned. This is not mandatory, but increases the + // likelihood of data to be aligned with a cache block in several main host + // machines. + LksStream << "/*\n"; + LksStream << " OpenMP Offload Linker Script\n"; + LksStream << " *** Automatically generated by Clang ***\n"; + LksStream << "*/\n"; + LksStream << "TARGET(binary)\n"; + auto DTC = OpenMPToolChains.first; + for (auto &II : Inputs) { + const Action *A = II.getAction(); + // Is this a device linking action? + if (A && isa(A) && + A->isDeviceOffloading(Action::OFK_OpenMP)) { + assert(DTC != OpenMPToolChains.second && + "More device inputs than device toolchains??"); + InputBinaryInfo.push_back(std::make_pair( + DTC->second->getTriple().normalize(), II.getFilename())); + ++DTC; + LksStream << "INPUT(" << II.getFilename() << ")\n"; + } + } + + assert(DTC == OpenMPToolChains.second && + "Less device inputs than device toolchains??"); + + LksStream << "SECTIONS\n"; + LksStream << "{\n"; + + // Put each target binary into a separate section. + for (const auto &BI : InputBinaryInfo) { + LksStream << " .omp_offloading." << BI.first << " :\n"; + LksStream << " ALIGN(0x10)\n"; + LksStream << " {\n"; + LksStream << " PROVIDE_HIDDEN(.omp_offloading.img_start." << BI.first + << " = .);\n"; + LksStream << " " << BI.second << "\n"; + LksStream << " PROVIDE_HIDDEN(.omp_offloading.img_end." << BI.first + << " = .);\n"; + LksStream << " }\n"; + } + + // Add commands to define host entries begin and end. We use 1-byte subalign + // so that the linker does not add any padding and the elements in this + // section form an array. + LksStream << " .omp_offloading.entries :\n"; + LksStream << " ALIGN(0x10)\n"; + LksStream << " SUBALIGN(0x01)\n"; + LksStream << " {\n"; + LksStream << " PROVIDE_HIDDEN(.omp_offloading.entries_begin = .);\n"; + LksStream << " *(.omp_offloading.entries)\n"; + LksStream << " PROVIDE_HIDDEN(.omp_offloading.entries_end = .);\n"; + LksStream << " }\n"; + LksStream << "}\n"; + LksStream << "INSERT BEFORE .data\n"; + LksStream.flush(); + + // Dump the contents of the linker script if the user requested that. We + // support this option to enable testing of behavior with -###. + if (C.getArgs().hasArg(options::OPT_fopenmp_dump_offload_linker_script)) + llvm::errs() << LksBuffer; + + // If this is a dry run, do not create the linker script file. + if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH)) + return; + + // Open script file and write the contents. + std::error_code EC; + llvm::raw_fd_ostream Lksf(LKS, EC, llvm::sys::fs::F_None); + + if (EC) { + C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message(); + return; + } + + Lksf << LksBuffer; +} Index: lib/Driver/ToolChains/Cuda.h =================================================================== --- lib/Driver/ToolChains/Cuda.h +++ lib/Driver/ToolChains/Cuda.h @@ -112,6 +112,20 @@ const char *LinkingOutput) const override; }; +class LLVM_LIBRARY_VISIBILITY OpenMPLinker : public Tool { + public: + OpenMPLinker(const ToolChain &TC) + : Tool("NVPTX::OpenMPLinker", "fatbinary", TC, RF_Full, llvm::sys::WEM_UTF8, + "--options-file") {} + + bool hasIntegratedCPP() const override { return false; } + + void ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, const InputInfoList &Inputs, + const llvm::opt::ArgList &TCArgs, + const char *LinkingOutput) const override; +}; + } // end namespace NVPTX } // end namespace tools @@ -120,7 +134,8 @@ class LLVM_LIBRARY_VISIBILITY CudaToolChain : public ToolChain { public: CudaToolChain(const Driver &D, const llvm::Triple &Triple, - const ToolChain &HostTC, const llvm::opt::ArgList &Args); + const ToolChain &HostTC, const llvm::opt::ArgList &Args, + const Action::OffloadKind OK); virtual const llvm::Triple *getAuxTriple() const override { return &HostTC.getTriple(); @@ -169,6 +184,9 @@ protected: Tool *buildAssembler() const override; // ptxas Tool *buildLinker() const override; // fatbinary (ok, not really a linker) + +private: + const Action::OffloadKind OK; }; } // end namespace toolchains Index: lib/Driver/ToolChains/Cuda.cpp =================================================================== --- lib/Driver/ToolChains/Cuda.cpp +++ lib/Driver/ToolChains/Cuda.cpp @@ -9,7 +9,9 @@ #include "Cuda.h" #include "InputInfo.h" +#include "CommonArgs.h" #include "clang/Basic/Cuda.h" +#include "clang/Config/config.h" #include "clang/Basic/VirtualFileSystem.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" @@ -212,8 +214,20 @@ static_cast(getToolChain()); assert(TC.getTriple().isNVPTX() && "Wrong platform"); + StringRef GPUArchName; + std::vector GPUArchNames; + // If this is an OpenMP action we need to extract the device architecture from + // the -fopenmp-target-arch option. + if (JA.isDeviceOffloading(Action::OFK_OpenMP)) { + GPUArchNames = Args.getAllArgValues(options::OPT_march_EQ); + assert(GPUArchNames.size() == 1 && + "Exactly one GPU Arch required for ptxas."); + GPUArchName = GPUArchNames[0]; + } else + GPUArchName = JA.getOffloadingArch(); + // Obtain architecture from the action. - CudaArch gpu_arch = StringToCudaArch(JA.getOffloadingArch()); + CudaArch gpu_arch = StringToCudaArch(GPUArchName); assert(gpu_arch != CudaArch::UNKNOWN && "Device action expected to have an architecture."); @@ -265,7 +279,10 @@ CmdArgs.push_back("--gpu-name"); CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch))); CmdArgs.push_back("--output-file"); - CmdArgs.push_back(Args.MakeArgString(Output.getFilename())); + SmallString<256> OutputFileName(Output.getFilename()); + if (JA.isOffloading(Action::OFK_OpenMP)) + llvm::sys::path::replace_extension(OutputFileName, "cubin"); + CmdArgs.push_back(Args.MakeArgString(OutputFileName)); for (const auto& II : Inputs) CmdArgs.push_back(Args.MakeArgString(II.getFilename())); @@ -324,14 +341,93 @@ C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } +void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, + const InputInfoList &Inputs, + const ArgList &Args, + const char *LinkingOutput) const { + const auto &TC = + static_cast(getToolChain()); + assert(TC.getTriple().isNVPTX() && "Wrong platform"); + + ArgStringList CmdArgs; + + // OpenMP uses nvlink to link cubin files. The result will be embedded in the + // host binary by the host linker. + assert(!JA.isHostOffloading(Action::OFK_OpenMP) && + "CUDA toolchain not expected for an OpenMP host device."); + + if (Output.isFilename()) { + CmdArgs.push_back("-o"); + CmdArgs.push_back(Output.getFilename()); + } else + assert(Output.isNothing() && "Invalid output."); + if (Args.hasArg(options::OPT_g_Flag)) + CmdArgs.push_back("-g"); + + if (Args.hasArg(options::OPT_v)) + CmdArgs.push_back("-v"); + + std::vector GPUArchs = + Args.getAllArgValues(options::OPT_march_EQ); + assert(GPUArchs.size() == 1 && "Exactly one GPU Arch required for ptxas."); + const std::string &GPUArch = GPUArchs[0]; + + CmdArgs.push_back("-arch"); + CmdArgs.push_back(Args.MakeArgString(GPUArch)); + + // Add paths specified in LIBRARY_PATH environment variable as -L options. + addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH"); + + // Add paths for the default clang library path. + SmallString<256> DefaultLibPath = + llvm::sys::path::parent_path(TC.getDriver().Dir); + llvm::sys::path::append(DefaultLibPath, "lib" CLANG_LIBDIR_SUFFIX); + CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath)); + + // Add linking against library implementing OpenMP calls on NVPTX target. + CmdArgs.push_back("-lomptarget-nvptx"); + + for (const auto &II : Inputs) { + if (II.getType() == types::TY_LLVM_IR || + II.getType() == types::TY_LTO_IR || + II.getType() == types::TY_LTO_BC || + II.getType() == types::TY_LLVM_BC) { + C.getDriver().Diag(diag::err_drv_no_linker_llvm_support) + << getToolChain().getTripleString(); + continue; + } + + // Currently, we only pass the input files to the linker, we do not pass + // any libraries that may be valid only for the host. + if (!II.isFilename()) + continue; + + SmallString<256> Name = llvm::sys::path::filename(II.getFilename()); + llvm::sys::path::replace_extension(Name, "cubin"); + + const char *CubinF = + C.addTempFile(C.getArgs().MakeArgString(Name)); + + CmdArgs.push_back(CubinF); + } + + AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA); + + const char *Exec = + Args.MakeArgString(getToolChain().GetProgramPath("nvlink")); + C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); +} + /// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary, /// which isn't properly a linker but nonetheless performs the step of stitching /// together object files from the assembler into a single blob. CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple, - const ToolChain &HostTC, const ArgList &Args) + const ToolChain &HostTC, const ArgList &Args, + const Action::OffloadKind OK) : ToolChain(D, Triple, Args), HostTC(HostTC), - CudaInstallation(D, HostTC.getTriple(), Args) { + CudaInstallation(D, HostTC.getTriple(), Args), OK(OK) { if (CudaInstallation.isValid()) getProgramPaths().push_back(CudaInstallation.getBinPath()); } @@ -392,6 +488,22 @@ CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args); } +void AddMArchOption(DerivedArgList *DAL, + const OptTable &Opts, + StringRef Opt) { + if (Opt.startswith("-march=")) { + StringRef Arch = Opt.split("=").second; + // Check if the arch provided is valid for this toolchain. + // If not valid, ignore it. + if (StringToCudaArch(Arch) != CudaArch::UNKNOWN) { + DAL->eraseArg(options::OPT_march_EQ); + DAL->AddJoinedArg(nullptr, + Opts.getOption(options::OPT_march_EQ), + Arch.str()); + } + } +} + llvm::opt::DerivedArgList * CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, @@ -405,7 +517,7 @@ // For OpenMP device offloading, append derived arguments. Make sure // flags are not duplicated. - // TODO: Append the compute capability. + // Also append the compute capability. if (DeviceOffloadKind == Action::OFK_OpenMP) { for (Arg *A : Args){ bool IsDuplicate = false; @@ -418,6 +530,49 @@ if (!IsDuplicate) DAL->append(A); } + + // Get the compute capability from the -Xopenmp-target flag. + auto OptList = Args.getAllArgValues(options::OPT_Xopenmp_target_EQ); + + // For each OPT_Xopenmp_target_EQ option, the function returns + // two strings, the triple and the option. + // The following format is assumed: + // + // -Xopenmp-target=nvptx64-nvidia-cuda -opt=val + for (unsigned i = 0; i < OptList.size(); i+=2) { + StringRef Opt = OptList[i+1]; + if (OptList[i] == getTripleString()) + AddMArchOption(DAL, Opts, Opt); + } + + OptList = Args.getAllArgValues(options::OPT_Xopenmp_target); + // When there is only one option in the list, the following format + // is assumed: + // + // -Xopenmp-target -opt=val + + // By default, if no triple is explicitely specified, we + // associate -opt=val with the toolchain specified under the + // -fopenmp-targets flag (provided that there is only one such + // toolchain specified). + if (!OptList.empty() && + Args.getAllArgValues( + options::OPT_fopenmp_targets_EQ).size() != 1) + getDriver().Diag(diag::err_drv_Xopenmp_target_missing_triple); + + // Add arch + for (StringRef Opt : OptList) { + AddMArchOption(DAL, Opts, Opt); + } + + auto MArchList = DAL->getAllArgValues(options::OPT_march_EQ); + assert(MArchList.size() < 2 && + "Too many archs under -Xopenmp-targets"); + if (MArchList.empty()) + // Default compute capability for CUDA toolchain is sm_20. + DAL->AddJoinedArg(nullptr, + Opts.getOption(options::OPT_march_EQ), "sm_20"); + return DAL; } @@ -467,6 +622,8 @@ } Tool *CudaToolChain::buildLinker() const { + if (OK == Action::OFK_OpenMP) + return new tools::NVPTX::OpenMPLinker(*this); return new tools::NVPTX::Linker(*this); } Index: lib/Driver/ToolChains/Gnu.cpp =================================================================== --- lib/Driver/ToolChains/Gnu.cpp +++ lib/Driver/ToolChains/Gnu.cpp @@ -203,131 +203,6 @@ // The types are (hopefully) good enough. } -/// Add OpenMP linker script arguments at the end of the argument list so that -/// the fat binary is built by embedding each of the device images into the -/// host. The linker script also defines a few symbols required by the code -/// generation so that the images can be easily retrieved at runtime by the -/// offloading library. This should be used only in tool chains that support -/// linker scripts. -static void AddOpenMPLinkerScript(const ToolChain &TC, Compilation &C, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, ArgStringList &CmdArgs, - const JobAction &JA) { - - // If this is not an OpenMP host toolchain, we don't need to do anything. - if (!JA.isHostOffloading(Action::OFK_OpenMP)) - return; - - // Create temporary linker script. Keep it if save-temps is enabled. - const char *LKS; - SmallString<256> Name = llvm::sys::path::filename(Output.getFilename()); - if (C.getDriver().isSaveTempsEnabled()) { - llvm::sys::path::replace_extension(Name, "lk"); - LKS = C.getArgs().MakeArgString(Name.c_str()); - } else { - llvm::sys::path::replace_extension(Name, ""); - Name = C.getDriver().GetTemporaryPath(Name, "lk"); - LKS = C.addTempFile(C.getArgs().MakeArgString(Name.c_str())); - } - - // Add linker script option to the command. - CmdArgs.push_back("-T"); - CmdArgs.push_back(LKS); - - // Create a buffer to write the contents of the linker script. - std::string LksBuffer; - llvm::raw_string_ostream LksStream(LksBuffer); - - // Get the OpenMP offload tool chains so that we can extract the triple - // associated with each device input. - auto OpenMPToolChains = C.getOffloadToolChains(); - assert(OpenMPToolChains.first != OpenMPToolChains.second && - "No OpenMP toolchains??"); - - // Track the input file name and device triple in order to build the script, - // inserting binaries in the designated sections. - SmallVector, 8> InputBinaryInfo; - - // Add commands to embed target binaries. We ensure that each section and - // image is 16-byte aligned. This is not mandatory, but increases the - // likelihood of data to be aligned with a cache block in several main host - // machines. - LksStream << "/*\n"; - LksStream << " OpenMP Offload Linker Script\n"; - LksStream << " *** Automatically generated by Clang ***\n"; - LksStream << "*/\n"; - LksStream << "TARGET(binary)\n"; - auto DTC = OpenMPToolChains.first; - for (auto &II : Inputs) { - const Action *A = II.getAction(); - // Is this a device linking action? - if (A && isa(A) && - A->isDeviceOffloading(Action::OFK_OpenMP)) { - assert(DTC != OpenMPToolChains.second && - "More device inputs than device toolchains??"); - InputBinaryInfo.push_back(std::make_pair( - DTC->second->getTriple().normalize(), II.getFilename())); - ++DTC; - LksStream << "INPUT(" << II.getFilename() << ")\n"; - } - } - - assert(DTC == OpenMPToolChains.second && - "Less device inputs than device toolchains??"); - - LksStream << "SECTIONS\n"; - LksStream << "{\n"; - - // Put each target binary into a separate section. - for (const auto &BI : InputBinaryInfo) { - LksStream << " .omp_offloading." << BI.first << " :\n"; - LksStream << " ALIGN(0x10)\n"; - LksStream << " {\n"; - LksStream << " PROVIDE_HIDDEN(.omp_offloading.img_start." << BI.first - << " = .);\n"; - LksStream << " " << BI.second << "\n"; - LksStream << " PROVIDE_HIDDEN(.omp_offloading.img_end." << BI.first - << " = .);\n"; - LksStream << " }\n"; - } - - // Add commands to define host entries begin and end. We use 1-byte subalign - // so that the linker does not add any padding and the elements in this - // section form an array. - LksStream << " .omp_offloading.entries :\n"; - LksStream << " ALIGN(0x10)\n"; - LksStream << " SUBALIGN(0x01)\n"; - LksStream << " {\n"; - LksStream << " PROVIDE_HIDDEN(.omp_offloading.entries_begin = .);\n"; - LksStream << " *(.omp_offloading.entries)\n"; - LksStream << " PROVIDE_HIDDEN(.omp_offloading.entries_end = .);\n"; - LksStream << " }\n"; - LksStream << "}\n"; - LksStream << "INSERT BEFORE .data\n"; - LksStream.flush(); - - // Dump the contents of the linker script if the user requested that. We - // support this option to enable testing of behavior with -###. - if (C.getArgs().hasArg(options::OPT_fopenmp_dump_offload_linker_script)) - llvm::errs() << LksBuffer; - - // If this is a dry run, do not create the linker script file. - if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH)) - return; - - // Open script file and write the contents. - std::error_code EC; - llvm::raw_fd_ostream Lksf(LKS, EC, llvm::sys::fs::F_None); - - if (EC) { - C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message(); - return; - } - - Lksf << LksBuffer; -} - static bool addXRayRuntime(const ToolChain &TC, const ArgList &Args, ArgStringList &CmdArgs) { if (Args.hasFlag(options::OPT_fxray_instrument, Index: test/Driver/openmp-offload.c =================================================================== --- test/Driver/openmp-offload.c +++ test/Driver/openmp-offload.c @@ -597,3 +597,76 @@ // RUN: | FileCheck -check-prefix=CHK-FOPENMP-IS-DEVICE %s // CHK-FOPENMP-IS-DEVICE: clang{{.*}} "-aux-triple" "powerpc64le--linux" {{.*}}.c" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" + +/// ########################################################################### + +/// Check -Xopenmp-target=powerpc64le-ibm-linux-gnu -march=pwr8 is passed when compiling for the device. +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu -Xopenmp-target=powerpc64le-ibm-linux-gnu -march=pwr8 %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-FOPENMP-EQ-TARGET %s + +// CHK-FOPENMP-EQ-TARGET: clang{{.*}} argument unused during compilation: '-Xopenmp-target=powerpc64le-ibm-linux-gnu -march=pwr8' + +/// ########################################################################### + +/// Check -Xopenmp-target -march=pwr8 is passed when compiling for the device. +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu -Xopenmp-target -march=pwr8 %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET %s + +// CHK-FOPENMP-TARGET: clang{{.*}} argument unused during compilation: '-Xopenmp-target -march=pwr8' + +/// ########################################################################### + +/// Check -Xopenmp-target triggers error when multiple triples are used. +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu,nvptx64-nvidia-cuda -Xopenmp-target -march=pwr8 %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET-ERROR %s + +// CHK-FOPENMP-TARGET-ERROR: clang{{.*}} error: cannot deduce implicit triple value for -Xopenmp-target, specify triple using -Xopenmp-target= + +/// ########################################################################### + +/// Check -Xopenmp-target uses one of the archs provided when several archs are used. +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_35 -Xopenmp-target -march=sm_60 %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET-ARCHS %s + +// CHK-FOPENMP-TARGET-ARCHS: ptxas{{.*}}" "--gpu-name" "sm_60" +// CHK-FOPENMP-TARGET-ARCHS: nvlink{{.*}}" "-arch" "sm_60" + +/// ########################################################################### + +/// Check -Xopenmp-target -march=pwr8 is ignored in favor of sm_20 (default). +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu,nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=pwr8 %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET-WARNING %s + +// CHK-FOPENMP-TARGET-WARNING: ptxas{{.*}}" "--gpu-name" "sm_20" +// CHK-FOPENMP-TARGET-WARNING: nvlink{{.*}}" "-arch" "sm_20" + +/// ########################################################################### + +/// Check -Xopenmp-target -march=sm_35 works as expected when two triples are present. +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu,nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_35 %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET-COMPILATION %s + +// CHK-FOPENMP-TARGET-COMPILATION: ptxas{{.*}}" "--gpu-name" "sm_35" +// CHK-FOPENMP-TARGET-COMPILATION: nvlink{{.*}}" "-arch" "sm_35" + +/// ########################################################################### + +/// Check cubin file generation and usage by nvlink +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps -no-canonical-prefixes %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-CUBIN %s + +// CHK-CUBIN: clang{{.*}}" "-o" "{{.*}}-openmp-nvptx64-nvidia-cuda.s" +// CHK-CUBIN-NEXT: ptxas{{.*}}" "--output-file" "{{.*}}-openmp-nvptx64-nvidia-cuda.cubin" "{{.*}}-openmp-nvptx64-nvidia-cuda.s" +// CHK-CUBIN-NEXT: nvlink" "-o" "{{.*}}-openmp-nvptx64-nvidia-cuda" {{.*}} "openmp-offload-openmp-nvptx64-nvidia-cuda.cubin" + +/// ########################################################################### + +/// Check cubin file generation and usage by nvlink +// RUN: touch %t1.o +// RUN: touch %t2.o +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps -no-canonical-prefixes %t1.o %t2.o 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN %s + +// CHK-TWOCUBIN: clang-offload-bundler" "-type=o" "{{.*}}inputs={{.*}}tmp1.o" "-outputs={{.*}}.o,{{.*}}tmp1-openmp-nvptx64-nvidia-cuda.cubin" "-unbundle" +// CHK-TWOCUBIN-NEXT: clang-offload-bundler" "-type=o" "{{.*}}inputs={{.*}}tmp2.o" "-outputs={{.*}}.o,{{.*}}tmp2-openmp-nvptx64-nvidia-cuda.cubin" "-unbundle" +// CHK-TWOCUBIN-NEXT: nvlink" "-o" "{{.*}}-openmp-nvptx64-nvidia-cuda" {{.*}} "openmp-offload.c.tmp1-openmp-nvptx64-nvidia-cuda.cubin" "openmp-offload.c.tmp2-openmp-nvptx64-nvidia-cuda.cubin"