diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -5937,6 +5937,9 @@
     case llvm::Triple::Solaris:
       TC = std::make_unique<toolchains::Solaris>(*this, Target, Args);
       break;
+    case llvm::Triple::CUDA:
+      TC = std::make_unique<toolchains::NVPTXToolChain>(*this, Target, Args);
+      break;
     case llvm::Triple::AMDHSA:
       TC = std::make_unique<toolchains::ROCMToolChain>(*this, Target, Args);
       break;
@@ -6056,11 +6059,6 @@
     }
   }
 
-  // Intentionally omitted from the switch above: llvm::Triple::CUDA. CUDA
-  // compiles always need two toolchains, the CUDA toolchain and the host
-  // toolchain. So the only valid way to create a CUDA toolchain is via
-  // CreateOffloadingDeviceToolChains.
-
   return *TC;
 }
 
diff --git a/clang/lib/Driver/ToolChains/Cuda.h b/clang/lib/Driver/ToolChains/Cuda.h
--- a/clang/lib/Driver/ToolChains/Cuda.h
+++ b/clang/lib/Driver/ToolChains/Cuda.h
@@ -98,9 +98,22 @@
 
 // Runs fatbinary, which combines GPU object files ("cubin" files) and/or PTX
 // assembly into a single output file.
+class LLVM_LIBRARY_VISIBILITY FatBinary : public Tool {
+ public:
+  FatBinary(const ToolChain &TC) : Tool("NVPTX::Linker", "fatbinary", TC) {}
+
+  bool hasIntegratedCPP() const override { return false; }
+
+  void ConstructJob(Compilation &C, const JobAction &JA,
+                    const InputInfo &Output, const InputInfoList &Inputs,
+                    const llvm::opt::ArgList &TCArgs,
+                    const char *LinkingOutput) const override;
+};
+
+// Runs nvlink, which links GPU object files ("cubin" files) into a single file.
 class LLVM_LIBRARY_VISIBILITY Linker : public Tool {
 public:
-  Linker(const ToolChain &TC) : Tool("NVPTX::Linker", "fatbinary", TC) {}
+  Linker(const ToolChain &TC) : Tool("NVPTX::Linker", "nvlink", TC) {}
 
   bool hasIntegratedCPP() const override { return false; }
 
@@ -119,73 +132,95 @@
 
 namespace toolchains {
 
-class LLVM_LIBRARY_VISIBILITY CudaToolChain : public ToolChain {
-public:
-  CudaToolChain(const Driver &D, const llvm::Triple &Triple,
-                const ToolChain &HostTC, const llvm::opt::ArgList &Args);
+class LLVM_LIBRARY_VISIBILITY NVPTXToolChain : public ToolChain {
+ public:
+  NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
+                 const llvm::Triple &HostTriple,
+                 const llvm::opt::ArgList &Args);
+
+  NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
+                 const llvm::opt::ArgList &Args);
+
+  llvm::opt::DerivedArgList *
+  TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
+                Action::OffloadKind DeviceOffloadKind) const override;
+
+  // Never try to use the integrated assembler with NVPTX; always fork out to
+  // ptxas.
+  bool useIntegratedAs() const override { return false; }
+  bool isCrossCompiling() const override { return true; }
+  bool isPICDefault() const override { return false; }
+  bool isPIEDefault(const llvm::opt::ArgList &Args) const override {
+    return false;
+  }
+  bool isPICDefaultForced() const override { return false; }
+  bool SupportsProfiling() const override { return false; }
+
+  bool IsMathErrnoDefault() const override { return false; }
 
-  const llvm::Triple *getAuxTriple() const override {
+  bool supportsDebugInfoOption(const llvm::opt::Arg *A) const override;
+  void adjustDebugInfoKind(codegenoptions::DebugInfoKind &DebugInfoKind,
+                           const llvm::opt::ArgList &Args) const override;
+
+  // NVPTX supports only DWARF2.
+  unsigned GetDefaultDwarfVersion() const override { return 2; }
+  unsigned getMaxDwarfVersion() const override { return 2; }
+
+  CudaInstallationDetector CudaInstallation;
+
+ protected:
+  Tool *buildAssembler() const override; // ptxas.
+  Tool *buildLinker() const override;    // nvlink.
+};
+
+class LLVM_LIBRARY_VISIBILITY CudaToolChain : public NVPTXToolChain {
+ public:
+  CudaToolChain(const Driver &D, const llvm::Triple &Triple,
+                const ToolChain &HostTC, const llvm::opt::ArgList &Args);
+
+  const llvm::Triple *getAuxTriple() const override {
     return &HostTC.getTriple();
-  }
+  }
 
-  std::string getInputFilename(const InputInfo &Input) const override;
-
-  llvm::opt::DerivedArgList *
-  TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
-                Action::OffloadKind DeviceOffloadKind) const override;
-  void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
-                             llvm::opt::ArgStringList &CC1Args,
-                             Action::OffloadKind DeviceOffloadKind) const override;
-
-  llvm::DenormalMode getDefaultDenormalModeForType(
-      const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
-      const llvm::fltSemantics *FPType = nullptr) const override;
-
-  // Never try to use the integrated assembler with CUDA; always fork out to
-  // ptxas.
-  bool useIntegratedAs() const override { return false; }
-  bool isCrossCompiling() const override { return true; }
-  bool isPICDefault() const override { return false; }
-  bool isPIEDefault(const llvm::opt::ArgList &Args) const override {
-    return false;
-  }
-  bool isPICDefaultForced() const override { return false; }
-  bool SupportsProfiling() const override { return false; }
-  bool supportsDebugInfoOption(const llvm::opt::Arg *A) const override;
-  void adjustDebugInfoKind(codegenoptions::DebugInfoKind &DebugInfoKind,
-                           const llvm::opt::ArgList &Args) const override;
-  bool IsMathErrnoDefault() const override { return false; }
+  llvm::opt::DerivedArgList *
+  TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
+                Action::OffloadKind DeviceOffloadKind) const override;
+  void
+  addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
+                        llvm::opt::ArgStringList &CC1Args,
+                        Action::OffloadKind DeviceOffloadKind) const override;
 
-  void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs,
-                          llvm::opt::ArgStringList &CC1Args) const override;
+  llvm::DenormalMode getDefaultDenormalModeForType(
+      const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
+      const llvm::fltSemantics *FPType = nullptr) const override;
 
-  void addClangWarningOptions(llvm::opt::ArgStringList &CC1Args) const override;
-  CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const override;
-  void
-  AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
-                            llvm::opt::ArgStringList &CC1Args) const override;
-  void AddClangCXXStdlibIncludeArgs(
-      const llvm::opt::ArgList &Args,
-      llvm::opt::ArgStringList &CC1Args) const override;
-  void AddIAMCUIncludeArgs(const llvm::opt::ArgList &DriverArgs,
+  void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs,
                           llvm::opt::ArgStringList &CC1Args) const override;
 
-  SanitizerMask getSupportedSanitizers() const override;
+  void
+  addClangWarningOptions(llvm::opt::ArgStringList &CC1Args) const override;
+  CXXStdlibType
+  GetCXXStdlibType(const llvm::opt::ArgList &Args) const override;
+  void
+  AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
+                            llvm::opt::ArgStringList &CC1Args) const override;
+  void AddClangCXXStdlibIncludeArgs(
+      const llvm::opt::ArgList &Args,
+      llvm::opt::ArgStringList &CC1Args) const override;
+  void AddIAMCUIncludeArgs(const llvm::opt::ArgList &DriverArgs,
+                           llvm::opt::ArgStringList &CC1Args) const override;
 
-  VersionTuple
-  computeMSVCVersion(const Driver *D,
-                     const llvm::opt::ArgList &Args) const override;
+  SanitizerMask getSupportedSanitizers() const override;
 
-  unsigned GetDefaultDwarfVersion() const override { return 2; }
-  // NVPTX supports only DWARF2.
-  unsigned getMaxDwarfVersion() const override { return 2; }
+  VersionTuple
+  computeMSVCVersion(const Driver *D,
+                     const llvm::opt::ArgList &Args) const override;
 
-  const ToolChain &HostTC;
-  CudaInstallationDetector CudaInstallation;
+  const ToolChain &HostTC;
 
-protected:
-  Tool *buildAssembler() const override; // ptxas
-  Tool *buildLinker() const override;    // fatbinary (ok, not really a linker)
+ protected:
+  Tool *buildAssembler() const override; // ptxas
+  Tool *buildLinker() const override;    // fatbinary (ok, not really a linker)
 };
 
 } // end namespace toolchains
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -380,18 +380,20 @@
                                     const ArgList &Args,
                                     const char *LinkingOutput) const {
   const auto &TC =
-      static_cast<const toolchains::CudaToolChain &>(getToolChain());
+      static_cast<const toolchains::NVPTXToolChain &>(getToolChain());
   assert(TC.getTriple().isNVPTX() && "Wrong platform");
 
   StringRef GPUArchName;
-  // If this is an OpenMP action we need to extract the device architecture
-  // from the -march=arch option. This option may come from -Xopenmp-target
-  // flag or the default value.
-  if (JA.isDeviceOffloading(Action::OFK_OpenMP)) {
+  // If this is a CUDA action we need to extract the device architecture
+  // from the Job's associated architecture; otherwise use the -march=arch
+  // option. This option may come from the -Xopenmp-target flag or the
+  // default value.
+  if (JA.isDeviceOffloading(Action::OFK_Cuda)) {
+    GPUArchName = JA.getOffloadingArch();
+  } else {
     GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
     assert(!GPUArchName.empty() && "Must have an architecture passed in.");
-  } else
-    GPUArchName = JA.getOffloadingArch();
+  }
 
   // Obtain architecture from the action.
   CudaArch gpu_arch = StringToCudaArch(GPUArchName);
@@ -462,13 +464,14 @@
   for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
     CmdArgs.push_back(Args.MakeArgString(A));
 
-  bool Relocatable = false;
+  bool Relocatable = true;
   if (JA.isOffloading(Action::OFK_OpenMP))
     // In OpenMP we need to generate relocatable code.
     Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target,
                                options::OPT_fnoopenmp_relocatable_target,
                                /*Default=*/true);
   else if (JA.isOffloading(Action::OFK_Cuda))
+    // In CUDA we generate non-relocatable code by default.
     Relocatable = Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
                                /*Default=*/false);
 
@@ -506,11 +509,11 @@
 // All inputs to this linker must be from CudaDeviceActions, as we need to look
 // at the Inputs' Actions in order to figure out which GPU architecture they
 // correspond to.
-void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
-                                 const InputInfo &Output,
-                                 const InputInfoList &Inputs,
-                                 const ArgList &Args,
-                                 const char *LinkingOutput) const {
+void NVPTX::FatBinary::ConstructJob(Compilation &C, const JobAction &JA,
+                                    const InputInfo &Output,
+                                    const InputInfoList &Inputs,
+                                    const ArgList &Args,
+                                    const char *LinkingOutput) const {
   const auto &TC =
       static_cast<const toolchains::CudaToolChain &>(getToolChain());
   assert(TC.getTriple().isNVPTX() && "Wrong platform");
@@ -557,6 +560,99 @@
       Exec, CmdArgs, Inputs, Output));
 }
 
+void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
+                                 const InputInfo &Output,
+                                 const InputInfoList &Inputs,
+                                 const ArgList &Args,
+                                 const char *LinkingOutput) const {
+  const auto &TC =
+      static_cast<const toolchains::NVPTXToolChain &>(getToolChain());
+  assert(TC.getTriple().isNVPTX() && "Wrong platform");
+
+  ArgStringList CmdArgs;
+  if (Output.isFilename()) {
+    CmdArgs.push_back("-o");
+    CmdArgs.push_back(Output.getFilename());
+  } else {
+    assert(Output.isNothing() && "Invalid output.");
+  }
+
+  if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
+    CmdArgs.push_back("-g");
+
+  if (Args.hasArg(options::OPT_v))
+    CmdArgs.push_back("-v");
+
+  StringRef GPUArch = Args.getLastArgValue(options::OPT_march_EQ);
+  assert(!GPUArch.empty() && "At least one GPU Arch required for nvlink.");
+
+  CmdArgs.push_back("-arch");
+  CmdArgs.push_back(Args.MakeArgString(GPUArch));
+
+  // Add paths specified in LIBRARY_PATH environment variable as -L options.
+  addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
+
+  // Add paths for the default clang library path.
+  SmallString<256> DefaultLibPath =
+      llvm::sys::path::parent_path(TC.getDriver().Dir);
+  llvm::sys::path::append(DefaultLibPath, CLANG_INSTALL_LIBDIR_BASENAME);
+  CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));
+
+  for (const auto &II : Inputs) {
+    if (II.getType() == types::TY_LLVM_IR || II.getType() == types::TY_LTO_IR ||
+        II.getType() == types::TY_LTO_BC || II.getType() == types::TY_LLVM_BC) {
+      C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
+          << getToolChain().getTripleString();
+      continue;
+    }
+
+    // Currently, we only pass the input files to the linker; we do not pass
+    // any libraries that may be valid only for the host.
+    if (!II.isFilename())
+      continue;
+
+    // The 'nvlink' application performs RDC-mode linking when given a '.o'
+    // file and device linking when given a '.cubin' file. We always want to
+    // perform device linking, so just rename any '.o' files.
+    auto InputFile = getToolChain().getInputFilename(II);
+    if (llvm::sys::path::extension(InputFile) != ".cubin") {
+      const char *CubinF =
+          Args.MakeArgString(getToolChain().getDriver().GetTemporaryPath(
+              llvm::sys::path::stem(InputFile), "cubin"));
+      uint64_t Size = 0;
+      if (std::error_code EC = llvm::sys::fs::file_size(InputFile, Size))
+        continue;
+
+      // If the file is empty it is a temporary file created by the Driver, so
+      // we create a symbolic link so that the '.cubin' file nvlink expects is
+      // the same as the '.o' file we will write to later. If we have a real
+      // input file, just copy it to a new '.cubin' file.
+      if (Size == 0) {
+        if (std::error_code EC = llvm::sys::fs::remove(InputFile))
+          continue;
+        if (std::error_code EC = llvm::sys::fs::create_link(CubinF, InputFile))
+          continue;
+        assert(llvm::sys::fs::is_symlink_file(CubinF) && "Not a symbolic link");
+      } else {
+        if (std::error_code EC =
+                llvm::sys::fs::copy_file(InputFile, C.addTempFile(CubinF)))
+          continue;
+      }
+
+      CmdArgs.push_back(CubinF);
+    } else {
+      CmdArgs.push_back(Args.MakeArgString(InputFile));
+    }
+  }
+
+  C.addCommand(std::make_unique<Command>(
+      JA, *this,
+      ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8,
+                          "--options-file"},
+      Args.MakeArgString(getToolChain().GetProgramPath("nvlink")), CmdArgs,
+      Inputs, Output));
+}
+
 void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
                                    const llvm::opt::ArgList &Args,
                                    std::vector<StringRef> &Features) {
@@ -599,14 +695,13 @@
   Features.push_back(PtxFeature);
 }
 
-/// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
-/// which isn't properly a linker but nonetheless performs the step of stitching
-/// together object files from the assembler into a single blob.
-
-CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
-                             const ToolChain &HostTC, const ArgList &Args)
-    : ToolChain(D, Triple, Args), HostTC(HostTC),
-      CudaInstallation(D, HostTC.getTriple(), Args) {
+/// NVPTX toolchain. Our assembler is ptxas, and our linker is nvlink. This
+/// operates as a stand-alone version of the NVPTX tools without the host
+/// toolchain.
+NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
+                               const llvm::Triple &HostTriple,
+                               const ArgList &Args)
    : ToolChain(D, Triple, Args), CudaInstallation(D, HostTriple, Args) {
   if (CudaInstallation.isValid()) {
     CudaInstallation.WarnIfUnsupportedVersion();
     getProgramPaths().push_back(std::string(CudaInstallation.getBinPath()));
@@ -616,22 +711,72 @@
   getProgramPaths().push_back(getDriver().Dir);
 }
 
-std::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
-  // Only object files are changed, for example assembly files keep their .s
-  // extensions. If the user requested device-only compilation don't change it.
-  if (Input.getType() != types::TY_Object || getDriver().offloadDeviceOnly())
-    return ToolChain::getInputFilename(Input);
+/// We only need the host triple to locate the CUDA binary utilities; use the
+/// system's default triple if one is not provided.
+NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
+                               const ArgList &Args)
+    : NVPTXToolChain(D, Triple,
+                     llvm::Triple(llvm::sys::getDefaultTargetTriple()), Args) {}
+
+llvm::opt::DerivedArgList *
+NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
+                              StringRef BoundArch,
+                              Action::OffloadKind DeviceOffloadKind) const {
+  DerivedArgList *DAL =
+      ToolChain::TranslateArgs(Args, BoundArch, DeviceOffloadKind);
+  if (!DAL)
+    DAL = new DerivedArgList(Args.getBaseArgs());
+
+  const OptTable &Opts = getDriver().getOpts();
+
+  for (Arg *A : Args)
+    if (!llvm::is_contained(*DAL, A))
+      DAL->append(A);
+
+  if (!DAL->hasArg(options::OPT_march_EQ))
+    DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
+                      CudaArchToString(CudaArch::CudaDefault));
+
+  return DAL;
+}
+
+bool NVPTXToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const {
+  const Option &O = A->getOption();
+  return (O.matches(options::OPT_gN_Group) &&
+          !O.matches(options::OPT_gmodules)) ||
+         O.matches(options::OPT_g_Flag) ||
+         O.matches(options::OPT_ggdbN_Group) || O.matches(options::OPT_ggdb) ||
+         O.matches(options::OPT_gdwarf) || O.matches(options::OPT_gdwarf_2) ||
+         O.matches(options::OPT_gdwarf_3) || O.matches(options::OPT_gdwarf_4) ||
+         O.matches(options::OPT_gdwarf_5) ||
+         O.matches(options::OPT_gcolumn_info);
+}
 
-  // Replace extension for object files with cubin because nvlink relies on
-  // these particular file names.
-  SmallString<256> Filename(ToolChain::getInputFilename(Input));
-  llvm::sys::path::replace_extension(Filename, "cubin");
-  return std::string(Filename.str());
+void NVPTXToolChain::adjustDebugInfoKind(
+    codegenoptions::DebugInfoKind &DebugInfoKind, const ArgList &Args) const {
+  switch (mustEmitDebugInfo(Args)) {
+  case DisableDebugInfo:
+    DebugInfoKind = codegenoptions::NoDebugInfo;
+    break;
+  case DebugDirectivesOnly:
+    DebugInfoKind = codegenoptions::DebugDirectivesOnly;
+    break;
+  case EmitSameDebugInfoAsHost:
+    // Use same debug info level as the host.
+    break;
+  }
 }
 
+/// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
+/// which isn't properly a linker but nonetheless performs the step of stitching
+/// together object files from the assembler into a single blob.
+
+CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
+                             const ToolChain &HostTC, const ArgList &Args)
+    : NVPTXToolChain(D, Triple, HostTC.getTriple(), Args), HostTC(HostTC) {}
+
 void CudaToolChain::addClangTargetOptions(
-    const llvm::opt::ArgList &DriverArgs,
-    llvm::opt::ArgStringList &CC1Args,
+    const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
     Action::OffloadKind DeviceOffloadingKind) const {
   HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
 
@@ -708,33 +853,6 @@
   return llvm::DenormalMode::getIEEE();
 }
 
-bool CudaToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const {
-  const Option &O = A->getOption();
-  return (O.matches(options::OPT_gN_Group) &&
-          !O.matches(options::OPT_gmodules)) ||
-         O.matches(options::OPT_g_Flag) ||
-         O.matches(options::OPT_ggdbN_Group) || O.matches(options::OPT_ggdb) ||
-         O.matches(options::OPT_gdwarf) || O.matches(options::OPT_gdwarf_2) ||
-         O.matches(options::OPT_gdwarf_3) || O.matches(options::OPT_gdwarf_4) ||
-         O.matches(options::OPT_gdwarf_5) ||
-         O.matches(options::OPT_gcolumn_info);
-}
-
-void CudaToolChain::adjustDebugInfoKind(
-    codegenoptions::DebugInfoKind &DebugInfoKind, const ArgList &Args) const {
-  switch (mustEmitDebugInfo(Args)) {
-  case DisableDebugInfo:
-    DebugInfoKind = codegenoptions::NoDebugInfo;
-    break;
-  case DebugDirectivesOnly:
-    DebugInfoKind = codegenoptions::DebugDirectivesOnly;
-    break;
-  case EmitSameDebugInfoAsHost:
-    // Use same debug info level as the host.
-    break;
-  }
-}
-
 void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
                                        ArgStringList &CC1Args) const {
   // Check our CUDA version if we're going to include the CUDA headers.
@@ -785,12 +903,20 @@
   return DAL;
 }
 
+Tool *NVPTXToolChain::buildAssembler() const {
+  return new tools::NVPTX::Assembler(*this);
+}
+
+Tool *NVPTXToolChain::buildLinker() const {
+  return new tools::NVPTX::Linker(*this);
+}
+
 Tool *CudaToolChain::buildAssembler() const {
   return new tools::NVPTX::Assembler(*this);
 }
 
 Tool *CudaToolChain::buildLinker() const {
-  return new tools::NVPTX::Linker(*this);
+  return new tools::NVPTX::FatBinary(*this);
 }
 
 void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
diff --git a/clang/test/Driver/cuda-cross-compiling.c b/clang/test/Driver/cuda-cross-compiling.c
new file mode 100644
--- /dev/null
+++ b/clang/test/Driver/cuda-cross-compiling.c
@@ -0,0 +1,48 @@
+// Tests the driver when targeting the NVPTX architecture directly without a
+// host toolchain to perform CUDA mappings.
+
+// REQUIRES: nvptx-registered-target
+
+//
+// Test the generated phases when targeting NVPTX.
+//
+// RUN: %clang -target nvptx64-nvidia-cuda -ccc-print-phases %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=PHASES %s
+
+// PHASES: 0: input, "[[INPUT:.+]]", c
+// PHASES-NEXT: 1: preprocessor, {0}, cpp-output
+// PHASES-NEXT: 2: compiler, {1}, ir
+// PHASES-NEXT: 3: backend, {2}, assembler
+// PHASES-NEXT: 4: assembler, {3}, object
+// PHASES-NEXT: 5: linker, {4}, image
+
+//
+// Test the generated bindings when targeting NVPTX.
+//
+// RUN: %clang -target nvptx64-nvidia-cuda -ccc-print-bindings %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=BINDINGS %s
+
+// BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX:.+]].s"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX]].s"], output: "[[CUBIN:.+]].o"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN]].o"], output: "a.out"
+
+//
+// Test the generated arguments to the CUDA binary utils when targeting NVPTX.
+// Ensure that the '.o' files are converted to '.cubin' as well.
+//
+// RUN: %clang -target nvptx64-nvidia-cuda -march=sm_61 -### %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=ARGS %s
+
+// ARGS: -cc1" "-triple" "nvptx64-nvidia-cuda" "-S" {{.*}} "-target-cpu" "sm_61" "-target-feature" "+ptx{{[0-9]+}}" {{.*}} "-o" "[[PTX:.+]].s"
+// ARGS-NEXT: ptxas" "-m64" "-O0" "--gpu-name" "sm_61" "--output-file" "[[CUBIN:.+]].o" "[[PTX]].s" "-c"
+// ARGS-NEXT: nvlink" "-o" "a.out" "-arch" "sm_61" {{.*}} "{{.*}}.cubin"
+
+//
+// Test that the generated arguments use the default architecture when none is given.
+//
+// RUN: %clang -target nvptx64-nvidia-cuda -### %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=DEFAULT %s
+
+// DEFAULT: -cc1" "-triple" "nvptx64-nvidia-cuda" "-S" {{.*}} "-target-cpu" "sm_35" "-target-feature" "+ptx{{[0-9]+}}" {{.*}} "-o" "[[PTX:.+]].s"
+// DEFAULT-NEXT: ptxas" "-m64" "-O0" "--gpu-name" "sm_35" "--output-file" "[[CUBIN:.+]].o" "[[PTX]].s" "-c"
+// DEFAULT-NEXT: nvlink" "-o" "a.out" "-arch" "sm_35" {{.*}} "{{.*}}.cubin"