diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h --- a/clang/include/clang/Basic/Cuda.h +++ b/clang/include/clang/Basic/Cuda.h @@ -102,6 +102,9 @@ LAST, }; +constexpr CudaArch DefaultCudaArch = CudaArch::SM_35; +constexpr CudaArch DefaultHIPArch = CudaArch::GFX803; + static inline bool IsNVIDIAGpuArch(CudaArch A) { return A >= CudaArch::SM_20 && A < CudaArch::GFX600; } diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -327,6 +327,8 @@ "invalid value '%1' in '%0', value must be 'none' or a positive integer">; def err_drv_small_columns : Error< "invalid value '%1' in '%0', value must be '%2' or greater">; +def err_drv_non_relocatable : Error< + "the new driver requires relocatable code, compile with '-fgpu-rdc' enabled">; def err_drv_invalid_malign_branch_EQ : Error< "invalid argument '%0' to -malign-branch=; each element must be one of: %1">; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2512,6 +2512,8 @@ HelpText<"Use the static host OpenMP runtime while linking.">; def fopenmp_new_driver : Flag<["-"], "fopenmp-new-driver">, Flags<[CC1Option]>, Group, HelpText<"Use the new driver for OpenMP offloading.">; +def foffload_new_driver : Flag<["-"], "foffload-new-driver">, Flags<[CC1Option]>, Group, + HelpText<"Use the new driver for offloading.">; def fno_optimize_sibling_calls : Flag<["-"], "fno-optimize-sibling-calls">, Group; def foptimize_sibling_calls : Flag<["-"], "foptimize-sibling-calls">, Group; defm escaping_block_tail_calls : BoolFOption<"escaping-block-tail-calls", diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -456,6 
+456,10 @@ } } + // Using the new offloading driver implies relocatable device code. + if (A->getOption().matches(options::OPT_foffload_new_driver)) + DAL->AddFlagArg(A, Opts.getOption(options::OPT_fgpu_rdc)); + // Pick up inputs via the -- option. if (A->getOption().matches(options::OPT__DASH_DASH)) { A->claim(); @@ -4088,6 +4092,101 @@ Args.ClaimAllArgs(options::OPT_cuda_compile_host_device); } +/// Returns the canonical name of the CUDA or HIP offloading architecture +/// ArchStr; an invalid name is diagnosed and yields an empty StringRef. +static StringRef getCanonicalArchString(Compilation &C, + llvm::opt::DerivedArgList &Args, + StringRef ArchStr, + Action::OffloadKind Kind) { + if (Kind == Action::OFK_Cuda) { + CudaArch Arch = StringToCudaArch(ArchStr); + if (Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch(Arch)) { + C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr; + return StringRef(); + } + return Args.MakeArgStringRef(CudaArchToString(Arch)); + } else if (Kind == Action::OFK_HIP) { + llvm::StringMap Features; + // getHIPOffloadTargetTriple() is known to return a valid value as it has + // already been called successfully in CreateOffloadingDeviceToolChains(). + auto Arch = parseTargetID( + *getHIPOffloadTargetTriple(C.getDriver(), C.getInputArgs()), ArchStr, + &Features); + if (!Arch) { + C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << ArchStr; + C.setContainsError(); + return StringRef(); + } + return Args.MakeArgStringRef( + getCanonicalTargetID(Arch.getValue(), Features)); + } + return StringRef(); +} + +/// Checks the offloading architectures for conflicts (only done for HIP). +/// Returns the incompatible pair if a conflict occurs. 
+static llvm::Optional> +getConflictOffloadArchCombination(const llvm::DenseSet &Archs, + Action::OffloadKind Kind) { + if (Kind != Action::OFK_HIP) + return None; + + std::set ArchSet; + llvm::copy(Archs, std::inserter(ArchSet, ArchSet.begin())); + return getConflictTargetIDCombination(ArchSet); +} + +/// Returns the set of bound architectures active for this offloading kind. +/// OpenMP has no bound architecture, so its set holds only the empty string; +/// CUDA and HIP fall back to their default architecture when none is given. +static llvm::DenseSet +getOffloadArchs(Compilation &C, llvm::opt::DerivedArgList &Args, + Action::OffloadKind Kind) { + + // If this is OpenMP offloading we don't use a bound architecture. + if (Kind == Action::OFK_OpenMP) + return llvm::DenseSet{StringRef()}; + + // --offload and --offload-arch options are mutually exclusive. + if (Args.hasArgNoClaim(options::OPT_offload_EQ) && + Args.hasArgNoClaim(options::OPT_offload_arch_EQ, + options::OPT_no_offload_arch_EQ)) { + C.getDriver().Diag(diag::err_opt_not_valid_with_opt) + << "--offload" + << (Args.hasArgNoClaim(options::OPT_offload_arch_EQ) + ? 
"--offload-arch" + : "--no-offload-arch"); + } + + llvm::DenseSet Archs; + for (auto &Arg : Args) { + if (Arg->getOption().matches(options::OPT_offload_arch_EQ)) { + Archs.insert(getCanonicalArchString(C, Args, Arg->getValue(), Kind)); + } else if (Arg->getOption().matches(options::OPT_no_offload_arch_EQ)) { + if (Arg->getValue() == StringRef("all")) + Archs.clear(); + else + Archs.erase(getCanonicalArchString(C, Args, Arg->getValue(), Kind)); + } + } + + if (auto ConflictingArchs = getConflictOffloadArchCombination(Archs, Kind)) { + C.getDriver().Diag(clang::diag::err_drv_bad_offload_arch_combo) + << ConflictingArchs.getValue().first + << ConflictingArchs.getValue().second; + C.setContainsError(); + } + + if (Archs.empty()) { + if (Kind == Action::OFK_Cuda) + Archs.insert(CudaArchToString(DefaultCudaArch)); + else if (Kind == Action::OFK_HIP) + Archs.insert(CudaArchToString(DefaultHIPArch)); + } + + return Archs; +} + Action *Driver::BuildOffloadingActions(Compilation &C, llvm::opt::DerivedArgList &Args, const InputTy &Input, @@ -4100,12 +4199,18 @@ types::ID InputType = Input.first; const Arg *InputArg = Input.second; - const Action::OffloadKind OffloadKinds[] = {Action::OFK_OpenMP}; + const Action::OffloadKind OffloadKinds[] = { + Action::OFK_OpenMP, Action::OFK_Cuda, Action::OFK_HIP}; for (Action::OffloadKind Kind : OffloadKinds) { SmallVector ToolChains; ActionList DeviceActions; + const bool Relocatable = + Kind == Action::OFK_OpenMP || + Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, + /*Default=*/false); + auto TCRange = C.getOffloadToolChains(Kind); for (auto TI = TCRange.first, TE = TCRange.second; TI != TE; ++TI) ToolChains.push_back(TI->second); @@ -4113,7 +4218,18 @@ if (ToolChains.empty()) continue; - for (unsigned I = 0; I < ToolChains.size(); ++I) + if (!Relocatable) { + Diags.Report(diag::err_drv_non_relocatable); + return HostAction; + } + + // Get the product of all bound architectures and toolchains. 
+ SmallVector> TCAndArchs; + for (const ToolChain *TC : ToolChains) + for (StringRef Arch : getOffloadArchs(C, Args, Kind)) + TCAndArchs.push_back(std::make_pair(TC, Arch)); + + for (unsigned I = 0, E = TCAndArchs.size(); I != E; ++I) DeviceActions.push_back(C.MakeAction(*InputArg, InputType)); if (DeviceActions.empty()) @@ -4127,7 +4243,7 @@ break; } - auto TC = ToolChains.begin(); + auto TCAndArch = TCAndArchs.begin(); for (Action *&A : DeviceActions) { A = ConstructPhaseAction(C, Args, Phase, A, Kind); @@ -4135,19 +4251,28 @@ HostAction->setCannotBeCollapsedWithNextDependentAction(); OffloadAction::HostDependence HDep( *HostAction, *C.getSingleOffloadToolChain(), - /*BourdArch=*/nullptr, Action::OFK_OpenMP); + /*BoundArch=*/nullptr, Kind); OffloadAction::DeviceDependences DDep; - DDep.add(*A, **TC, /*BoundArch=*/nullptr, Kind); + DDep.add(*A, *TCAndArch->first, /*BoundArch=*/nullptr, Kind); A = C.MakeAction(HDep, DDep); + ++TCAndArch; + } else if (isa(A) && Kind == Action::OFK_Cuda) { + ActionList FatbinActions; + for (Action *A : {A, A->getInputs()[0]}) { + OffloadAction::DeviceDependences DDep; + DDep.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind); + FatbinActions.emplace_back( + C.MakeAction(DDep, A->getType())); + } + A = C.MakeAction(FatbinActions, types::TY_CUDA_FATBIN); } - ++TC; } } - auto TC = ToolChains.begin(); + auto TCAndArch = TCAndArchs.begin(); for (Action *A : DeviceActions) { - DDeps.add(*A, **TC, /*BoundArch=*/nullptr, Kind); - TC++; + DDeps.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind); + ++TCAndArch; } } @@ -4249,7 +4374,7 @@ return C.MakeAction(Input, Output); } if (isUsingLTO(/* IsOffload */ true) && - TargetDeviceOffloadKind == Action::OFK_OpenMP) { + TargetDeviceOffloadKind != Action::OFK_None) { types::ID Output = Args.hasArg(options::OPT_S) ? 
types::TY_LTO_IR : types::TY_LTO_BC; return C.MakeAction(Input, Output); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -71,8 +71,8 @@ if (Args.hasArg(options::OPT_static)) if (const Arg *A = Args.getLastArg(options::OPT_dynamic, options::OPT_mdynamic_no_pic)) - D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args) - << "-static"; + D.Diag(diag::err_drv_argument_not_allowed_with) + << A->getAsString(Args) << "-static"; } // Add backslashes to escape spaces and other backslashes. @@ -157,8 +157,8 @@ /// parameter in reciprocal argument strings. Return false if there is an error /// parsing the refinement step. Otherwise, return true and set the Position /// of the refinement step in the input string. -static bool getRefinementStep(StringRef In, const Driver &D, - const Arg &A, size_t &Position) { +static bool getRefinementStep(StringRef In, const Driver &D, const Arg &A, + size_t &Position) { const char RefinementStepToken = ':'; Position = In.find(RefinementStepToken); if (Position != StringRef::npos) { @@ -510,7 +510,7 @@ } static bool mustUseNonLeafFramePointerForTarget(const llvm::Triple &Triple) { - switch (Triple.getArch()){ + switch (Triple.getArch()) { default: return false; case llvm::Triple::arm: @@ -705,7 +705,7 @@ /// Add a CC1 and CC1AS option to specify the coverage file path prefix map. 
static void addCoveragePrefixMapArg(const Driver &D, const ArgList &Args, - ArgStringList &CmdArgs) { + ArgStringList &CmdArgs) { for (const Arg *A : Args.filtered(options::OPT_ffile_prefix_map_EQ, options::OPT_fcoverage_prefix_map_EQ)) { StringRef Map = A->getValue(); @@ -801,13 +801,12 @@ CSPGOGenerateArg->getOption().matches(options::OPT_fno_profile_generate)) CSPGOGenerateArg = nullptr; - auto *ProfileGenerateArg = Args.getLastArg( - options::OPT_fprofile_instr_generate, - options::OPT_fprofile_instr_generate_EQ, - options::OPT_fno_profile_instr_generate); - if (ProfileGenerateArg && - ProfileGenerateArg->getOption().matches( - options::OPT_fno_profile_instr_generate)) + auto *ProfileGenerateArg = + Args.getLastArg(options::OPT_fprofile_instr_generate, + options::OPT_fprofile_instr_generate_EQ, + options::OPT_fno_profile_instr_generate); + if (ProfileGenerateArg && ProfileGenerateArg->getOption().matches( + options::OPT_fno_profile_instr_generate)) ProfileGenerateArg = nullptr; if (PGOGenerateArg && ProfileGenerateArg) @@ -1334,8 +1333,8 @@ } if (ThroughHeader.empty()) { - CmdArgs.push_back(Args.MakeArgString( - Twine("-pch-through-hdrstop-") + (YcArg ? "create" : "use"))); + CmdArgs.push_back(Args.MakeArgString(Twine("-pch-through-hdrstop-") + + (YcArg ? "create" : "use"))); } else { CmdArgs.push_back( Args.MakeArgString(Twine("-pch-through-header=") + ThroughHeader)); @@ -1374,8 +1373,8 @@ continue; } else { // Ignore the PCH if not first on command line and emit warning. - D.Diag(diag::warn_drv_pch_not_first_include) << P - << A->getAsString(Args); + D.Diag(diag::warn_drv_pch_not_first_include) + << P << A->getAsString(Args); } } } else if (A->getOption().matches(options::OPT_isystem_after)) { @@ -1552,8 +1551,9 @@ if (Arg *FinalOutput = Args.getLastArg(options::OPT_o)) F = FinalOutput->getValue(); } else { - if (Format != "yaml" && // For YAML, keep the original behavior. 
- Triple.isOSDarwin() && // Enable this only on darwin, since it's the only platform supporting .dSYM bundles. + if (Format != "yaml" && // For YAML, keep the original behavior. + Triple.isOSDarwin() && // Enable this only on darwin, since it's the + // only platform supporting .dSYM bundles. Output.isFilename()) F = Output.getFilename(); } @@ -1647,7 +1647,7 @@ StringRef(*StrictAlignIter) == "+strict-align") CmdArgs.push_back("-Wunaligned-access"); } -} +} // namespace static void CollectARMPACBTIOptions(const ToolChain &TC, const ArgList &Args, ArgStringList &CmdArgs, bool isAArch64) { @@ -1842,7 +1842,7 @@ CmdArgs.push_back("-target-abi"); CmdArgs.push_back(ABIName); } -} +} // namespace void Clang::AddAArch64TargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { @@ -1884,17 +1884,19 @@ if (Val.endswith("+")) Val = Val.substr(0, Val.size() - 1); else { - bool Invalid = Val.getAsInteger(10, Bits); (void)Invalid; + bool Invalid = Val.getAsInteger(10, Bits); + (void)Invalid; assert(!Invalid && "Failed to parse value"); CmdArgs.push_back( Args.MakeArgString("-mvscale-max=" + llvm::Twine(Bits / 128))); } - bool Invalid = Val.getAsInteger(10, Bits); (void)Invalid; + bool Invalid = Val.getAsInteger(10, Bits); + (void)Invalid; assert(!Invalid && "Failed to parse value"); CmdArgs.push_back( Args.MakeArgString("-mvscale-min=" + llvm::Twine(Bits / 128))); - // Silently drop requests for vector-length agnostic code as it's implied. + // Silently drop requests for vector-length agnostic code as it's implied. } else if (!Val.equals("scalable")) // Handle the unsupported values passed to msve-vector-bits. 
D.Diag(diag::err_drv_unsupported_option_argument) @@ -2077,8 +2079,8 @@ if (T.isOSBinFormatELF()) { switch (getToolChain().getArch()) { case llvm::Triple::ppc64: { - if ((T.isOSFreeBSD() && T.getOSMajorVersion() >= 13) || - T.isOSOpenBSD() || T.isMusl()) + if ((T.isOSFreeBSD() && T.getOSMajorVersion() >= 13) || T.isOSOpenBSD() || + T.isMusl()) ABIName = "elfv2"; else ABIName = "elfv1"; @@ -2100,9 +2102,9 @@ else if (V == "ibmlongdouble") IEEELongDouble = false; else if (V != "altivec") - // The ppc64 linux abis are all "altivec" abis by default. Accept and ignore - // the option if given as we don't have backend support for any targets - // that don't use the altivec abi. + // The ppc64 linux abis are all "altivec" abis by default. Accept and + // ignore the option if given as we don't have backend support for any + // targets that don't use the altivec abi. ABIName = A->getValue(); } if (IEEELongDouble) @@ -2204,8 +2206,8 @@ void Clang::AddSystemZTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { - bool HasBackchain = Args.hasFlag(options::OPT_mbackchain, - options::OPT_mno_backchain, false); + bool HasBackchain = + Args.hasFlag(options::OPT_mbackchain, options::OPT_mno_backchain, false); bool HasPackedStack = Args.hasFlag(options::OPT_mpacked_stack, options::OPT_mno_packed_stack, false); systemz::FloatABI FloatABI = @@ -2214,7 +2216,7 @@ if (HasBackchain && HasPackedStack && !HasSoftFloat) { const Driver &D = getToolChain().getDriver(); D.Diag(diag::err_drv_unsupported_opt) - << "-mpacked-stack -mbackchain -mhard-float"; + << "-mpacked-stack -mbackchain -mhard-float"; } if (HasBackchain) CmdArgs.push_back("-mbackchain"); @@ -2371,7 +2373,8 @@ void Clang::DumpCompilationDatabase(Compilation &C, StringRef Filename, StringRef Target, const InputInfo &Output, - const InputInfo &Input, const ArgList &Args) const { + const InputInfo &Input, + const ArgList &Args) const { // If this is a dry run, do not create the compilation database file. 
if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH)) return; @@ -2384,8 +2387,8 @@ auto File = std::make_unique( Filename, EC, llvm::sys::fs::OF_TextWithCRLF); if (EC) { - D.Diag(clang::diag::err_drv_compilationdatabase) << Filename - << EC.message(); + D.Diag(clang::diag::err_drv_compilationdatabase) + << Filename << EC.message(); return; } CompilationDatabase = std::move(File); @@ -2408,7 +2411,7 @@ CDB << ", \"" << escape(Buf) << "\""; } CDB << ", \"" << escape(Input.getFilename()) << "\""; - for (auto &A: Args) { + for (auto &A : Args) { auto &O = A->getOption(); // Skip language selection, which is positional. if (O.getID() == options::OPT_x) @@ -2424,7 +2427,7 @@ // All other arguments are quoted and appended. ArgStringList ASL; A->render(Args, ASL); - for (auto &it: ASL) + for (auto &it : ASL) CDB << ", \"" << escape(it) << "\""; } Buf = "--target="; @@ -2646,26 +2649,26 @@ Value.startswith("-mhwdiv") || Value.startswith("-march")) { // Do nothing, we'll validate it later. } else if (Value == "-defsym") { - if (A->getNumValues() != 2) { - D.Diag(diag::err_drv_defsym_invalid_format) << Value; - break; - } - const char *S = A->getValue(1); - auto Pair = StringRef(S).split('='); - auto Sym = Pair.first; - auto SVal = Pair.second; - - if (Sym.empty() || SVal.empty()) { - D.Diag(diag::err_drv_defsym_invalid_format) << S; - break; - } - int64_t IVal; - if (SVal.getAsInteger(0, IVal)) { - D.Diag(diag::err_drv_defsym_invalid_symval) << SVal; - break; - } - CmdArgs.push_back(Value.data()); - TakeNextArg = true; + if (A->getNumValues() != 2) { + D.Diag(diag::err_drv_defsym_invalid_format) << Value; + break; + } + const char *S = A->getValue(1); + auto Pair = StringRef(S).split('='); + auto Sym = Pair.first; + auto SVal = Pair.second; + + if (Sym.empty() || SVal.empty()) { + D.Diag(diag::err_drv_defsym_invalid_format) << S; + break; + } + int64_t IVal; + if (SVal.getAsInteger(0, IVal)) { + D.Diag(diag::err_drv_defsym_invalid_symval) << SVal; + break; + } + 
CmdArgs.push_back(Value.data()); + TakeNextArg = true; } else if (Value == "-fdebug-compilation-dir") { CmdArgs.push_back("-fdebug-compilation-dir"); TakeNextArg = true; @@ -2717,7 +2720,7 @@ bool AssociativeMath = false; bool ReciprocalMath = false; bool SignedZeros = true; - bool TrappingMath = false; // Implemented via -ffp-exception-behavior + bool TrappingMath = false; // Implemented via -ffp-exception-behavior bool TrappingMathPresent = false; // Is trapping-math in args, and not // overriden by ffp-exception-behavior? bool RoundingFPMath = false; @@ -2773,10 +2776,9 @@ StringRef Val = A->getValue(); if (OFastEnabled && !Val.equals("fast")) { - // Only -ffp-model=fast is compatible with OFast, ignore. + // Only -ffp-model=fast is compatible with OFast, ignore. D.Diag(clang::diag::warn_drv_overriding_flag_option) - << Args.MakeArgString("-ffp-model=" + Val) - << "-Ofast"; + << Args.MakeArgString("-ffp-model=" + Val) << "-Ofast"; break; } StrictFPModel = false; @@ -2808,35 +2810,65 @@ D.Diag(diag::err_drv_unsupported_option_argument) << A->getOption().getName() << Val; break; - } + } } switch (optID) { // If this isn't an FP option skip the claim below - default: continue; + default: + continue; // Options controlling individual features - case options::OPT_fhonor_infinities: HonorINFs = true; break; - case options::OPT_fno_honor_infinities: HonorINFs = false; break; - case options::OPT_fhonor_nans: HonorNaNs = true; break; - case options::OPT_fno_honor_nans: HonorNaNs = false; break; - case options::OPT_fapprox_func: ApproxFunc = true; break; - case options::OPT_fno_approx_func: ApproxFunc = false; break; - case options::OPT_fmath_errno: MathErrno = true; break; - case options::OPT_fno_math_errno: MathErrno = false; break; - case options::OPT_fassociative_math: AssociativeMath = true; break; - case options::OPT_fno_associative_math: AssociativeMath = false; break; - case options::OPT_freciprocal_math: ReciprocalMath = true; break; - case 
options::OPT_fno_reciprocal_math: ReciprocalMath = false; break; - case options::OPT_fsigned_zeros: SignedZeros = true; break; - case options::OPT_fno_signed_zeros: SignedZeros = false; break; + case options::OPT_fhonor_infinities: + HonorINFs = true; + break; + case options::OPT_fno_honor_infinities: + HonorINFs = false; + break; + case options::OPT_fhonor_nans: + HonorNaNs = true; + break; + case options::OPT_fno_honor_nans: + HonorNaNs = false; + break; + case options::OPT_fapprox_func: + ApproxFunc = true; + break; + case options::OPT_fno_approx_func: + ApproxFunc = false; + break; + case options::OPT_fmath_errno: + MathErrno = true; + break; + case options::OPT_fno_math_errno: + MathErrno = false; + break; + case options::OPT_fassociative_math: + AssociativeMath = true; + break; + case options::OPT_fno_associative_math: + AssociativeMath = false; + break; + case options::OPT_freciprocal_math: + ReciprocalMath = true; + break; + case options::OPT_fno_reciprocal_math: + ReciprocalMath = false; + break; + case options::OPT_fsigned_zeros: + SignedZeros = true; + break; + case options::OPT_fno_signed_zeros: + SignedZeros = false; + break; case options::OPT_ftrapping_math: if (!TrappingMathPresent && !FPExceptionBehavior.empty() && !FPExceptionBehavior.equals("strict")) // Warn that previous value of option is overridden. D.Diag(clang::diag::warn_drv_overriding_flag_option) - << Args.MakeArgString("-ffp-exception-behavior=" + FPExceptionBehavior) - << "-ftrapping-math"; + << Args.MakeArgString("-ffp-exception-behavior=" + + FPExceptionBehavior) + << "-ftrapping-math"; TrappingMath = true; TrappingMathPresent = true; FPExceptionBehavior = "strict"; @@ -2846,8 +2878,9 @@ !FPExceptionBehavior.equals("ignore")) // Warn that previous value of option is overridden. 
D.Diag(clang::diag::warn_drv_overriding_flag_option) - << Args.MakeArgString("-ffp-exception-behavior=" + FPExceptionBehavior) - << "-fno-trapping-math"; + << Args.MakeArgString("-ffp-exception-behavior=" + + FPExceptionBehavior) + << "-fno-trapping-math"; TrappingMath = false; TrappingMathPresent = true; FPExceptionBehavior = "ignore"; @@ -2891,7 +2924,7 @@ FPContract = Val; else D.Diag(diag::err_drv_unsupported_option_argument) - << A->getOption().getName() << Val; + << A->getOption().getName() << Val; break; } @@ -2909,8 +2942,9 @@ !FPExceptionBehavior.equals(Val)) // Warn that previous value of option is overridden. D.Diag(clang::diag::warn_drv_overriding_flag_option) - << Args.MakeArgString("-ffp-exception-behavior=" + FPExceptionBehavior) - << Args.MakeArgString("-ffp-exception-behavior=" + Val); + << Args.MakeArgString("-ffp-exception-behavior=" + + FPExceptionBehavior) + << Args.MakeArgString("-ffp-exception-behavior=" + Val); TrappingMath = TrappingMathPresent = false; if (Val.equals("ignore") || Val.equals("maytrap")) FPExceptionBehavior = Val; @@ -3021,9 +3055,10 @@ StrictFPModel = false; FPModel = ""; D.Diag(clang::diag::warn_drv_overriding_flag_option) - << "-ffp-model=strict" << - ((A->getNumValues() == 0) ? A->getSpelling() - : Args.MakeArgString(A->getSpelling() + A->getValue())); + << "-ffp-model=strict" + << ((A->getNumValues() == 0) + ? 
A->getSpelling() + : Args.MakeArgString(A->getSpelling() + A->getValue())); } } @@ -3087,8 +3122,8 @@ CmdArgs.push_back(Args.MakeArgString("-frounding-math")); if (!FPExceptionBehavior.empty()) - CmdArgs.push_back(Args.MakeArgString("-ffp-exception-behavior=" + - FPExceptionBehavior)); + CmdArgs.push_back( + Args.MakeArgString("-ffp-exception-behavior=" + FPExceptionBehavior)); if (!FPEvalMethod.empty()) CmdArgs.push_back(Args.MakeArgString("-ffp-eval-method=" + FPEvalMethod)); @@ -3110,8 +3145,8 @@ CmdArgs.push_back(Args.MakeArgString("-ffp-contract=fast")); else D.Diag(clang::diag::warn_drv_overriding_flag_option) - << "-ffp-model=fast" - << Args.MakeArgString("-ffp-contract=" + FPContract); + << "-ffp-model=fast" + << Args.MakeArgString("-ffp-contract=" + FPContract); } } @@ -3166,8 +3201,7 @@ CmdArgs.push_back("-analyzer-checker=osx"); CmdArgs.push_back( "-analyzer-checker=security.insecureAPI.decodeValueOfObjCType"); - } - else if (Triple.isOSFuchsia()) + } else if (Triple.isOSFuchsia()) CmdArgs.push_back("-analyzer-checker=fuchsia"); CmdArgs.push_back("-analyzer-checker=deadcode"); @@ -3176,7 +3210,8 @@ CmdArgs.push_back("-analyzer-checker=cplusplus"); if (!Triple.isPS4()) { - CmdArgs.push_back("-analyzer-checker=security.insecureAPI.UncheckedReturn"); + CmdArgs.push_back( + "-analyzer-checker=security.insecureAPI.UncheckedReturn"); CmdArgs.push_back("-analyzer-checker=security.insecureAPI.getpw"); CmdArgs.push_back("-analyzer-checker=security.insecureAPI.gets"); CmdArgs.push_back("-analyzer-checker=security.insecureAPI.mktemp"); @@ -3391,7 +3426,8 @@ } if (!TrivialAutoVarInit.empty()) { - if (TrivialAutoVarInit == "zero" && !Args.hasArg(options::OPT_enable_trivial_var_init_zero)) + if (TrivialAutoVarInit == "zero" && + !Args.hasArg(options::OPT_enable_trivial_var_init_zero)) D.Diag(diag::err_drv_trivial_auto_var_init_zero_disabled); CmdArgs.push_back( Args.MakeArgString("-ftrivial-auto-var-init=" + TrivialAutoVarInit)); @@ -3428,8 +3464,7 @@ 
options::OPT_cl_mad_enable, options::OPT_cl_no_signed_zeros, options::OPT_cl_fp32_correctly_rounded_divide_sqrt, - options::OPT_cl_uniform_work_group_size - }; + options::OPT_cl_uniform_work_group_size}; if (Arg *A = Args.getLastArg(options::OPT_cl_std_EQ)) { std::string CLStdStr = std::string("-cl-std=") + A->getValue(); @@ -3458,7 +3493,8 @@ options::OPT_ccc_arcmt_migrate)) { ARCMTEnabled = true; switch (A->getOption().getID()) { - default: llvm_unreachable("missed a case"); + default: + llvm_unreachable("missed a case"); case options::OPT_ccc_arcmt_check: CmdArgs.push_back("-arcmt-action=check"); break; @@ -3744,10 +3780,9 @@ static void RenderCharacterOptions(const ArgList &Args, const llvm::Triple &T, ArgStringList &CmdArgs) { // -fsigned-char is default. - if (const Arg *A = Args.getLastArg(options::OPT_fsigned_char, - options::OPT_fno_signed_char, - options::OPT_funsigned_char, - options::OPT_fno_unsigned_char)) { + if (const Arg *A = Args.getLastArg( + options::OPT_fsigned_char, options::OPT_fno_signed_char, + options::OPT_funsigned_char, options::OPT_fno_unsigned_char)) { if (A->getOption().matches(options::OPT_funsigned_char) || A->getOption().matches(options::OPT_fno_signed_char)) { CmdArgs.push_back("-fno-signed-char"); @@ -3840,9 +3875,8 @@ auto *Arg = Args.getLastArg( options::OPT_fobjc_convert_messages_to_runtime_calls, options::OPT_fno_objc_convert_messages_to_runtime_calls); - if (Arg && - Arg->getOption().matches( - options::OPT_fno_objc_convert_messages_to_runtime_calls)) + if (Arg && Arg->getOption().matches( + options::OPT_fno_objc_convert_messages_to_runtime_calls)) CmdArgs.push_back("-fno-objc-convert-messages-to-runtime-calls"); } @@ -4290,7 +4324,8 @@ ? 
"-gpubnames" : "-ggnu-pubnames"); const auto *SimpleTemplateNamesArg = - Args.getLastArg(options::OPT_gsimple_template_names, options::OPT_gno_simple_template_names, + Args.getLastArg(options::OPT_gsimple_template_names, + options::OPT_gno_simple_template_names, options::OPT_gsimple_template_names_EQ); bool ForwardTemplateParams = DebuggerTuning == llvm::DebuggerKind::SCE; if (SimpleTemplateNamesArg && @@ -4391,6 +4426,7 @@ // one input. bool IsCuda = JA.isOffloading(Action::OFK_Cuda); bool IsCudaDevice = JA.isDeviceOffloading(Action::OFK_Cuda); + bool IsCudaHost = JA.isHostOffloading(Action::OFK_Cuda); bool IsHIP = JA.isOffloading(Action::OFK_HIP); bool IsHIPDevice = JA.isDeviceOffloading(Action::OFK_HIP); bool IsOpenMPDevice = JA.isDeviceOffloading(Action::OFK_OpenMP); @@ -4403,7 +4439,7 @@ // A header module compilation doesn't have a main input file, so invent a // fake one as a placeholder. - const char *ModuleName = [&]{ + const char *ModuleName = [&] { auto *ModuleNameArg = Args.getLastArg(options::OPT_fmodule_name_EQ); return ModuleNameArg ? 
ModuleNameArg->getValue() : ""; }(); @@ -4414,6 +4450,7 @@ InputInfoList ModuleHeaderInputs; InputInfoList OpenMPHostInputs; + InputInfoList CudaHostInputs; const InputInfo *CudaDeviceInput = nullptr; const InputInfo *OpenMPDeviceInput = nullptr; for (const InputInfo &I : Inputs) { @@ -4428,6 +4465,9 @@ << types::getTypeName(Expected); } ModuleHeaderInputs.push_back(I); + } else if (IsCudaHost && Args.hasArg(options::OPT_fopenmp_new_driver)) { + CudaHostInputs.push_back(I); + CudaDeviceInput = &I; } else if ((IsCuda || IsHIP) && !CudaDeviceInput) { CudaDeviceInput = &I; } else if (IsOpenMPDevice && !OpenMPDeviceInput) { @@ -4533,8 +4573,8 @@ bool Failure = Triple.getArchName().substr(Offset).consumeInteger(10, Version); if (Failure || Version < 7) - D.Diag(diag::err_target_unsupported_arch) << Triple.getArchName() - << TripleStr; + D.Diag(diag::err_target_unsupported_arch) + << Triple.getArchName() << TripleStr; } // Push all default warning arguments that are specific to @@ -4604,9 +4644,8 @@ if (JA.getType() == types::TY_Nothing) CmdArgs.push_back("-fsyntax-only"); else if (JA.getType() == types::TY_ModuleFile) - CmdArgs.push_back(IsHeaderModulePrecompile - ? "-emit-header-module" - : "-emit-module-interface"); + CmdArgs.push_back(IsHeaderModulePrecompile ? 
"-emit-header-module" + : "-emit-module-interface"); else CmdArgs.push_back("-emit-pch"); } else if (isa(JA)) { @@ -4769,7 +4808,8 @@ // Render ABI arguments switch (TC.getArch()) { - default: break; + default: + break; case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumbeb: @@ -5105,7 +5145,7 @@ } CodeGenOptions::FramePointerKind FPKeepKind = - getFramePointerKind(Args, RawTriple); + getFramePointerKind(Args, RawTriple); const char *FPKeepKindStr = nullptr; switch (FPKeepKind) { case CodeGenOptions::FramePointerKind::None: @@ -5149,12 +5189,10 @@ options::OPT_fno_allow_editor_placeholders, false)) CmdArgs.push_back("-fallow-editor-placeholders"); if (Args.hasFlag(options::OPT_fstrict_vtable_pointers, - options::OPT_fno_strict_vtable_pointers, - false)) + options::OPT_fno_strict_vtable_pointers, false)) CmdArgs.push_back("-fstrict-vtable-pointers"); if (Args.hasFlag(options::OPT_fforce_emit_vtables, - options::OPT_fno_force_emit_vtables, - false)) + options::OPT_fno_force_emit_vtables, false)) CmdArgs.push_back("-fforce-emit-vtables"); if (!Args.hasFlag(options::OPT_foptimize_sibling_calls, options::OPT_fno_optimize_sibling_calls)) @@ -5697,8 +5735,7 @@ /*Joined=*/true); } else ImplyVCPPCVer = true; - } - else if (IsWindowsMSVC) + } else if (IsWindowsMSVC) ImplyVCPPCXXVer = true; Args.AddLastArg(CmdArgs, options::OPT_ftrigraphs, @@ -5806,7 +5843,7 @@ if (const Arg *A = Args.getLastArg(options::OPT_fcf_runtime_abi_EQ)) { static const char *kCFABIs[] = { - "standalone", "objc", "swift", "swift-5.0", "swift-4.2", "swift-4.1", + "standalone", "objc", "swift", "swift-5.0", "swift-4.2", "swift-4.1", }; if (find(kCFABIs, StringRef(A->getValue())) == std::end(kCFABIs)) @@ -5927,13 +5964,13 @@ << A->getAsString(Args) << TripleStr; } - if (Args.hasFlag(options::OPT_fvisibility_inlines_hidden, - options::OPT_fno_visibility_inlines_hidden, false)) + options::OPT_fno_visibility_inlines_hidden, false)) CmdArgs.push_back("-fvisibility-inlines-hidden"); - 
Args.AddLastArg(CmdArgs, options::OPT_fvisibility_inlines_hidden_static_local_var, - options::OPT_fno_visibility_inlines_hidden_static_local_var); + Args.AddLastArg(CmdArgs, + options::OPT_fvisibility_inlines_hidden_static_local_var, + options::OPT_fno_visibility_inlines_hidden_static_local_var); Args.AddLastArg(CmdArgs, options::OPT_fvisibility_global_new_delete_hidden); Args.AddLastArg(CmdArgs, options::OPT_ftlsmodel_EQ); @@ -6319,8 +6356,8 @@ ToolChain::RTTIMode RTTIMode = TC.getRTTIMode(); - if (KernelOrKext || (types::isCXX(InputType) && - (RTTIMode == ToolChain::RM_Disabled))) + if (KernelOrKext || + (types::isCXX(InputType) && (RTTIMode == ToolChain::RM_Disabled))) CmdArgs.push_back("-fno-rtti"); // -fshort-enums=0 is default for all architectures except Hexagon and z/OS. @@ -6654,16 +6691,16 @@ if (Arg *inputCharset = Args.getLastArg(options::OPT_finput_charset_EQ)) { StringRef value = inputCharset->getValue(); if (!value.equals_insensitive("utf-8")) - D.Diag(diag::err_drv_invalid_value) << inputCharset->getAsString(Args) - << value; + D.Diag(diag::err_drv_invalid_value) + << inputCharset->getAsString(Args) << value; } // -fexec_charset=UTF-8 is default. Reject others if (Arg *execCharset = Args.getLastArg(options::OPT_fexec_charset_EQ)) { StringRef value = execCharset->getValue(); if (!value.equals_insensitive("utf-8")) - D.Diag(diag::err_drv_invalid_value) << execCharset->getAsString(Args) - << value; + D.Diag(diag::err_drv_invalid_value) + << execCharset->getAsString(Args) << value; } RenderDiagnosticsOptions(D, Args, CmdArgs); @@ -6827,15 +6864,13 @@ // parser. 
// -finclude-default-header flag is for preprocessor, // do not pass it to other cc1 commands when save-temps is enabled - if (C.getDriver().isSaveTempsEnabled() && - !isa(JA)) { + if (C.getDriver().isSaveTempsEnabled() && !isa(JA)) { for (auto Arg : Args.filtered(options::OPT_Xclang)) { Arg->claim(); if (StringRef(Arg->getValue()) != "-finclude-default-header") CmdArgs.push_back(Arg->getValue()); } - } - else { + } else { Args.AddAllArgValues(CmdArgs, options::OPT_Xclang); } for (const Arg *A : Args.filtered(options::OPT_mllvm)) { @@ -6911,10 +6946,10 @@ // Host-side cuda compilation receives all device-side outputs in a single // fatbin as Inputs[1]. Include the binary with -fcuda-include-gpubinary. if ((IsCuda || IsHIP) && CudaDeviceInput) { - CmdArgs.push_back("-fcuda-include-gpubinary"); - CmdArgs.push_back(CudaDeviceInput->getFilename()); - if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)) - CmdArgs.push_back("-fgpu-rdc"); + CmdArgs.push_back("-fcuda-include-gpubinary"); + CmdArgs.push_back(CudaDeviceInput->getFilename()); + if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)) + CmdArgs.push_back("-fgpu-rdc"); } if (IsCuda) { @@ -6970,6 +7005,7 @@ auto OpenMPTCs = C.getOffloadToolChains(); for (auto TI = OpenMPTCs.first, TE = OpenMPTCs.second; TI != TE; ++TI, ++InputFile) { + assert(InputFile->isFilename() && "Offloading requires a filename"); const ToolChain *TC = TI->second; const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP); StringRef File = @@ -6982,6 +7018,20 @@ TC->getTripleString() + "." + TCArgs.getLastArgValue(options::OPT_march_EQ) + "." 
+ InputName)); } + } else if (IsCudaHost && !CudaHostInputs.empty()) { + const ToolChain *TC = C.getSingleOffloadToolChain(); + for (const auto &InputFile : CudaHostInputs) { + assert(InputFile.isFilename() && "Offloading requires a filename"); + StringRef File = + C.getArgs().MakeArgString(TC->getInputFilename(InputFile)); + StringRef InputName = Clang::getBaseInputStem(Args, Inputs); + // The CUDA toolchain should have a bound arch appended to the filename. + StringRef Arch = File.rsplit(".").first.rsplit('-').second; + CmdArgs.push_back(Args.MakeArgString( + "-fembed-offload-object=" + File + "," + + Action::GetOffloadKindName(Action::OFK_Cuda) + "." + + TC->getTripleString() + "." + Arch + "." + InputName)); + } } if (Triple.isAMDGPU()) { @@ -7093,14 +7143,14 @@ } if (Args.hasArg(options::OPT_forder_file_instrumentation)) { - CmdArgs.push_back("-forder-file-instrumentation"); - // Enable order file instrumentation when ThinLTO is not on. When ThinLTO is - // on, we need to pass these flags as linker flags and that will be handled - // outside of the compiler. - if (!IsUsingLTO) { - CmdArgs.push_back("-mllvm"); - CmdArgs.push_back("-enable-order-file-instrumentation"); - } + CmdArgs.push_back("-forder-file-instrumentation"); + // Enable order file instrumentation when ThinLTO is not on. When ThinLTO is + // on, we need to pass these flags as linker flags and that will be handled + // outside of the compiler. 
+ if (!IsUsingLTO) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-enable-order-file-instrumentation"); + } } if (Arg *A = Args.getLastArg(options::OPT_fforce_enable_int128, @@ -7226,8 +7276,8 @@ if (Arg *A = Args.getLastArg(options::OPT_pg)) if (FPKeepKind == CodeGenOptions::FramePointerKind::None && !Args.hasArg(options::OPT_mfentry)) - D.Diag(diag::err_drv_argument_not_allowed_with) << "-fomit-frame-pointer" - << A->getAsString(Args); + D.Diag(diag::err_drv_argument_not_allowed_with) + << "-fomit-frame-pointer" << A->getAsString(Args); // Claim some arguments which clang supports automatically. @@ -7280,7 +7330,7 @@ !getToolChain().getTriple().isOSBinFormatCOFF()) { getToolChain().getDriver().Diag( diag::err_drv_gnustep_objc_runtime_incompatible_binary) - << runtime.getVersion().getMajor(); + << runtime.getVersion().getMajor(); } runtimeArg->render(args, cmdArgs); @@ -7579,11 +7629,10 @@ if (VolatileOptionID == options::OPT__SLASH_volatile_ms) CmdArgs.push_back("-fms-volatile"); - if (Args.hasFlag(options::OPT__SLASH_Zc_dllexportInlines_, - options::OPT__SLASH_Zc_dllexportInlines, - false)) { - CmdArgs.push_back("-fno-dllexport-inlines"); - } + if (Args.hasFlag(options::OPT__SLASH_Zc_dllexportInlines_, + options::OPT__SLASH_Zc_dllexportInlines, false)) { + CmdArgs.push_back("-fno-dllexport-inlines"); + } Arg *MostGeneralArg = Args.getLastArg(options::OPT__SLASH_vmg); Arg *BestCaseArg = Args.getLastArg(options::OPT__SLASH_vmb); @@ -7735,7 +7784,7 @@ } void ClangAs::AddRISCVTargetArgs(const ArgList &Args, - ArgStringList &CmdArgs) const { + ArgStringList &CmdArgs) const { const llvm::Triple &Triple = getToolChain().getTriple(); StringRef ABIName = riscv::getRISCVABI(Args, Triple); @@ -7854,7 +7903,6 @@ renderDwarfFormat(D, Triple, Args, CmdArgs, DwarfVersion); RenderDebugInfoCompressionArgs(Args, CmdArgs, D, getToolChain()); - // Handle -fPIC et al -- the relocation-model affects the assembler // for some targets. 
llvm::Reloc::Model RelocationModel; @@ -7916,8 +7964,8 @@ // only, not C/C++. if (Args.hasFlag(options::OPT_mdefault_build_attributes, options::OPT_mno_default_build_attributes, true)) { - CmdArgs.push_back("-mllvm"); - CmdArgs.push_back("-arm-add-build-attributes"); + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-arm-add-build-attributes"); } break; @@ -8234,14 +8282,17 @@ ArgStringList CmdArgs; // Pass the CUDA path to the linker wrapper tool. - for (auto &I : llvm::make_range(OpenMPTCRange.first, OpenMPTCRange.second)) { - const ToolChain *TC = I.second; - if (TC->getTriple().isNVPTX()) { - CudaInstallationDetector CudaInstallation(D, TheTriple, Args); - if (CudaInstallation.isValid()) - CmdArgs.push_back(Args.MakeArgString( - "--cuda-path=" + CudaInstallation.getInstallPath())); - break; + for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_OpenMP}) { + auto TCRange = C.getOffloadToolChains(Kind); + for (auto &I : llvm::make_range(TCRange.first, TCRange.second)) { + const ToolChain *TC = I.second; + if (TC->getTriple().isNVPTX()) { + CudaInstallationDetector CudaInstallation(D, TheTriple, Args); + if (CudaInstallation.isValid()) + CmdArgs.push_back(Args.MakeArgString( + "--cuda-path=" + CudaInstallation.getInstallPath())); + break; + } } } diff --git a/clang/test/Driver/cuda-openmp-driver.cu b/clang/test/Driver/cuda-openmp-driver.cu new file mode 100644 --- /dev/null +++ b/clang/test/Driver/cuda-openmp-driver.cu @@ -0,0 +1,16 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: nvptx-registered-target + +// RUN: %clang -### -target x86_64-linux-gnu -nocudalib -ccc-print-bindings -fgpu-rdc \ +// RUN: -foffload-new-driver --offload-arch=sm_35 --offload-arch=sm_70 %s 2>&1 \ +// RUN: | FileCheck -check-prefix CHECK %s + +// CHECK: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX_SM_35:.+]]" +// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_35]]"], output: 
"[[CUBIN_SM_35:.+]]" +// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_35]]", "[[PTX_SM_35]]"], output: "[[FATBIN_SM_35:.+]]" +// CHECK: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[PTX_SM_70:.+]]" +// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_70:.+]]"], output: "[[CUBIN_SM_70:.+]]" +// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_70]]", "[[PTX_SM_70:.+]]"], output: "[[FATBIN_SM_70:.+]]" +// CHECK: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[FATBIN_SM_35]]", "[[FATBIN_SM_70]]"], output: "[[HOST_OBJ:.+]]" +// CHECK: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"