diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -217,8 +217,7 @@ } InputArgList Driver::ParseArgStrings(ArrayRef ArgStrings, - bool IsClCompatMode, - bool &ContainsError) { + bool IsClCompatMode, bool &ContainsError) { llvm::PrettyStackTraceString CrashInfo("Command line argument parsing"); ContainsError = false; @@ -252,9 +251,9 @@ unsigned DiagID; auto ArgString = A->getAsString(Args); std::string Nearest; - if (getOpts().findNearest( - ArgString, Nearest, IncludedFlagsBitmask, - ExcludedFlagsBitmask | options::Unsupported) > 1) { + if (getOpts().findNearest(ArgString, Nearest, IncludedFlagsBitmask, + ExcludedFlagsBitmask | options::Unsupported) > + 1) { DiagID = diag::err_drv_unsupported_opt; Diag(DiagID) << ArgString; } else { @@ -279,8 +278,8 @@ unsigned DiagID; auto ArgString = A->getAsString(Args); std::string Nearest; - if (getOpts().findNearest( - ArgString, Nearest, IncludedFlagsBitmask, ExcludedFlagsBitmask) > 1) { + if (getOpts().findNearest(ArgString, Nearest, IncludedFlagsBitmask, + ExcludedFlagsBitmask) > 1) { DiagID = IsCLMode() ? diag::warn_drv_unknown_argument_clang_cl : diag::err_drv_unknown_argument; Diags.Report(DiagID) << ArgString; @@ -309,15 +308,14 @@ if (CCCIsCPP() || (PhaseArg = DAL.getLastArg(options::OPT_E)) || (PhaseArg = DAL.getLastArg(options::OPT__SLASH_EP)) || (PhaseArg = DAL.getLastArg(options::OPT_M, options::OPT_MM)) || - (PhaseArg = DAL.getLastArg(options::OPT__SLASH_P)) || - CCGenDiagnostics) { + (PhaseArg = DAL.getLastArg(options::OPT__SLASH_P)) || CCGenDiagnostics) { FinalPhase = phases::Preprocess; - // --precompile only runs up to precompilation. + // --precompile only runs up to precompilation. } else if ((PhaseArg = DAL.getLastArg(options::OPT__precompile))) { FinalPhase = phases::Precompile; - // -{fsyntax-only,-analyze,emit-ast} only run up to the compiler. + // -{fsyntax-only,-analyze,emit-ast} only run up to the compiler. 
} else if ((PhaseArg = DAL.getLastArg(options::OPT_fsyntax_only)) || (PhaseArg = DAL.getLastArg(options::OPT_print_supported_cpus)) || (PhaseArg = DAL.getLastArg(options::OPT_module_file_info)) || @@ -329,18 +327,18 @@ (PhaseArg = DAL.getLastArg(options::OPT_emit_ast))) { FinalPhase = phases::Compile; - // -S only runs up to the backend. + // -S only runs up to the backend. } else if ((PhaseArg = DAL.getLastArg(options::OPT_S))) { FinalPhase = phases::Backend; - // -c compilation only runs up to the assembler. + // -c compilation only runs up to the assembler. } else if ((PhaseArg = DAL.getLastArg(options::OPT_c))) { FinalPhase = phases::Assemble; } else if ((PhaseArg = DAL.getLastArg(options::OPT_emit_interface_stubs))) { FinalPhase = phases::IfsMerge; - // Otherwise do everything. + // Otherwise do everything. } else FinalPhase = phases::Link; @@ -455,8 +453,7 @@ /// /// This routine provides the logic to compute a target triple from various /// args passed to the driver and the default triple string. -static llvm::Triple computeTargetTriple(const Driver &D, - StringRef TargetTriple, +static llvm::Triple computeTargetTriple(const Driver &D, StringRef TargetTriple, const ArgList &Args, StringRef DarwinArchName = "") { // FIXME: Already done in Compilation *Driver::BuildCompilation @@ -567,8 +564,8 @@ // Handle -miamcu flag. if (Args.hasFlag(options::OPT_miamcu, options::OPT_mno_iamcu, false)) { if (Target.get32BitArchVariant().getArch() != llvm::Triple::x86) - D.Diag(diag::err_drv_unsupported_opt_for_target) << "-miamcu" - << Target.str(); + D.Diag(diag::err_drv_unsupported_opt_for_target) + << "-miamcu" << Target.str(); if (A && !A->getOption().matches(options::OPT_m32)) D.Diag(diag::err_drv_argument_not_allowed_with) @@ -973,8 +970,8 @@ // Get architecture name from config file name like 'i386.cfg' or // 'armv7l-clang.cfg'. // Check if command line options changes effective triple. 
- llvm::Triple EffectiveTriple = computeTargetTriple(*this, - CfgTriple.getTriple(), *CLOptions); + llvm::Triple EffectiveTriple = + computeTargetTriple(*this, CfgTriple.getTriple(), *CLOptions); if (CfgTriple.getArch() != EffectiveTriple.getArch()) { FixedConfigFile = EffectiveTriple.getArchName(); FixedArchPrefixLen = FixedConfigFile.size(); @@ -1052,8 +1049,8 @@ bool HasConfigFile = !ContainsError && (CfgOptions.get() != nullptr); // All arguments, from both config file and command line. - InputArgList Args = std::move(HasConfigFile ? std::move(*CfgOptions) - : std::move(*CLOptions)); + InputArgList Args = + std::move(HasConfigFile ? std::move(*CfgOptions) : std::move(*CLOptions)); // The args for config files or /clang: flags belong to different InputArgList // objects than Args. This copies an Arg from one of those other InputArgLists @@ -1195,14 +1192,13 @@ if (Arg *A = Args.getLastArg(options::OPT_fembed_bitcode_EQ)) { StringRef Name = A->getValue(); unsigned Model = llvm::StringSwitch(Name) - .Case("off", EmbedNone) - .Case("all", EmbedBitcode) - .Case("bitcode", EmbedBitcode) - .Case("marker", EmbedMarker) - .Default(~0U); + .Case("off", EmbedNone) + .Case("all", EmbedBitcode) + .Case("bitcode", EmbedBitcode) + .Case("marker", EmbedMarker) + .Default(~0U); if (Model == ~0U) { - Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) - << Name; + Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name; } else BitcodeEmbed = static_cast(Model); } @@ -1214,8 +1210,8 @@ DerivedArgList *TranslatedArgs = TranslateInputArgs(*UArgs); // Owned by the host. - const ToolChain &TC = getToolChain( - *UArgs, computeTargetTriple(*this, TargetTriple, *UArgs)); + const ToolChain &TC = + getToolChain(*UArgs, computeTargetTriple(*this, TargetTriple, *UArgs)); // The compilation takes ownership of Args. 
Compilation *C = new Compilation(*this, TC, UArgs.release(), TranslatedArgs, @@ -1315,7 +1311,7 @@ size_t LineEnd = Data.find_first_of("\n", ParentProcPos); if (LineEnd == StringRef::npos) continue; - StringRef ParentProcess = Data.slice(ParentProcPos+15, LineEnd).trim(); + StringRef ParentProcess = Data.slice(ParentProcPos + 15, LineEnd).trim(); int OpenBracket = -1, CloseBracket = -1; for (size_t i = 0, e = ParentProcess.size(); i < e; ++i) { if (ParentProcess[i] == '[') @@ -1328,7 +1324,8 @@ int CrashPID; if (OpenBracket < 0 || CloseBracket < 0 || ParentProcess.slice(OpenBracket + 1, CloseBracket) - .getAsInteger(10, CrashPID) || CrashPID != PID) { + .getAsInteger(10, CrashPID) || + CrashPID != PID) { continue; } @@ -1531,8 +1528,7 @@ CrashDiagDir += "__.crash"; Diag(clang::diag::note_drv_command_failed_diag_msg) << "Crash backtrace is located in"; - Diag(clang::diag::note_drv_command_failed_diag_msg) - << CrashDiagDir.str(); + Diag(clang::diag::note_drv_command_failed_diag_msg) << CrashDiagDir.str(); Diag(clang::diag::note_drv_command_failed_diag_msg) << "(choose the .crash file that corresponds to your crash)"; } @@ -1817,11 +1813,11 @@ if (C.getArgs().hasArg(options::OPT_v)) { if (!SystemConfigDir.empty()) - llvm::errs() << "System configuration file directory: " - << SystemConfigDir << "\n"; + llvm::errs() << "System configuration file directory: " << SystemConfigDir + << "\n"; if (!UserConfigDir.empty()) - llvm::errs() << "User configuration file directory: " - << UserConfigDir << "\n"; + llvm::errs() << "User configuration file directory: " << UserConfigDir + << "\n"; } const ToolChain &TC = C.getDefaultToolChain(); @@ -1895,7 +1891,7 @@ StringRef ProgName = A->getValue(); // Null program name cannot have a path. - if (! ProgName.empty()) + if (!ProgName.empty()) llvm::outs() << GetProgramPath(ProgName, TC); llvm::outs() << "\n"; @@ -2114,7 +2110,7 @@ // Add in arch bindings for every top level action, as well as lipo and // dsymutil steps if needed. 
- for (Action* Act : SingleActions) { + for (Action *Act : SingleActions) { // Make sure we can lipo this kind of output. If not (and it is an actual // output) then we disallow, since we can't create an output file with the // right name without overwriting it. We could remove this oddity by just @@ -2157,7 +2153,7 @@ // Verify the debug info output. if (Args.hasArg(options::OPT_verify_debug_info)) { - Action* LastAction = Actions.back(); + Action *LastAction = Actions.back(); Actions.pop_back(); Actions.push_back(C.MakeAction( LastAction, types::TY_Nothing)); @@ -2262,7 +2258,7 @@ Args.filtered(options::OPT__SLASH_TC, options::OPT__SLASH_TP)) { if (Previous) { Diag(clang::diag::warn_drv_overriding_flag_option) - << Previous->getSpelling() << A->getSpelling(); + << Previous->getSpelling() << A->getSpelling(); ShowNote = true; } Previous = A; @@ -2312,7 +2308,8 @@ Ty = TC.LookupTypeForExtension(Ext + 1); if (Ty == types::TY_INVALID) { - if (IsCLMode() && (Args.hasArgNoClaim(options::OPT_E) || CCGenDiagnostics)) + if (IsCLMode() && + (Args.hasArgNoClaim(options::OPT_E) || CCGenDiagnostics)) Ty = types::TY_CXX; else if (CCCIsCPP() || CCGenDiagnostics) Ty = types::TY_C; @@ -2494,7 +2491,7 @@ virtual void appendLinkDeviceActions(ActionList &AL) {} /// Append linker host action generated by the builder. - virtual Action* appendLinkHostActions(ActionList &AL) { return nullptr; } + virtual Action *appendLinkHostActions(ActionList &AL) { return nullptr; } /// Append linker actions generated by the builder. 
virtual void appendLinkDependences(OffloadAction::DeviceDependences &DA) {} @@ -2718,7 +2715,7 @@ return false; Relocatable = Args.hasFlag(options::OPT_fgpu_rdc, - options::OPT_fno_gpu_rdc, /*Default=*/false); + options::OPT_fno_gpu_rdc, /*Default=*/false); const ToolChain *HostTC = C.getSingleOffloadToolChain(); assert(HostTC && "No toolchain for host compilation."); @@ -2740,12 +2737,12 @@ Arg *PartialCompilationArg = Args.getLastArg( options::OPT_cuda_host_only, options::OPT_cuda_device_only, options::OPT_cuda_compile_host_device); - CompileHostOnly = PartialCompilationArg && - PartialCompilationArg->getOption().matches( - options::OPT_cuda_host_only); - CompileDeviceOnly = PartialCompilationArg && - PartialCompilationArg->getOption().matches( - options::OPT_cuda_device_only); + CompileHostOnly = + PartialCompilationArg && PartialCompilationArg->getOption().matches( + options::OPT_cuda_host_only); + CompileDeviceOnly = + PartialCompilationArg && PartialCompilationArg->getOption().matches( + options::OPT_cuda_device_only); EmitLLVM = Args.getLastArg(options::OPT_emit_llvm); EmitAsm = Args.getLastArg(options::OPT_S); FixedCUID = Args.getLastArgValue(options::OPT_cuid_EQ); @@ -3152,10 +3149,10 @@ // Linking all inputs for the current GPU arch. // LI contains all the inputs for the linker. 
OffloadAction::DeviceDependences DeviceLinkDeps; - DeviceLinkDeps.add(*DeviceLinkAction, *ToolChains[0], - GpuArchList[I], AssociatedOffloadKind); + DeviceLinkDeps.add(*DeviceLinkAction, *ToolChains[0], GpuArchList[I], + AssociatedOffloadKind); AL.push_back(C.MakeAction(DeviceLinkDeps, - DeviceLinkAction->getType())); + DeviceLinkAction->getType())); ++I; } DeviceLinkerInputs.clear(); @@ -3165,14 +3162,15 @@ OffloadAction::DeviceDependences DDeps; auto *TopDeviceLinkAction = C.MakeAction(AL, types::TY_Object); - DDeps.add(*TopDeviceLinkAction, *ToolChains[0], - nullptr, AssociatedOffloadKind); + DDeps.add(*TopDeviceLinkAction, *ToolChains[0], nullptr, + AssociatedOffloadKind); // Offload the host object to the host linker. - AL.push_back(C.MakeAction(DDeps, TopDeviceLinkAction->getType())); + AL.push_back( + C.MakeAction(DDeps, TopDeviceLinkAction->getType())); } - Action* appendLinkHostActions(ActionList &AL) override { return AL.back(); } + Action *appendLinkHostActions(ActionList &AL) override { return AL.back(); } void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {} }; @@ -3313,17 +3311,17 @@ C.MakeAction(LI, types::TY_Image); OffloadAction::DeviceDependences DeviceLinkDeps; DeviceLinkDeps.add(*DeviceLinkAction, **TC, /*BoundArch=*/nullptr, - Action::OFK_OpenMP); + Action::OFK_OpenMP); AL.push_back(C.MakeAction(DeviceLinkDeps, - DeviceLinkAction->getType())); + DeviceLinkAction->getType())); ++TC; } DeviceLinkerInputs.clear(); } - Action* appendLinkHostActions(ActionList &AL) override { - // Create wrapper bitcode from the result of device link actions and compile - // it to an object which will be added to the host link command. + Action *appendLinkHostActions(ActionList &AL) override { + // Create wrapper bitcode from the result of device link actions and + // compile it to an object which will be added to the host link command. 
auto *BC = C.MakeAction(AL, types::TY_LLVM_BC); auto *ASM = C.MakeAction(BC, types::TY_PP_Asm); return C.MakeAction(ASM, types::TY_Object); @@ -3556,7 +3554,7 @@ return false; } - Action* makeHostLinkAction() { + Action *makeHostLinkAction() { // Build a list of device linking actions. ActionList DeviceAL; for (DeviceActionBuilder *SB : SpecializedBuilders) { @@ -3569,7 +3567,7 @@ return nullptr; // Let builders add host linking actions. - Action* HA = nullptr; + Action *HA = nullptr; for (DeviceActionBuilder *SB : SpecializedBuilders) { if (!SB->isValid()) continue; @@ -4046,7 +4044,7 @@ auto TC = ToolChains.begin(); for (Action *&A : DeviceActions) { - A = ConstructPhaseAction(C, Args, Phase, A); + A = ConstructPhaseAction(C, Args, Phase, A, Action::OFK_OpenMP); if (isa(A)) { HostAction->setCannotBeCollapsedWithNextDependentAction(); @@ -4163,6 +4161,12 @@ Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC; return C.MakeAction(Input, Output); } + if (isUsingLTO(/* IsOffload */ true) && + TargetDeviceOffloadKind == Action::OFK_OpenMP) { + types::ID Output = + Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC; + return C.MakeAction(Input, Output); + } if (Args.hasArg(options::OPT_emit_llvm) || (TargetDeviceOffloadKind == Action::OFK_HIP && Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, @@ -4603,8 +4607,8 @@ continue; } - // This is legal to combine. Append any offload action we found and add the - // current input to preprocessor inputs. + // This is legal to combine. Append any offload action we found and add + // the current input to preprocessor inputs. CollapsedOffloadAction.append(PreprocessJobOffloadActions.begin(), PreprocessJobOffloadActions.end()); NewInputs.append(PJ->input_begin(), PJ->input_end()); @@ -4627,8 +4631,7 @@ /// connected to collapsed actions are updated accordingly. The latter enables /// the caller of the selector to process them afterwards instead of just /// dropping them. 
If no suitable tool is found, null will be returned. - const Tool *getTool(ActionList &Inputs, - ActionList &CollapsedOffloadAction) { + const Tool *getTool(ActionList &Inputs, ActionList &CollapsedOffloadAction) { // // Get the largest chain of actions that we could combine. // @@ -4671,7 +4674,7 @@ return T; } }; -} +} // namespace /// Return a string that uniquely identifies the result of a job. The bound arch /// is not necessarily represented in the toolchain's triple -- for example, @@ -4809,9 +4812,9 @@ StringRef ArchName = BAA->getArchName(); if (!ArchName.empty()) - TC = &getToolChain(C.getArgs(), - computeTargetTriple(*this, TargetTriple, - C.getArgs(), ArchName)); + TC = &getToolChain( + C.getArgs(), + computeTargetTriple(*this, TargetTriple, C.getArgs(), ArchName)); else TC = &C.getDefaultToolChain(); @@ -4820,7 +4823,6 @@ TargetDeviceOffloadKind); } - ActionList Inputs = A->getInputs(); const JobAction *JA = cast(A); @@ -4984,10 +4986,11 @@ BaseInput = C.getArgs().MakeArgString(std::string(BaseInput) + "-wrapper"); } - Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BoundArch, - AtTopLevel, MultipleArchs, - OffloadingPrefix), - BaseInput); + Result = + InputInfo(A, + GetNamedOutputPath(C, *JA, BaseInput, BoundArch, AtTopLevel, + MultipleArchs, OffloadingPrefix), + BaseInput); } if (CCCPrintBindings && !CCGenDiagnostics) { @@ -5497,15 +5500,15 @@ case llvm::Triple::Linux: case llvm::Triple::ELFIAMCU: if (Target.getArch() == llvm::Triple::hexagon) - TC = std::make_unique(*this, Target, - Args); + TC = + std::make_unique(*this, Target, Args); else if ((Target.getVendor() == llvm::Triple::MipsTechnologies) && !Target.hasEnvironment()) TC = std::make_unique(*this, Target, - Args); + Args); else if (Target.isPPC()) TC = std::make_unique(*this, Target, - Args); + Args); else if (Target.getArch() == llvm::Triple::ve) TC = std::make_unique(*this, Target, Args); @@ -5543,7 +5546,7 @@ break; case llvm::Triple::Itanium: TC = std::make_unique(*this, 
Target, - Args); + Args); break; case llvm::Triple::MSVC: case llvm::Triple::UnknownEnvironment: @@ -5552,8 +5555,7 @@ TC = std::make_unique( *this, Target, Args); else - TC = - std::make_unique(*this, Target, Args); + TC = std::make_unique(*this, Target, Args); break; } break; @@ -5580,8 +5582,8 @@ TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::hexagon: - TC = std::make_unique(*this, Target, - Args); + TC = + std::make_unique(*this, Target, Args); break; case llvm::Triple::lanai: TC = std::make_unique(*this, Target, Args); @@ -5597,8 +5599,7 @@ TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::msp430: - TC = - std::make_unique(*this, Target, Args); + TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::riscv32: case llvm::Triple::riscv64: @@ -5618,7 +5619,7 @@ default: if (Target.getVendor() == llvm::Triple::Myriad) TC = std::make_unique(*this, Target, - Args); + Args); else if (toolchains::BareMetal::handlesTarget(Target)) TC = std::make_unique(*this, Target, Args); else if (Target.isOSBinFormatELF()) @@ -5687,12 +5688,12 @@ bool Driver::ShouldUseFlangCompiler(const JobAction &JA) const { // Say "no" if there is not exactly one input of a type flang understands. - if (JA.size() != 1 || - !types::isFortran((*JA.input_begin())->getType())) + if (JA.size() != 1 || !types::isFortran((*JA.input_begin())->getType())) return false; // And say "no" if this is not a kind of action flang understands. - if (!isa(JA) && !isa(JA) && !isa(JA)) + if (!isa(JA) && !isa(JA) && + !isa(JA)) return false; return true; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4607,7 +4607,7 @@ if (JA.getType() == types::TY_LLVM_BC) CmdArgs.push_back("-emit-llvm-uselists"); - if (IsUsingLTO) { + if (IsUsingLTO && !Args.hasArg(options::OPT_fopenmp_new_driver)) { // Only AMDGPU supports device-side LTO. 
if (IsDeviceOffloadAction && !Triple.isAMDGPU()) { D.Diag(diag::err_drv_unsupported_opt_for_target) @@ -8133,6 +8133,39 @@ const char *LinkingOutput) const { ArgStringList CmdArgs; + if (getToolChain().getDriver().isUsingLTO(/* IsOffload */ true)) { + // Pass in target features for each toolchain. + auto OpenMPTCRange = C.getOffloadToolChains(); + for (auto TI = OpenMPTCRange.first, TE = OpenMPTCRange.second; TI != TE; + ++TI) { + const ToolChain *TC = TI->second; + const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP); + ArgStringList FeatureArgs; + TC->addClangTargetOptions(TCArgs, FeatureArgs, Action::OFK_OpenMP); + auto FeatureIt = llvm::find(FeatureArgs, "-target-feature"); + CmdArgs.push_back(Args.MakeArgString( + "-target-feature=" + TC->getTripleString() + "=" + *(FeatureIt + 1))); + } + + // Pass in the optimization level to use for LTO. + if (const Arg *A = Args.getLastArg(options::OPT_O_Group)) { + StringRef OOpt; + if (A->getOption().matches(options::OPT_O4) || + A->getOption().matches(options::OPT_Ofast)) + OOpt = "3"; + else if (A->getOption().matches(options::OPT_O)) { + OOpt = A->getValue(); + if (OOpt == "g") + OOpt = "1"; + else if (OOpt == "s" || OOpt == "z") + OOpt = "2"; + } else if (A->getOption().matches(options::OPT_O0)) + OOpt = "0"; + if (!OOpt.empty()) + CmdArgs.push_back(Args.MakeArgString(Twine("-opt-level=O") + OOpt)); + } + } + // Construct the link job so we can wrap around it. 
Linker->ConstructJob(C, JA, Output, Inputs, Args, LinkingOutput); const auto &LinkCommand = C.getJobs().getJobs().back(); diff --git a/clang/tools/clang-linker-wrapper/CMakeLists.txt b/clang/tools/clang-linker-wrapper/CMakeLists.txt --- a/clang/tools/clang-linker-wrapper/CMakeLists.txt +++ b/clang/tools/clang-linker-wrapper/CMakeLists.txt @@ -1,4 +1,15 @@ -set(LLVM_LINK_COMPONENTS BitWriter Core BinaryFormat IRReader Object Support) +set(LLVM_LINK_COMPONENTS + ${LLVM_TARGETS_TO_BUILD} + BitWriter + Core + BinaryFormat + MC + Passes + IRReader + Object + Support + CodeGen + LTO) if(NOT CLANG_BUILT_STANDALONE) set(tablegen_deps intrinsics_gen) diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -17,9 +17,12 @@ #include "clang/Basic/Version.h" #include "llvm/BinaryFormat/Magic.h" #include "llvm/Bitcode/BitcodeWriter.h" +#include "llvm/CodeGen/CommandFlags.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/Module.h" #include "llvm/IRReader/IRReader.h" +#include "llvm/LTO/LTO.h" #include "llvm/Object/Archive.h" #include "llvm/Object/ArchiveWriter.h" #include "llvm/Object/Binary.h" @@ -36,6 +39,7 @@ #include "llvm/Support/Signals.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/StringSaver.h" +#include "llvm/Support/TargetSelect.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" @@ -58,6 +62,15 @@ cl::desc("Path of linker binary"), cl::cat(ClangLinkerWrapperCategory)); +static cl::opt + TargetFeatures("target-feature", cl::desc("Target features for triple"), + cl::cat(ClangLinkerWrapperCategory)); + +static cl::opt OptLevel("opt-level", + cl::desc("Optimization level for LTO"), + cl::init("O0"), + cl::cat(ClangLinkerWrapperCategory)); + // Do not parse linker options. 
static cl::list HostLinkerArgs(cl::Sink, cl::desc("...")); @@ -68,6 +81,9 @@ /// Temporary files created by the linker wrapper. static SmallVector TempFiles; +/// Codegen flags for LTO backend. +static codegen::RegisterCodeGenFlags CodeGenFlags; + /// Magic section string that marks the existence of offloading data. The /// section string will be formatted as `.llvm.offloading..`. #define OFFLOAD_SECTION_MAGIC_STR ".llvm.offloading." @@ -195,6 +211,28 @@ if (ToBeStripped.empty()) return None; + // If the object file to strip doesn't exist we need to write it so we can + // pass it to llvm-strip. + SmallString<128> StripFile = Obj.getFileName(); + if (!sys::fs::exists(StripFile)) { + SmallString<128> TempFile; + if (std::error_code EC = sys::fs::createTemporaryFile( + sys::path::stem(StripFile), "o", TempFile)) + return createFileError(TempFile, EC); + TempFiles.push_back(static_cast(TempFile)); + + auto Contents = Obj.getMemoryBufferRef().getBuffer(); + Expected> OutputOrErr = + FileOutputBuffer::create(TempFile, Contents.size()); + if (!OutputOrErr) + return OutputOrErr.takeError(); + std::unique_ptr Output = std::move(*OutputOrErr); + std::copy(Contents.begin(), Contents.end(), Output->getBufferStart()); + if (Error E = Output->commit()) + return E; + StripFile = TempFile; + } + // We will use llvm-strip to remove the now unneeded section containing the // offloading code. ErrorOr StripPath = sys::findProgramByName( @@ -214,7 +252,7 @@ SmallVector StripArgs; StripArgs.push_back(*StripPath); StripArgs.push_back("--no-strip-all"); - StripArgs.push_back(Obj.getFileName()); + StripArgs.push_back(StripFile); for (auto &Section : ToBeStripped) { StripArgs.push_back("--remove-section"); StripArgs.push_back(Section); @@ -419,6 +457,44 @@ // TODO: Move these to a separate file. namespace nvptx { +Expected assemble(StringRef InputFile, Triple TheTriple, + StringRef Arch) { + // NVPTX uses the nvlink binary to link device object files. 
+ ErrorOr PtxasPath = + sys::findProgramByName("ptxas", sys::path::parent_path(LinkerExecutable)); + if (!PtxasPath) + PtxasPath = sys::findProgramByName("ptxas"); + if (!PtxasPath) + return createStringError(PtxasPath.getError(), + "Unable to find 'ptxas' in path"); + + // Create a new file to write the linked device image to. + SmallString<128> TempFile; + if (std::error_code EC = sys::fs::createTemporaryFile( + TheTriple.getArchName() + "-" + Arch, "cubin", TempFile)) + return createFileError(TempFile, EC); + TempFiles.push_back(static_cast(TempFile)); + + // TODO: Pass in arguments like `-g` and `-v` from the driver. + SmallVector CmdArgs; + std::string Opt = "-" + OptLevel; + CmdArgs.push_back(*PtxasPath); + CmdArgs.push_back(TheTriple.isArch64Bit() ? "-m64" : "-m32"); + CmdArgs.push_back("-o"); + CmdArgs.push_back(TempFile); + CmdArgs.push_back(Opt); + CmdArgs.push_back("--gpu-name"); + CmdArgs.push_back(Arch); + CmdArgs.push_back("-c"); + + CmdArgs.push_back(InputFile); + + if (sys::ExecuteAndWait(*PtxasPath, CmdArgs)) + return createStringError(inconvertibleErrorCode(), "'ptxas' failed"); + + return static_cast(TempFile); +} + Expected link(ArrayRef InputFiles, ArrayRef LinkerArgs, Triple TheTriple, StringRef Arch) { @@ -482,6 +558,221 @@ } } +void diagnosticHandler(const DiagnosticInfo &DI) { + std::string ErrStorage; + raw_string_ostream OS(ErrStorage); + DiagnosticPrinterRawOStream DP(OS); + DI.print(DP); + + switch (DI.getSeverity()) { + case DS_Error: + WithColor::error(errs(), LinkerExecutable) << ErrStorage; + break; + case DS_Warning: + WithColor::warning(errs(), LinkerExecutable) << ErrStorage; + break; + case DS_Note: + WithColor::note(errs(), LinkerExecutable) << ErrStorage; + break; + case DS_Remark: + WithColor::remark(errs(), LinkerExecutable) << ErrStorage; + break; + } +} + +// Get the target features passed in from the driver as =. 
+// The `-target-feature` value is split at its first '='. The part before it
+// must match the triple string exactly; the part after it is a
+// comma-separated feature list. An empty list is returned when the option
+// was given for a different triple.
+std::vector<std::string> getTargetFeatures(const Triple &TheTriple) {
+  std::vector<std::string> Features;
+  auto TargetAndFeatures = StringRef(TargetFeatures).split('=');
+  if (TargetAndFeatures.first != TheTriple.getTriple())
+    return Features;
+
+  for (auto Feature : llvm::split(TargetAndFeatures.second, ','))
+    Features.push_back(Feature.str());
+  return Features;
+}
+
+// Map a numeric optimization level in the range 0-3 onto the code
+// generator's enum. Callers must validate the level first; any other value
+// is a programming error.
+CodeGenOpt::Level getCGOptLevel(unsigned OptLevel) {
+  switch (OptLevel) {
+  case 0:
+    return CodeGenOpt::None;
+  case 1:
+    return CodeGenOpt::Less;
+  case 2:
+    return CodeGenOpt::Default;
+  case 3:
+    return CodeGenOpt::Aggressive;
+  }
+  llvm_unreachable("Invalid optimization level");
+}
+
+// Build the LTO instance used to compile the device bitcode for
+// \p TheTriple / \p Arch. \p WholeProgram is forwarded to
+// `HasWholeProgramVisibility` in the LTO configuration.
+std::unique_ptr<lto::LTO> createLTO(const Triple &TheTriple, StringRef Arch,
+                                    bool WholeProgram) {
+  // The driver passes `-opt-level=O<n>`. Validate the value before using it:
+  // indexing the raw string would read out of bounds for a short value and
+  // would hand getCGOptLevel() a bogus level for a non-digit one.
+  StringRef Level(OptLevel);
+  unsigned LevelValue = 0;
+  if (!Level.consume_front("O") || Level.size() != 1 ||
+      Level.getAsInteger(10, LevelValue) || LevelValue > 3)
+    report_fatal_error(Twine("invalid optimization level '") +
+                           StringRef(OptLevel) + "', expected O0-O3",
+                       /*gen_crash_diag=*/false);
+
+  lto::Config Conf;
+  lto::ThinBackend Backend;
+  // TODO: Handle index-only thin-LTO
+  Backend = lto::createInProcessThinBackend(
+      llvm::heavyweight_hardware_concurrency(1));
+
+  Conf.CPU = Arch.str();
+  Conf.Options = codegen::InitTargetOptionsFromCodeGenFlags(TheTriple);
+
+  Conf.MAttrs = getTargetFeatures(TheTriple);
+  Conf.CGOptLevel = getCGOptLevel(LevelValue);
+  Conf.OptLevel = LevelValue;
+  Conf.DefaultTriple = TheTriple.getTriple();
+  Conf.DiagHandler = diagnosticHandler;
+
+  // Vectorization is only enabled from -O2 upwards.
+  Conf.PTO.LoopVectorization = Conf.OptLevel > 1;
+  Conf.PTO.SLPVectorization = Conf.OptLevel > 1;
+
+  // TODO: Handle outputting bitcode using a module hook.
+  // NVPTX output is emitted as assembly text; every other target emits an
+  // object file directly.
+  if (TheTriple.isNVPTX())
+    Conf.CGFileType = CGFT_AssemblyFile;
+  else
+    Conf.CGFileType = CGFT_ObjectFile;
+
+  // TODO: Handle remark files
+  Conf.HasWholeProgramVisibility = WholeProgram;
+
+  return std::make_unique<lto::LTO>(std::move(Conf), Backend);
+}
+
+// Returns true if \p S is valid as a C language identifier and will be given
+// `__start_` and `__stop_` symbols.
+bool isValidCIdentifier(StringRef S) { + return !S.empty() && (isAlpha(S[0]) || S[0] == '_') && + std::all_of(S.begin() + 1, S.end(), + [](char C) { return C == '_' || isAlnum(C); }); +} + +Expected> linkBitcodeFiles(ArrayRef InputFiles, + const Triple &TheTriple, + StringRef Arch) { + SmallVector, 4> SavedBuffers; + SmallVector, 4> BitcodeFiles; + StringMap UsedInRegularObj; + + // Search for bitcode files in the input and create an LTO input file. If it + // is not a bitcode file, scan its symbol table for symbols we need to + // save. + for (StringRef File : InputFiles) { + ErrorOr> BufferOrErr = + MemoryBuffer::getFileOrSTDIN(File); + if (std::error_code EC = BufferOrErr.getError()) + return createFileError(File, EC); + + file_magic Type = identify_magic((*BufferOrErr)->getBuffer()); + if (Type != file_magic::bitcode) { + Expected> ObjFile = + ObjectFile::createObjectFile(**BufferOrErr, Type); + if (!ObjFile) + return ObjFile.takeError(); + + for (auto &Sym : (*ObjFile)->symbols()) { + Expected Name = Sym.getName(); + if (!Name) + return Name.takeError(); + + UsedInRegularObj[*Name] = true; + } + } else { + Expected> InputFileOrErr = + llvm::lto::InputFile::create(**BufferOrErr); + if (!InputFileOrErr) + return InputFileOrErr.takeError(); + + BitcodeFiles.push_back(std::move(*InputFileOrErr)); + SavedBuffers.push_back(std::move(*BufferOrErr)); + } + } + + if (BitcodeFiles.empty()) + return None; + + // We have visibility of the whole program if every input is bitcode, all + // inputs are statically linked so there should be no external references. + bool WholeProgram = BitcodeFiles.size() == InputFiles.size(); + StringMap PrevailingSymbols; + + // TODO: Run more tests to verify that this is correct. + // Create the LTO instance with the necessary config and add the bitcode files + // to it after resolving symbols. We make a few assumptions about symbol + // resolution. + // 1. The target is going to be a stand-alone executable file. + // 2. 
We do not support relocatable object files. + // 3. All inputs are relocatable object files extracted from host binaries, so + // there is no resolution to a dynamic library. + auto LTOBackend = createLTO(TheTriple, Arch, WholeProgram); + for (auto &BitcodeFile : BitcodeFiles) { + const auto Symbols = BitcodeFile->symbols(); + SmallVector Resolutions(Symbols.size()); + size_t Idx = 0; + for (auto &Sym : Symbols) { + lto::SymbolResolution &Res = Resolutions[Idx++]; + + // We will use this as the prevailing symbol definition in LTO unless + // it is undefined in the module or another symbol has already been used. + Res.Prevailing = !Sym.isUndefined() && !PrevailingSymbols[Sym.getName()]; + + // We need LTO to preserve symbols referenced in other object files, or + // are needed by the rest of the toolchain. + Res.VisibleToRegularObj = + UsedInRegularObj[Sym.getName()] || + isValidCIdentifier(Sym.getSectionName()) || + (Res.Prevailing && Sym.getName().startswith("__omp")); + + // We do not currently support shared libraries, so no symbols will be + // referenced externally by shared libraries. + Res.ExportDynamic = false; + + // The result will currently always be an executable, so the only time the + // definition will not reside in this link unit is if it's undefined. + Res.FinalDefinitionInLinkageUnit = !Sym.isUndefined(); + + // We do not support linker redefined symbols (e.g. --wrap) for device + // image linking, so the symbols will not be changed after LTO. + Res.LinkerRedefined = false; + + // Mark this symbol as the prevailing one. + PrevailingSymbols[Sym.getName()] |= Res.Prevailing; + } + + // Add the bitcode file with its resolved symbols to the LTO job. + if (Error Err = LTOBackend->add(std::move(BitcodeFile), Resolutions)) + return Err; + } + + // Run the LTO job to compile the bitcode. 
+ size_t MaxTasks = LTOBackend->getMaxTasks(); + std::vector> Files(MaxTasks); + auto AddStream = [&](size_t Task) -> std::unique_ptr { + int FD = -1; + auto &TempFile = Files[Task]; + StringRef Extension = (TheTriple.isNVPTX()) ? "s" : "o"; + if (std::error_code EC = sys::fs::createTemporaryFile( + "lto-" + TheTriple.getTriple(), Extension, FD, TempFile)) + return nullptr; + TempFiles.push_back(static_cast(TempFile)); + return std::make_unique( + std::make_unique(FD, true)); + }; + if (Error Err = LTOBackend->run(AddStream)) + return Err; + + for (auto &File : Files) { + if (!TheTriple.isNVPTX()) + continue; + + auto FileOrErr = nvptx::assemble(File, TheTriple, Arch); + if (!FileOrErr) + return FileOrErr.takeError(); + File = *FileOrErr; + } + + return static_cast(Files.front()); +} + /// Runs the appropriate linking action on all the device files specified in \p /// DeviceFiles. The linked device images are returned in \p LinkedImages. Error linkDeviceFiles(ArrayRef DeviceFiles, @@ -499,6 +790,12 @@ StringRef Arch(TargetFeatures.second); // TODO: Run LTO or bitcode linking before the final link job. + auto ObjectOrErr = + linkBitcodeFiles(LinkerInput.getValue(), TheTriple, Arch); + if (!ObjectOrErr) + return ObjectOrErr.takeError(); + if ((*ObjectOrErr).hasValue()) + LinkerInput.getValue() = {**ObjectOrErr}; auto ImageOrErr = linkDevice(LinkerInput.getValue(), LinkerArgs, TheTriple, Arch); @@ -525,7 +822,7 @@ // Create a new file to write the wrapped bitcode file to. SmallString<128> BitcodeFile; if (std::error_code EC = - sys::fs::createTemporaryFile("offload", "bc", BitcodeFile)) + sys::fs::createTemporaryFile("wrapper", "bc", BitcodeFile)) return createFileError(BitcodeFile, EC); TempFiles.push_back(static_cast(BitcodeFile)); @@ -554,7 +851,7 @@ // Create a new file to write the wrapped bitcode file to. 
SmallString<128> ObjectFile; if (std::error_code EC = - sys::fs::createTemporaryFile("offload", "o", ObjectFile)) + sys::fs::createTemporaryFile("image", "o", ObjectFile)) return createFileError(BitcodeFile, EC); TempFiles.push_back(static_cast(ObjectFile)); @@ -592,6 +889,8 @@ Optional searchLibraryBaseName(StringRef Name, ArrayRef SearchPaths) { for (StringRef Dir : SearchPaths) { + if (Optional File = findFile(Dir, "lib" + Name + ".so")) + return None; if (Optional File = findFile(Dir, "lib" + Name + ".a")) return File; } @@ -614,6 +913,11 @@ int main(int argc, const char **argv) { InitLLVM X(argc, argv); + InitializeAllTargetInfos(); + InitializeAllTargets(); + InitializeAllTargetMCs(); + InitializeAllAsmParsers(); + InitializeAllAsmPrinters(); LinkerExecutable = argv[0]; sys::PrintStackTraceOnErrorSignal(argv[0]); diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -1464,6 +1464,9 @@ // libraries and other oracles. MPM.addPass(InferFunctionAttrsPass()); + if (Level.getSpeedupLevel() > 1) + MPM.addPass(OpenMPOptPass()); + if (Level.getSpeedupLevel() > 1) { FunctionPassManager EarlyFPM; EarlyFPM.addPass(CallSiteSplittingPass()); @@ -1773,6 +1776,7 @@ MPM.addPass(createModuleToFunctionPassAdaptor(CoroEarlyPass())); CGSCCPassManager CGPM; CGPM.addPass(CoroSplitPass()); + CGPM.addPass(OpenMPOptCGSCCPass()); MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));