Index: clang/include/clang/Driver/Action.h =================================================================== --- clang/include/clang/Driver/Action.h +++ clang/include/clang/Driver/Action.h @@ -71,9 +71,10 @@ VerifyPCHJobClass, OffloadBundlingJobClass, OffloadUnbundlingJobClass, + OffloadWrapperJobClass, JobClassFirst = PreprocessJobClass, - JobClassLast = OffloadUnbundlingJobClass + JobClassLast = OffloadWrapperJobClass }; // The offloading kind determines if this action is binded to a particular @@ -613,6 +614,17 @@ } }; +class OffloadWrapperJobAction : public JobAction { + void anchor() override; + +public: + OffloadWrapperJobAction(ActionList &Inputs, types::ID Type); + + static bool classof(const Action *A) { + return A->getKind() == OffloadWrapperJobClass; + } +}; + } // namespace driver } // namespace clang Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -1601,8 +1601,6 @@ Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; def fopenmp_targets_EQ : CommaJoined<["-"], "fopenmp-targets=">, Flags<[DriverOption, CC1Option]>, HelpText<"Specify comma-separated list of triples OpenMP offloading targets to be supported">; -def fopenmp_dump_offload_linker_script : Flag<["-"], "fopenmp-dump-offload-linker-script">, - Group, Flags<[NoArgumentUnused, HelpHidden]>; def fopenmp_relocatable_target : Flag<["-"], "fopenmp-relocatable-target">, Group, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; def fnoopenmp_relocatable_target : Flag<["-"], "fnoopenmp-relocatable-target">, Index: clang/include/clang/Driver/ToolChain.h =================================================================== --- clang/include/clang/Driver/ToolChain.h +++ clang/include/clang/Driver/ToolChain.h @@ -137,12 +137,14 @@ mutable std::unique_ptr Assemble; mutable std::unique_ptr Link; mutable std::unique_ptr OffloadBundler; + mutable std::unique_ptr 
OffloadWrapper; Tool *getClang() const; Tool *getAssemble() const; Tool *getLink() const; Tool *getClangAs() const; Tool *getOffloadBundler() const; + Tool *getOffloadWrapper() const; mutable std::unique_ptr SanitizerArguments; mutable std::unique_ptr XRayArguments; Index: clang/lib/Driver/Action.cpp =================================================================== --- clang/lib/Driver/Action.cpp +++ clang/lib/Driver/Action.cpp @@ -40,6 +40,8 @@ return "clang-offload-bundler"; case OffloadUnbundlingJobClass: return "clang-offload-unbundler"; + case OffloadWrapperJobClass: + return "clang-offload-wrapper"; } llvm_unreachable("invalid class"); @@ -401,3 +403,9 @@ OffloadUnbundlingJobAction::OffloadUnbundlingJobAction(Action *Input) : JobAction(OffloadUnbundlingJobClass, Input, Input->getType()) {} + +void OffloadWrapperJobAction::anchor() {} + +OffloadWrapperJobAction::OffloadWrapperJobAction(ActionList &Inputs, + types::ID Type) + : JobAction(OffloadWrapperJobClass, Inputs, Type) {} Index: clang/lib/Driver/Driver.cpp =================================================================== --- clang/lib/Driver/Driver.cpp +++ clang/lib/Driver/Driver.cpp @@ -2284,6 +2284,9 @@ /// Append top level actions generated by the builder. virtual void appendTopLevelActions(ActionList &AL) {} + /// Append linker actions generated by the builder. + virtual void appendLinkActions(ActionList &AL) {} + /// Append linker actions generated by the builder. 
virtual void appendLinkDependences(OffloadAction::DeviceDependences &DA) {} @@ -2873,7 +2876,7 @@ OpenMPDeviceActions.clear(); } - void appendLinkDependences(OffloadAction::DeviceDependences &DA) override { + void appendLinkActions(ActionList &AL) override { assert(ToolChains.size() == DeviceLinkerInputs.size() && "Toolchains and linker inputs sizes do not match."); @@ -2882,12 +2885,18 @@ for (auto &LI : DeviceLinkerInputs) { auto *DeviceLinkAction = C.MakeAction(LI, types::TY_Image); - DA.add(*DeviceLinkAction, **TC, /*BoundArch=*/nullptr, - Action::OFK_OpenMP); + OffloadAction::DeviceDependences DeviceLinkDeps; + DeviceLinkDeps.add(*DeviceLinkAction, **TC, /*BoundArch=*/nullptr, + Action::OFK_OpenMP); + AL.push_back(C.MakeAction(DeviceLinkDeps, + DeviceLinkAction->getType())); ++TC; } + DeviceLinkerInputs.clear(); } + void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {} + bool initialize() override { // Get the OpenMP toolchains. If we don't get any, the action builder will // know there is nothing to do related to OpenMP offloading. @@ -3111,6 +3120,25 @@ return false; } + Action* makeHostLinkAction() { + // Build a list of device linking actions. + ActionList DeviceAL; + for (DeviceActionBuilder *SB : SpecializedBuilders) { + if (!SB->isValid()) + continue; + SB->appendLinkActions(DeviceAL); + } + + if (DeviceAL.empty()) + return nullptr; + + // Create wrapper bitcode from the result of device link actions and compile + // it to an object which will be added to the host link command. + auto *BC = C.MakeAction(DeviceAL, types::TY_LLVM_BC); + auto *ASM = C.MakeAction(BC, types::TY_PP_Asm); + return C.MakeAction(ASM, types::TY_Object); + } + /// Processes the host linker action. This currently consists of replacing it /// with an offload action if there are device link objects and propagate to /// the host action all the offload kinds used in the current compilation. The @@ -3404,6 +3432,8 @@ // Add a link action if necessary. 
if (!LinkerInputs.empty()) { + if (Action *Wrapper = OffloadBuilder.makeHostLinkAction()) + LinkerInputs.push_back(Wrapper); Action *LA = C.MakeAction(LinkerInputs, types::TY_Image); LA = OffloadBuilder.processHostLinkAction(LA); Actions.push_back(LA); @@ -3783,18 +3813,8 @@ if (!AJ || !BJ) return nullptr; - // Retrieve the compile job, backend action must always be preceded by one. - ActionList CompileJobOffloadActions; - auto *CJ = getPrevDependentAction(BJ->getInputs(), CompileJobOffloadActions, - /*CanBeCollapsed=*/false); - if (!AJ || !BJ || !CJ) - return nullptr; - - assert(isa(CJ) && - "Expecting compile job preceding backend job."); - - // Get compiler tool. - const Tool *T = TC.SelectTool(*CJ); + // Get backend tool. + const Tool *T = TC.SelectTool(*BJ); if (!T) return nullptr; @@ -4199,6 +4219,13 @@ A->getOffloadingDeviceKind(), TC->getTriple().normalize(), /*CreatePrefixForHost=*/!!A->getOffloadingHostActiveKinds() && !AtTopLevel); + if (isa(JA)) { + OffloadingPrefix += "-wrapper"; + if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o)) + BaseInput = FinalOutput->getValue(); + else + BaseInput = getDefaultImageName(); + } Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BoundArch, AtTopLevel, MultipleArchs, OffloadingPrefix), Index: clang/lib/Driver/ToolChain.cpp =================================================================== --- clang/lib/Driver/ToolChain.cpp +++ clang/lib/Driver/ToolChain.cpp @@ -285,6 +285,12 @@ return OffloadBundler.get(); } +Tool *ToolChain::getOffloadWrapper() const { + if (!OffloadWrapper) + OffloadWrapper.reset(new tools::OffloadWrapper(*this)); + return OffloadWrapper.get(); +} + Tool *ToolChain::getTool(Action::ActionClass AC) const { switch (AC) { case Action::AssembleJobClass: @@ -314,6 +320,9 @@ case Action::OffloadBundlingJobClass: case Action::OffloadUnbundlingJobClass: return getOffloadBundler(); + + case Action::OffloadWrapperJobClass: + return getOffloadWrapper(); } llvm_unreachable("Invalid tool 
kind."); Index: clang/lib/Driver/ToolChains/Clang.h =================================================================== --- clang/lib/Driver/ToolChains/Clang.h +++ clang/lib/Driver/ToolChains/Clang.h @@ -152,6 +152,20 @@ const llvm::opt::ArgList &TCArgs, const char *LinkingOutput) const override; }; + +/// Offload wrapper tool. +class LLVM_LIBRARY_VISIBILITY OffloadWrapper final : public Tool { +public: + OffloadWrapper(const ToolChain &TC) + : Tool("offload wrapper", "clang-offload-wrapper", TC) {} + + bool hasIntegratedCPP() const override { return false; } + void ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, const InputInfoList &Inputs, + const llvm::opt::ArgList &TCArgs, + const char *LinkingOutput) const override; +}; + } // end namespace tools } // end namespace driver Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -6480,3 +6480,57 @@ TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())), CmdArgs, None)); } + +void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, + const InputInfoList &Inputs, + const ArgList &Args, + const char *LinkingOutput) const { + ArgStringList CmdArgs; + + const llvm::Triple &Triple = getToolChain().getEffectiveTriple(); + + // Add the "effective" target triple. + CmdArgs.push_back("-target"); + CmdArgs.push_back(Args.MakeArgString(Triple.getTriple())); + + assert(JA.getInputs().size() == Inputs.size() && + "Not have inputs for all dependence actions??"); + + // Add offload targets. It is a comma-separated list of offload target + // triples. + SmallString<128> Targets; + Targets += "-offload-targets="; + for (unsigned I = 0; I < Inputs.size(); ++I) { + if (I) + Targets += ','; + + // Get input's Offload Kind and ToolChain. 
+ const auto *OA = cast(JA.getInputs()[I]); + assert(OA->hasSingleDeviceDependence(/*DoNotConsiderHostActions=*/true) && + "Expected one device dependence!"); + const ToolChain *DeviceTC = nullptr; + OA->doOnEachDependence([&DeviceTC](Action *, const ToolChain *TC, + const char *) { DeviceTC = TC; }); + + // And add it to the offload targets. + Targets += DeviceTC->getTriple().normalize(); + } + CmdArgs.push_back(Args.MakeArgString(Targets)); + + // Add the output file name. + assert(Output.isFilename() && "Invalid output."); + CmdArgs.push_back("-o"); + CmdArgs.push_back(Output.getFilename()); + + // Add inputs. + for (const InputInfo &I : Inputs) { + assert(I.isFilename() && "Invalid input."); + CmdArgs.push_back(I.getFilename()); + } + + C.addCommand(std::make_unique( + JA, *this, + Args.MakeArgString(getToolChain().GetProgramPath(getShortName())), + CmdArgs, Inputs)); +} Index: clang/lib/Driver/ToolChains/CommonArgs.h =================================================================== --- clang/lib/Driver/ToolChains/CommonArgs.h +++ clang/lib/Driver/ToolChains/CommonArgs.h @@ -45,13 +45,6 @@ llvm::opt::ArgStringList &CmdArgs, const llvm::opt::ArgList &Args); -void AddOpenMPLinkerScript(const ToolChain &TC, Compilation &C, - const InputInfo &Output, - const InputInfoList &Inputs, - const llvm::opt::ArgList &Args, - llvm::opt::ArgStringList &CmdArgs, - const JobAction &JA); - void AddHIPLinkerScript(const ToolChain &TC, Compilation &C, const InputInfo &Output, const InputInfoList &Inputs, const llvm::opt::ArgList &Args, Index: clang/lib/Driver/ToolChains/CommonArgs.cpp =================================================================== --- clang/lib/Driver/ToolChains/CommonArgs.cpp +++ clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -1245,120 +1245,6 @@ } } -/// Add OpenMP linker script arguments at the end of the argument list so that -/// the fat binary is built by embedding each of the device images into the -/// host. 
The linker script also defines a few symbols required by the code -/// generation so that the images can be easily retrieved at runtime by the -/// offloading library. This should be used only in tool chains that support -/// linker scripts. -void tools::AddOpenMPLinkerScript(const ToolChain &TC, Compilation &C, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, ArgStringList &CmdArgs, - const JobAction &JA) { - - // If this is not an OpenMP host toolchain, we don't need to do anything. - if (!JA.isHostOffloading(Action::OFK_OpenMP)) - return; - - // Create temporary linker script. Keep it if save-temps is enabled. - const char *LKS; - SmallString<256> Name = llvm::sys::path::filename(Output.getFilename()); - if (C.getDriver().isSaveTempsEnabled()) { - llvm::sys::path::replace_extension(Name, "lk"); - LKS = C.getArgs().MakeArgString(Name.c_str()); - } else { - llvm::sys::path::replace_extension(Name, ""); - Name = C.getDriver().GetTemporaryPath(Name, "lk"); - LKS = C.addTempFile(C.getArgs().MakeArgString(Name.c_str())); - } - - // Add linker script option to the command. - CmdArgs.push_back("-T"); - CmdArgs.push_back(LKS); - - // Create a buffer to write the contents of the linker script. - std::string LksBuffer; - llvm::raw_string_ostream LksStream(LksBuffer); - - // Get the OpenMP offload tool chains so that we can extract the triple - // associated with each device input. - auto OpenMPToolChains = C.getOffloadToolChains(); - assert(OpenMPToolChains.first != OpenMPToolChains.second && - "No OpenMP toolchains??"); - - // Track the input file name and device triple in order to build the script, - // inserting binaries in the designated sections. - SmallVector, 8> InputBinaryInfo; - - // Add commands to embed target binaries. We ensure that each section and - // image is 16-byte aligned. This is not mandatory, but increases the - // likelihood of data to be aligned with a cache block in several main host - // machines. 
- LksStream << "/*\n"; - LksStream << " OpenMP Offload Linker Script\n"; - LksStream << " *** Automatically generated by Clang ***\n"; - LksStream << "*/\n"; - LksStream << "TARGET(binary)\n"; - auto DTC = OpenMPToolChains.first; - for (auto &II : Inputs) { - const Action *A = II.getAction(); - // Is this a device linking action? - if (A && isa(A) && - A->isDeviceOffloading(Action::OFK_OpenMP)) { - assert(DTC != OpenMPToolChains.second && - "More device inputs than device toolchains??"); - InputBinaryInfo.push_back(std::make_pair( - DTC->second->getTriple().normalize(), II.getFilename())); - ++DTC; - LksStream << "INPUT(" << II.getFilename() << ")\n"; - } - } - - assert(DTC == OpenMPToolChains.second && - "Less device inputs than device toolchains??"); - - LksStream << "SECTIONS\n"; - LksStream << "{\n"; - - // Put each target binary into a separate section. - for (const auto &BI : InputBinaryInfo) { - LksStream << " .omp_offloading." << BI.first << " :\n"; - LksStream << " ALIGN(0x10)\n"; - LksStream << " {\n"; - LksStream << " PROVIDE_HIDDEN(.omp_offloading.img_start." << BI.first - << " = .);\n"; - LksStream << " " << BI.second << "\n"; - LksStream << " PROVIDE_HIDDEN(.omp_offloading.img_end." << BI.first - << " = .);\n"; - LksStream << " }\n"; - } - - LksStream << "}\n"; - LksStream << "INSERT BEFORE .data\n"; - LksStream.flush(); - - // Dump the contents of the linker script if the user requested that. We - // support this option to enable testing of behavior with -###. - if (C.getArgs().hasArg(options::OPT_fopenmp_dump_offload_linker_script)) - llvm::errs() << LksBuffer; - - // If this is a dry run, do not create the linker script file. - if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH)) - return; - - // Open script file and write the contents. 
- std::error_code EC; - llvm::raw_fd_ostream Lksf(LKS, EC, llvm::sys::fs::OF_None); - - if (EC) { - C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message(); - return; - } - - Lksf << LksBuffer; -} - /// Add HIP linker script arguments at the end of the argument list so that /// the fat binary is built by embedding the device images into the host. The /// linker script also defines a symbol required by the code generation so that Index: clang/lib/Driver/ToolChains/Cuda.cpp =================================================================== --- clang/lib/Driver/ToolChains/Cuda.cpp +++ clang/lib/Driver/ToolChains/Cuda.cpp @@ -563,8 +563,6 @@ CmdArgs.push_back(CubinF); } - AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA); - const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("nvlink")); C.addCommand(std::make_unique(JA, *this, Exec, CmdArgs, Inputs)); Index: clang/lib/Driver/ToolChains/Gnu.cpp =================================================================== --- clang/lib/Driver/ToolChains/Gnu.cpp +++ clang/lib/Driver/ToolChains/Gnu.cpp @@ -499,7 +499,7 @@ P = ToolChain.GetFilePath(crtbegin); } CmdArgs.push_back(Args.MakeArgString(P)); - } + } // Add crtfastmath.o if available and fast math is enabled. ToolChain.AddFastMathRuntimeIfAvailable(Args, CmdArgs); @@ -623,9 +623,6 @@ } } - // Add OpenMP offloading linker script args if required. - AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA); - // Add HIP offloading linker script args if required. AddHIPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA, *this); Index: clang/test/Driver/clang-offload-wrapper.c =================================================================== --- /dev/null +++ clang/test/Driver/clang-offload-wrapper.c @@ -0,0 +1,30 @@ +// REQUIRES: x86-registered-target + +// +// Check help message. 
+// +// RUN: clang-offload-wrapper --help | FileCheck %s --check-prefix CHECK-HELP +// CHECK-HELP: {{.*}}OVERVIEW: A tool to create a wrapper bitcode for offload target binaries. Takes offload +// CHECK-HELP: {{.*}}target binaries as input and produces bitcode file containing target binaries packaged +// CHECK-HELP: {{.*}}as data. +// CHECK-HELP: {{.*}}USAGE: clang-offload-wrapper [options] +// CHECK-HELP: {{.*}} -o= - Output filename +// CHECK-HELP: {{.*}} --offload-targets= - Comma-separated list of device target triples +// CHECK-HELP: {{.*}} --target= - Target triple for the output module + +// +// Generate a file to wrap. +// +// RUN: echo 'Content of device file' > %t.tgt + +// +// Check bitcode produced by the wrapper tool. +// +// RUN: clang-offload-wrapper -target=x86_64-pc-linux-gnu -offload-targets=powerpc64le-ibm-linux-gnu -o %t.wrapper.bc %t.tgt +// RUN: llvm-dis %t.wrapper.bc -o - | FileCheck %s --check-prefix CHECK-IR + +// CHECK-IR: target triple = "x86_64-pc-linux-gnu" + +// CHECK-IR: @.omp_offloading.img_start.powerpc64le-ibm-linux-gnu = hidden unnamed_addr constant [{{[0-9]+}} x i8] c"Content of device file{{.+}}", section ".omp_offloading.powerpc64le-ibm-linux-gnu" +// CHECK-IR: @.omp_offloading.img_end.powerpc64le-ibm-linux-gnu = hidden unnamed_addr constant [0 x i8] zeroinitializer, section ".omp_offloading.powerpc64le-ibm-linux-gnu" + Index: clang/test/Driver/openmp-offload-gpu.c =================================================================== --- clang/test/Driver/openmp-offload-gpu.c +++ clang/test/Driver/openmp-offload-gpu.c @@ -55,7 +55,7 @@ // RUN: -fopenmp-targets=nvptx64-nvidia-cuda %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-CUBIN-NVLINK %s -// CHK-CUBIN-NVLINK: clang{{.*}}" "-o" "[[PTX:.*\.s]]" +// CHK-CUBIN-NVLINK: clang{{.*}}" {{.*}}"-fopenmp-is-device" {{.*}}"-o" "[[PTX:.*\.s]]" // CHK-CUBIN-NVLINK-NEXT: ptxas{{.*}}" "--output-file" "[[CUBIN:.*\.cubin]]" {{.*}}"[[PTX]]" // CHK-CUBIN-NVLINK-NEXT: nvlink{{.*}}" 
{{.*}}"[[CUBIN]]" Index: clang/test/Driver/openmp-offload.c =================================================================== --- clang/test/Driver/openmp-offload.c +++ clang/test/Driver/openmp-offload.c @@ -106,15 +106,18 @@ // CHK-PHASES: 2: compiler, {1}, ir, (host-openmp) // CHK-PHASES: 3: backend, {2}, assembler, (host-openmp) // CHK-PHASES: 4: assembler, {3}, object, (host-openmp) -// CHK-PHASES: 5: linker, {4}, image, (host-openmp) -// CHK-PHASES: 6: input, "[[INPUT]]", c, (device-openmp) -// CHK-PHASES: 7: preprocessor, {6}, cpp-output, (device-openmp) -// CHK-PHASES: 8: compiler, {7}, ir, (device-openmp) -// CHK-PHASES: 9: offload, "host-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-openmp (x86_64-pc-linux-gnu)" {8}, ir -// CHK-PHASES: 10: backend, {9}, assembler, (device-openmp) -// CHK-PHASES: 11: assembler, {10}, object, (device-openmp) -// CHK-PHASES: 12: linker, {11}, image, (device-openmp) -// CHK-PHASES: 13: offload, "host-openmp (powerpc64le-ibm-linux-gnu)" {5}, "device-openmp (x86_64-pc-linux-gnu)" {12}, image +// CHK-PHASES: 5: input, "[[INPUT]]", c, (device-openmp) +// CHK-PHASES: 6: preprocessor, {5}, cpp-output, (device-openmp) +// CHK-PHASES: 7: compiler, {6}, ir, (device-openmp) +// CHK-PHASES: 8: offload, "host-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-openmp (x86_64-pc-linux-gnu)" {7}, ir +// CHK-PHASES: 9: backend, {8}, assembler, (device-openmp) +// CHK-PHASES: 10: assembler, {9}, object, (device-openmp) +// CHK-PHASES: 11: linker, {10}, image, (device-openmp) +// CHK-PHASES: 12: offload, "device-openmp (x86_64-pc-linux-gnu)" {11}, image +// CHK-PHASES: 13: clang-offload-wrapper, {12}, ir, (host-openmp) +// CHK-PHASES: 14: backend, {13}, assembler, (host-openmp) +// CHK-PHASES: 15: assembler, {14}, object, (host-openmp) +// CHK-PHASES: 16: linker, {4, 15}, image, (host-openmp) /// ########################################################################### @@ -128,15 +131,15 @@ // CHK-PHASES-LIB: 3: compiler, {2}, ir, 
(host-openmp) // CHK-PHASES-LIB: 4: backend, {3}, assembler, (host-openmp) // CHK-PHASES-LIB: 5: assembler, {4}, object, (host-openmp) -// CHK-PHASES-LIB: 6: linker, {0, 5}, image, (host-openmp) -// CHK-PHASES-LIB: 7: input, "somelib", object, (device-openmp) -// CHK-PHASES-LIB: 8: input, "[[INPUT]]", c, (device-openmp) -// CHK-PHASES-LIB: 9: preprocessor, {8}, cpp-output, (device-openmp) -// CHK-PHASES-LIB: 10: compiler, {9}, ir, (device-openmp) -// CHK-PHASES-LIB: 11: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {10}, ir -// CHK-PHASES-LIB: 12: backend, {11}, assembler, (device-openmp) -// CHK-PHASES-LIB: 13: assembler, {12}, object, (device-openmp) -// CHK-PHASES-LIB: 14: linker, {7, 13}, image, (device-openmp) +// CHK-PHASES-LIB: 6: input, "somelib", object, (device-openmp) +// CHK-PHASES-LIB: 7: input, "[[INPUT]]", c, (device-openmp) +// CHK-PHASES-LIB: 8: preprocessor, {7}, cpp-output, (device-openmp) +// CHK-PHASES-LIB: 9: compiler, {8}, ir, (device-openmp) +// CHK-PHASES-LIB: 10: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {9}, ir +// CHK-PHASES-LIB: 11: backend, {10}, assembler, (device-openmp) +// CHK-PHASES-LIB: 12: assembler, {11}, object, (device-openmp) +// CHK-PHASES-LIB: 13: linker, {6, 12}, image, (device-openmp) +// CHK-PHASES-LIB: 14: offload, "device-openmp (x86_64-pc-linux-gnu)" {13}, image // CHK-PHASES-LIB: 15: input, "somelib", object, (device-openmp) // CHK-PHASES-LIB: 16: input, "[[INPUT]]", c, (device-openmp) // CHK-PHASES-LIB: 17: preprocessor, {16}, cpp-output, (device-openmp) @@ -145,8 +148,11 @@ // CHK-PHASES-LIB: 20: backend, {19}, assembler, (device-openmp) // CHK-PHASES-LIB: 21: assembler, {20}, object, (device-openmp) // CHK-PHASES-LIB: 22: linker, {15, 21}, image, (device-openmp) -// CHK-PHASES-LIB: 23: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {6}, "device-openmp (x86_64-pc-linux-gnu)" {14}, "device-openmp 
(powerpc64-ibm-linux-gnu)" {22}, image - +// CHK-PHASES-LIB: 23: offload, "device-openmp (powerpc64-ibm-linux-gnu)" {22}, image +// CHK-PHASES-LIB: 24: clang-offload-wrapper, {14, 23}, ir, (host-openmp) +// CHK-PHASES-LIB: 25: backend, {24}, assembler, (host-openmp) +// CHK-PHASES-LIB: 26: assembler, {25}, object, (host-openmp) +// CHK-PHASES-LIB: 27: linker, {0, 5, 26}, image, (host-openmp) /// ########################################################################### @@ -165,21 +171,21 @@ // CHK-PHASES-FILES: 8: compiler, {7}, ir, (host-openmp) // CHK-PHASES-FILES: 9: backend, {8}, assembler, (host-openmp) // CHK-PHASES-FILES: 10: assembler, {9}, object, (host-openmp) -// CHK-PHASES-FILES: 11: linker, {0, 5, 10}, image, (host-openmp) -// CHK-PHASES-FILES: 12: input, "somelib", object, (device-openmp) -// CHK-PHASES-FILES: 13: input, "[[INPUT1]]", c, (device-openmp) -// CHK-PHASES-FILES: 14: preprocessor, {13}, cpp-output, (device-openmp) -// CHK-PHASES-FILES: 15: compiler, {14}, ir, (device-openmp) -// CHK-PHASES-FILES: 16: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {15}, ir -// CHK-PHASES-FILES: 17: backend, {16}, assembler, (device-openmp) -// CHK-PHASES-FILES: 18: assembler, {17}, object, (device-openmp) -// CHK-PHASES-FILES: 19: input, "[[INPUT2]]", c, (device-openmp) -// CHK-PHASES-FILES: 20: preprocessor, {19}, cpp-output, (device-openmp) -// CHK-PHASES-FILES: 21: compiler, {20}, ir, (device-openmp) -// CHK-PHASES-FILES: 22: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {8}, "device-openmp (x86_64-pc-linux-gnu)" {21}, ir -// CHK-PHASES-FILES: 23: backend, {22}, assembler, (device-openmp) -// CHK-PHASES-FILES: 24: assembler, {23}, object, (device-openmp) -// CHK-PHASES-FILES: 25: linker, {12, 18, 24}, image, (device-openmp) +// CHK-PHASES-FILES: 11: input, "somelib", object, (device-openmp) +// CHK-PHASES-FILES: 12: input, "[[INPUT1]]", c, (device-openmp) +// CHK-PHASES-FILES: 13: preprocessor, {12}, 
cpp-output, (device-openmp) +// CHK-PHASES-FILES: 14: compiler, {13}, ir, (device-openmp) +// CHK-PHASES-FILES: 15: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {14}, ir +// CHK-PHASES-FILES: 16: backend, {15}, assembler, (device-openmp) +// CHK-PHASES-FILES: 17: assembler, {16}, object, (device-openmp) +// CHK-PHASES-FILES: 18: input, "[[INPUT2]]", c, (device-openmp) +// CHK-PHASES-FILES: 19: preprocessor, {18}, cpp-output, (device-openmp) +// CHK-PHASES-FILES: 20: compiler, {19}, ir, (device-openmp) +// CHK-PHASES-FILES: 21: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {8}, "device-openmp (x86_64-pc-linux-gnu)" {20}, ir +// CHK-PHASES-FILES: 22: backend, {21}, assembler, (device-openmp) +// CHK-PHASES-FILES: 23: assembler, {22}, object, (device-openmp) +// CHK-PHASES-FILES: 24: linker, {11, 17, 23}, image, (device-openmp) +// CHK-PHASES-FILES: 25: offload, "device-openmp (x86_64-pc-linux-gnu)" {24}, image // CHK-PHASES-FILES: 26: input, "somelib", object, (device-openmp) // CHK-PHASES-FILES: 27: input, "[[INPUT1]]", c, (device-openmp) // CHK-PHASES-FILES: 28: preprocessor, {27}, cpp-output, (device-openmp) @@ -194,7 +200,11 @@ // CHK-PHASES-FILES: 37: backend, {36}, assembler, (device-openmp) // CHK-PHASES-FILES: 38: assembler, {37}, object, (device-openmp) // CHK-PHASES-FILES: 39: linker, {26, 32, 38}, image, (device-openmp) -// CHK-PHASES-FILES: 40: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {11}, "device-openmp (x86_64-pc-linux-gnu)" {25}, "device-openmp (powerpc64-ibm-linux-gnu)" {39}, image +// CHK-PHASES-FILES: 40: offload, "device-openmp (powerpc64-ibm-linux-gnu)" {39}, image +// CHK-PHASES-FILES: 41: clang-offload-wrapper, {25, 40}, ir, (host-openmp) +// CHK-PHASES-FILES: 42: backend, {41}, assembler, (host-openmp) +// CHK-PHASES-FILES: 43: assembler, {42}, object, (host-openmp) +// CHK-PHASES-FILES: 44: linker, {0, 5, 10, 43}, image, (host-openmp) /// 
########################################################################### @@ -216,15 +226,18 @@ // CHK-PHASES-WITH-CUDA: 11: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {10}, ir // CHK-PHASES-WITH-CUDA: 12: backend, {11}, assembler, (host-cuda-openmp) // CHK-PHASES-WITH-CUDA: 13: assembler, {12}, object, (host-cuda-openmp) -// CHK-PHASES-WITH-CUDA: 14: linker, {13}, image, (host-cuda-openmp) -// CHK-PHASES-WITH-CUDA: 15: input, "[[INPUT]]", cuda, (device-openmp) -// CHK-PHASES-WITH-CUDA: 16: preprocessor, {15}, cuda-cpp-output, (device-openmp) -// CHK-PHASES-WITH-CUDA: 17: compiler, {16}, ir, (device-openmp) -// CHK-PHASES-WITH-CUDA: 18: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-openmp (nvptx64-nvidia-cuda)" {17}, ir -// CHK-PHASES-WITH-CUDA: 19: backend, {18}, assembler, (device-openmp) -// CHK-PHASES-WITH-CUDA: 20: assembler, {19}, object, (device-openmp) -// CHK-PHASES-WITH-CUDA: 21: linker, {20}, image, (device-openmp) -// CHK-PHASES-WITH-CUDA: 22: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {14}, "device-openmp (nvptx64-nvidia-cuda)" {21}, image +// CHK-PHASES-WITH-CUDA: 14: input, "[[INPUT]]", cuda, (device-openmp) +// CHK-PHASES-WITH-CUDA: 15: preprocessor, {14}, cuda-cpp-output, (device-openmp) +// CHK-PHASES-WITH-CUDA: 16: compiler, {15}, ir, (device-openmp) +// CHK-PHASES-WITH-CUDA: 17: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-openmp (nvptx64-nvidia-cuda)" {16}, ir +// CHK-PHASES-WITH-CUDA: 18: backend, {17}, assembler, (device-openmp) +// CHK-PHASES-WITH-CUDA: 19: assembler, {18}, object, (device-openmp) +// CHK-PHASES-WITH-CUDA: 20: linker, {19}, image, (device-openmp) +// CHK-PHASES-WITH-CUDA: 21: offload, "device-openmp (nvptx64-nvidia-cuda)" {20}, image +// CHK-PHASES-WITH-CUDA: 22: clang-offload-wrapper, {21}, ir, (host-cuda-openmp) +// CHK-PHASES-WITH-CUDA: 23: backend, {22}, assembler, (host-cuda-openmp) +// 
CHK-PHASES-WITH-CUDA: 24: assembler, {23}, object, (host-cuda-openmp) +// CHK-PHASES-WITH-CUDA: 25: linker, {13, 24}, image, (host-cuda-openmp) /// ########################################################################### @@ -237,65 +250,31 @@ /// -fopenmp-host-ir-file-path: specifies the host IR file that can be loaded by /// the target code generation to gather information about which declaration /// really need to be emitted. -/// We use -fopenmp-dump-offload-linker-script to dump the linker script and -/// check its contents. /// -// RUN: %clang -### -fopenmp=libomp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -fopenmp-dump-offload-linker-script -no-canonical-prefixes 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-COMMANDS -check-prefix=CHK-LKS -check-prefix=CHK-LKS-REG %s -// RUN: %clang -### -fopenmp=libomp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -save-temps -fopenmp-dump-offload-linker-script -no-canonical-prefixes 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-COMMANDS-ST -check-prefix=CHK-LKS -check-prefix=CHK-LKS-ST %s - -// Make sure we are not dumping the script unless the user requested it. // RUN: %clang -### -fopenmp=libomp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -no-canonical-prefixes 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-LKS-NODUMP %s +// RUN: | FileCheck -check-prefix=CHK-COMMANDS %s // RUN: %clang -### -fopenmp=libomp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -save-temps -no-canonical-prefixes 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-LKS-NODUMP %s - -// -// Check the linker script contains what we expect. -// -// CHK-LKS: /* -// CHK-LKS: OpenMP Offload Linker Script -// CHK-LKS: *** Automatically generated by Clang *** -// CHK-LKS-NODUMP-NOT: OpenMP Offload Linker Script. 
-// CHK-LKS: */ -// CHK-LKS: TARGET(binary) -// CHK-LKS-REG: INPUT([[T1BIN:.+\.out]]) -// CHK-LKS-REG: INPUT([[T2BIN:.+\.out]]) -// CHK-LKS-ST: INPUT([[T1BIN:.+\.out-openmp-powerpc64le-ibm-linux-gnu]]) -// CHK-LKS-ST: INPUT([[T2BIN:.+\.out-openmp-x86_64-pc-linux-gnu]]) -// CHK-LKS: SECTIONS -// CHK-LKS: { -// CHK-LKS: .omp_offloading.powerpc64le-ibm-linux-gnu : -// CHK-LKS: ALIGN(0x10) -// CHK-LKS: { -// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.img_start.powerpc64le-ibm-linux-gnu = .); -// CHK-LKS: [[T1BIN]] -// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.img_end.powerpc64le-ibm-linux-gnu = .); -// CHK-LKS: } -// CHK-LKS: .omp_offloading.x86_64-pc-linux-gnu : -// CHK-LKS: ALIGN(0x10) -// CHK-LKS: { -// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.img_start.x86_64-pc-linux-gnu = .); -// CHK-LKS: [[T2BIN]] -// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.img_end.x86_64-pc-linux-gnu = .); -// CHK-LKS: } -// CHK-LKS: } -// CHK-LKS: INSERT BEFORE .data +// RUN: | FileCheck -check-prefix=CHK-COMMANDS-ST %s // -// Generate host BC file. +// Generate host BC file and host object. 
// // CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" // CHK-COMMANDS-SAME: "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" // CHK-COMMANDS-SAME: "-o" " // CHK-COMMANDS-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "c" " // CHK-COMMANDS-SAME: [[INPUT:[^\\/]+\.c]]" +// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" " +// CHK-COMMANDS-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[HOSTBC]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-E" {{.*}}"-fopenmp" {{.*}}"-o" " // CHK-COMMANDS-ST-SAME: [[HOSTPP:[^\\/]+\.i]]" "-x" "c" " // CHK-COMMANDS-ST-SAME: [[INPUT:[^\\/]+\.c]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" {{.*}}"-o" " // CHK-COMMANDS-ST-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]" +// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" " +// CHK-COMMANDS-ST-SAME: [[HOSTASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[HOSTBC]]" +// CHK-COMMANDS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-unknown-linux" "-filetype" "obj" {{.*}}"-o" " +// CHK-COMMANDS-ST-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "{{.*}}[[HOSTASM]]" // // Compile for the powerpc device. @@ -335,21 +314,26 @@ // CHK-COMMANDS-ST-SAME: [[T2BIN:[^\\/]+\.out-openmp-x86_64-pc-linux-gnu]]" {{.*}}"{{.*}}[[T2OBJ]]" // -// Generate host object from the BC file and link using the linker script. +// Create wrapper BC file and wrapper object. 
// +// CHK-COMMANDS: clang-offload-wrapper{{(\.exe)?}}" "-target" "powerpc64le-unknown-linux" {{.*}}"-o" " +// CHK-COMMANDS-SAME: [[WRAPPERBC:[^\\/]+\.bc]]" "{{.*}}[[T1BIN]]" "{{.*}}[[T2BIN]]" // CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" " -// CHK-COMMANDS-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[HOSTBC]]" -// CHK-COMMANDS: ld{{(\.exe)?}}" {{.*}}"-o" " -// CHK-COMMANDS-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"-lomptarget" {{.*}}"-T" " -// CHK-COMMANDS-SAME: [[HOSTLK:[^\\/]+\.lk]]" +// CHK-COMMANDS-SAME: [[WRAPPEROBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[WRAPPERBC]]" +// CHK-COMMANDS-ST: clang-offload-wrapper{{(\.exe)?}}" "-target" "powerpc64le-unknown-linux" {{.*}}"-o" " +// CHK-COMMANDS-ST-SAME: [[WRAPPERBC:[^\\/]+\.bc]]" "{{.*}}[[T1BIN]]" "{{.*}}[[T2BIN]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" " -// CHK-COMMANDS-ST-SAME: [[HOSTASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[HOSTBC]]" +// CHK-COMMANDS-ST-SAME: [[WRAPPERASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[WRAPPERBC]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-unknown-linux" "-filetype" "obj" {{.*}}"-o" " -// CHK-COMMANDS-ST-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "{{.*}}[[HOSTASM]]" -// CHK-COMMANDS-ST: ld{{(\.exe)?}}" {{.*}}"-o" " -// CHK-COMMANDS-ST-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"-lomptarget" {{.*}}"-T" " -// CHK-COMMANDS-ST-SAME: [[HOSTLK:[^\\/]+\.lk]]" +// CHK-COMMANDS-ST-SAME: [[WRAPPEROBJ:[^\\/]+\.o]]" "{{.*}}[[WRAPPERASM]]" +// +// Link host binary. 
+// +// CHK-COMMANDS: ld{{(\.exe)?}}" {{.*}}"-o" " +// CHK-COMMANDS-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" "{{.*}}[[WRAPPEROBJ]]" {{.*}}"-lomptarget" +// CHK-COMMANDS-ST: ld{{(\.exe)?}}" {{.*}}"-o" " +// CHK-COMMANDS-ST-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" "{{.*}}[[WRAPPEROBJ]]" {{.*}}"-lomptarget" /// ########################################################################### @@ -391,20 +375,24 @@ // CHK-UBACTIONS: 3: compiler, {2}, ir, (host-openmp) // CHK-UBACTIONS: 4: backend, {3}, assembler, (host-openmp) // CHK-UBACTIONS: 5: assembler, {4}, object, (host-openmp) -// CHK-UBACTIONS: 6: linker, {0, 5}, image, (host-openmp) -// CHK-UBACTIONS: 7: input, "somelib", object, (device-openmp) -// CHK-UBACTIONS: 8: compiler, {2}, ir, (device-openmp) -// CHK-UBACTIONS: 9: offload, "host-openmp (powerpc64le-unknown-linux)" {3}, "device-openmp (powerpc64le-ibm-linux-gnu)" {8}, ir -// CHK-UBACTIONS: 10: backend, {9}, assembler, (device-openmp) -// CHK-UBACTIONS: 11: assembler, {10}, object, (device-openmp) -// CHK-UBACTIONS: 12: linker, {7, 11}, image, (device-openmp) +// CHK-UBACTIONS: 6: input, "somelib", object, (device-openmp) +// CHK-UBACTIONS: 7: compiler, {2}, ir, (device-openmp) +// CHK-UBACTIONS: 8: offload, "host-openmp (powerpc64le-unknown-linux)" {3}, "device-openmp (powerpc64le-ibm-linux-gnu)" {7}, ir +// CHK-UBACTIONS: 9: backend, {8}, assembler, (device-openmp) +// CHK-UBACTIONS: 10: assembler, {9}, object, (device-openmp) +// CHK-UBACTIONS: 11: linker, {6, 10}, image, (device-openmp) +// CHK-UBACTIONS: 12: offload, "device-openmp (powerpc64le-ibm-linux-gnu)" {11}, image // CHK-UBACTIONS: 13: input, "somelib", object, (device-openmp) // CHK-UBACTIONS: 14: compiler, {2}, ir, (device-openmp) // CHK-UBACTIONS: 15: offload, "host-openmp (powerpc64le-unknown-linux)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {14}, ir // CHK-UBACTIONS: 16: backend, {15}, assembler, (device-openmp) // CHK-UBACTIONS: 17: assembler, {16}, 
object, (device-openmp) // CHK-UBACTIONS: 18: linker, {13, 17}, image, (device-openmp) -// CHK-UBACTIONS: 19: offload, "host-openmp (powerpc64le-unknown-linux)" {6}, "device-openmp (powerpc64le-ibm-linux-gnu)" {12}, "device-openmp (x86_64-pc-linux-gnu)" {18}, image +// CHK-UBACTIONS: 19: offload, "device-openmp (x86_64-pc-linux-gnu)" {18}, image +// CHK-UBACTIONS: 20: clang-offload-wrapper, {12, 19}, ir, (host-openmp) +// CHK-UBACTIONS: 21: backend, {20}, assembler, (host-openmp) +// CHK-UBACTIONS: 22: assembler, {21}, object, (host-openmp) +// CHK-UBACTIONS: 23: linker, {0, 5, 22}, image, (host-openmp) /// ########################################################################### @@ -507,6 +495,8 @@ // CHK-UBJOBS-SAME: [[T2PP:[^\\/]+\.i]]" "-unbundle" // CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" {{.*}}"-o" " // CHK-UBJOBS-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]" +// CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" " +// CHK-UBJOBS-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[HOSTBC]]" // CHK-UBJOBS-ST: clang-offload-bundler{{.*}}" "-type=i" "-targets=host-powerpc64le-unknown-linux,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu" "-inputs= // CHK-UBJOBS-ST-SAME: [[INPUT:[^\\/]+\.i]]" "-outputs= // CHK-UBJOBS-ST-SAME: [[HOSTPP:[^\\/,]+\.i]], @@ -514,6 +504,10 @@ // CHK-UBJOBS-ST-SAME: [[T2PP:[^\\/,]+\.i]]" "-unbundle" // CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" {{.*}}"-o" " // CHK-UBJOBS-ST-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]" +// CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" " +// 
CHK-UBJOBS-ST-SAME: [[HOSTASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[HOSTBC]]" +// CHK-UBJOBS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-unknown-linux" "-filetype" "obj" {{.*}}"-o" " +// CHK-UBJOBS-ST-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "{{.*}}[[HOSTASM]]" // Create target 1 object. // CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" " @@ -543,19 +537,23 @@ // CHK-UBJOBS-ST: ld{{(\.exe)?}}" {{.*}}"-o" " // CHK-UBJOBS-ST-SAME: [[T2BIN:[^\\/]+\.out-openmp-x86_64-pc-linux-gnu]]" {{.*}}"{{.*}}[[T2OBJ]]" -// Create binary. +// Create wrapper BC file and wrapper object. +// CHK-UBJOBS: clang-offload-wrapper{{(\.exe)?}}" "-target" "powerpc64le-unknown-linux" {{.*}}"-o" " +// CHK-UBJOBS-SAME: [[WRAPPERBC:[^\\/]+\.bc]]" "{{.*}}[[T1BIN]]" "{{.*}}[[T2BIN]]" // CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" " -// CHK-UBJOBS-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[HOSTBC]]" -// CHK-UBJOBS: ld{{(\.exe)?}}" {{.*}}"-o" " -// CHK-UBJOBS-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" {{.*}}"-T" " -// CHK-UBJOBS-SAME: [[LKS:[^\\/]+\.lk]]" +// CHK-UBJOBS-SAME: [[WRAPPEROBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[WRAPPERBC]]" +// CHK-UBJOBS-ST: clang-offload-wrapper{{(\.exe)?}}" "-target" "powerpc64le-unknown-linux" {{.*}}"-o" " +// CHK-UBJOBS-ST-SAME: [[WRAPPERBC:[^\\/]+\.bc]]" "{{.*}}[[T1BIN]]" "{{.*}}[[T2BIN]]" // CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" " -// CHK-UBJOBS-ST-SAME: [[HOSTASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[HOSTBC]]" +// CHK-UBJOBS-ST-SAME: [[WRAPPERASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[WRAPPERBC]]" // CHK-UBJOBS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-unknown-linux" "-filetype" "obj" {{.*}}"-o" " -// CHK-UBJOBS-ST-SAME: 
[[HOSTOBJ:[^\\/]+\.o]]" "{{.*}}[[HOSTASM]]" +// CHK-UBJOBS-ST-SAME: [[WRAPPEROBJ:[^\\/]+\.o]]" "{{.*}}[[WRAPPERASM]]" + +// Create binary. +// CHK-UBJOBS: ld{{(\.exe)?}}" {{.*}}"-o" " +// CHK-UBJOBS-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" "{{.*}}[[WRAPPEROBJ]]" // CHK-UBJOBS-ST: ld{{(\.exe)?}}" {{.*}}"-o" " -// CHK-UBJOBS-ST-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" {{.*}}"-T" " -// CHK-UBJOBS-ST-SAME: [[LKS:[^\\/]+\.lk]]" +// CHK-UBJOBS-ST-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" "{{.*}}[[WRAPPEROBJ]]" // Unbundle object file. // CHK-UBJOBS2: clang-offload-bundler{{.*}}" "-type=o" "-targets=host-powerpc64le-unknown-linux,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu" "-inputs= @@ -567,9 +565,12 @@ // CHK-UBJOBS2-SAME: [[T1BIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[T1OBJ]]" // CHK-UBJOBS2: ld{{(\.exe)?}}" {{.*}}"-o" " // CHK-UBJOBS2-SAME: [[T2BIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[T2OBJ]]" +// CHK-UBJOBS2: clang-offload-wrapper{{(\.exe)?}}" "-target" "powerpc64le-unknown-linux" {{.*}}"-o" " +// CHK-UBJOBS2-SAME: [[WRAPPERBC:[^\\/]+\.bc]]" "{{.*}}[[T1BIN]]" "{{.*}}[[T2BIN]]" +// CHK-UBJOBS2: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" " +// CHK-UBJOBS2-SAME: [[WRAPPEROBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[WRAPPERBC]]" // CHK-UBJOBS2: ld{{(\.exe)?}}" {{.*}}"-o" " -// CHK-UBJOBS2-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" {{.*}}"-T" " -// CHK-UBJOBS2-SAME: [[LKS:[^\\/]+\.lk]]" +// CHK-UBJOBS2-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" "{{.*}}[[WRAPPEROBJ]]" // CHK-UBJOBS2-ST-NOT: clang-offload-bundler{{.*}}in.so // CHK-UBJOBS2-ST: clang-offload-bundler{{.*}}" "-type=o" "-targets=host-powerpc64le-unknown-linux,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu" "-inputs= // CHK-UBJOBS2-ST-SAME: [[INPUT:[^\\/]+\.o]]" "-outputs= @@ -581,9 +582,14 @@ // CHK-UBJOBS2-ST-SAME: [[T1BIN:[^\\/]+\.out-openmp-powerpc64le-ibm-linux-gnu]]" 
{{.*}}"{{.*}}[[T1OBJ]]" // CHK-UBJOBS2-ST: ld{{(\.exe)?}}" {{.*}}"-o" " // CHK-UBJOBS2-ST-SAME: [[T2BIN:[^\\/]+\.out-openmp-x86_64-pc-linux-gnu]]" {{.*}}"{{.*}}[[T2OBJ]]" +// CHK-UBJOBS2-ST: clang-offload-wrapper{{(\.exe)?}}" "-target" "powerpc64le-unknown-linux" {{.*}}"-o" " +// CHK-UBJOBS2-ST-SAME: [[WRAPPERBC:[^\\/]+\.bc]]" "{{.*}}[[T1BIN]]" "{{.*}}[[T2BIN]]" +// CHK-UBJOBS2-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" "-S" {{.*}}"-fopenmp" {{.*}}"-o" " +// CHK-UBJOBS2-ST-SAME: [[WRAPPERASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[WRAPPERBC]]" +// CHK-UBJOBS2-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-unknown-linux" "-filetype" "obj" {{.*}}"-o" " +// CHK-UBJOBS2-ST-SAME: [[WRAPPEROBJ:[^\\/]+\.o]]" "{{.*}}[[WRAPPERASM]]" // CHK-UBJOBS2-ST: ld{{(\.exe)?}}" {{.*}}"-o" " -// CHK-UBJOBS2-ST-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" {{.*}}"-T" " -// CHK-UBJOBS2-ST-SAME: [[LKS:[^\\/]+\.lk]]" +// CHK-UBJOBS2-ST-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" "{{.*}}[[WRAPPEROBJ]]" /// ########################################################################### Index: clang/tools/CMakeLists.txt =================================================================== --- clang/tools/CMakeLists.txt +++ clang/tools/CMakeLists.txt @@ -8,6 +8,7 @@ add_clang_subdirectory(clang-fuzzer) add_clang_subdirectory(clang-import-test) add_clang_subdirectory(clang-offload-bundler) +add_clang_subdirectory(clang-offload-wrapper) add_clang_subdirectory(clang-scan-deps) add_clang_subdirectory(c-index-test) Index: clang/tools/clang-offload-wrapper/CMakeLists.txt =================================================================== --- /dev/null +++ clang/tools/clang-offload-wrapper/CMakeLists.txt @@ -0,0 +1,25 @@ +set(LLVM_LINK_COMPONENTS BitWriter Core Support TransformUtils) + +if(NOT CLANG_BUILT_STANDALONE) + set(tablegen_deps intrinsics_gen) +endif() + +add_clang_executable(clang-offload-wrapper + ClangOffloadWrapper.cpp + + DEPENDS + ${tablegen_deps} + ) 
+ +set(CLANG_OFFLOAD_WRAPPER_LIB_DEPS + clangBasic + ) + +add_dependencies(clang clang-offload-wrapper) + +target_link_libraries(clang-offload-wrapper + PRIVATE + ${CLANG_OFFLOAD_WRAPPER_LIB_DEPS} + ) + +install(TARGETS clang-offload-wrapper RUNTIME DESTINATION bin) Index: clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp =================================================================== --- /dev/null +++ clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp @@ -0,0 +1,196 @@ +//===-- clang-offload-wrapper/ClangOffloadWrapper.cpp -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Implementation of the offload wrapper tool. It takes offload target binaries +/// as input and creates wrapper bitcode file containing target binaries +/// packaged as data. 
+/// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/Version.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Bitcode/BitcodeWriter.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/StringSaver.h" +#include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +using namespace llvm; + +static cl::opt Help("h", cl::desc("Alias for -help"), cl::Hidden); + +// Mark all our options with this category, everything else (except for -version +// and -help) will be hidden. +static cl::OptionCategory + ClangOffloadWrapperCategory("clang-offload-wrapper options"); + +static cl::opt Output("o", cl::Required, + cl::desc("Output filename"), + cl::value_desc("filename"), + cl::cat(ClangOffloadWrapperCategory)); + +static cl::list Inputs(cl::Positional, cl::OneOrMore, + cl::desc(""), + cl::cat(ClangOffloadWrapperCategory)); + +static cl::opt + Target("target", cl::Required, + cl::desc("Target triple for the output module"), + cl::value_desc("triple"), cl::cat(ClangOffloadWrapperCategory)); + +static cl::list + OffloadTargets("offload-targets", cl::CommaSeparated, cl::OneOrMore, + cl::desc("Comma-separated list of device target triples"), + cl::value_desc("triples"), + cl::cat(ClangOffloadWrapperCategory)); + +namespace { + +class BinaryWrapper { +public: + // Binary descriptor. The first field is a reference to the binary bits, + // and the second is the target triple the binary was built for.
+ using BinaryDesc = std::pair, StringRef>; + +private: + LLVMContext C; + Module M; + + // Saver for generated strings. + BumpPtrAllocator Alloc; + UniqueStringSaver SS; + +private: + void createImages(ArrayRef Binaries) { + for (const BinaryDesc &Bin : Binaries) { + StringRef SectionName = SS.save(".omp_offloading." + Bin.second); + + auto *DataC = ConstantDataArray::get(C, Bin.first); + auto *ImageB = + new GlobalVariable(M, DataC->getType(), /*isConstant=*/true, + GlobalVariable::ExternalLinkage, DataC, + ".omp_offloading.img_start." + Bin.second); + ImageB->setSection(SectionName); + ImageB->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + ImageB->setVisibility(llvm::GlobalValue::HiddenVisibility); + + auto *EmptyC = + ConstantAggregateZero::get(ArrayType::get(Type::getInt8Ty(C), 0u)); + auto *ImageE = + new GlobalVariable(M, EmptyC->getType(), /*isConstant=*/true, + GlobalVariable::ExternalLinkage, EmptyC, + ".omp_offloading.img_end." + Bin.second); + ImageE->setSection(SectionName); + ImageE->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + ImageE->setVisibility(GlobalValue::HiddenVisibility); + } + } + +public: + BinaryWrapper(StringRef Target) : M("offload.wrapper.object", C), SS(Alloc) { + M.setTargetTriple(Target); + } + + const Module &wrapBinaries(ArrayRef Binaries) { + createImages(Binaries); + return M; + } +}; + +} // anonymous namespace + +int main(int argc, const char **argv) { + sys::PrintStackTraceOnErrorSignal(argv[0]); + + cl::HideUnrelatedOptions(ClangOffloadWrapperCategory); + cl::SetVersionPrinter([](raw_ostream &OS) { + OS << clang::getClangToolFullVersion("clang-offload-wrapper") << '\n'; + }); + cl::ParseCommandLineOptions( + argc, argv, + "A tool to create a wrapper bitcode for offload target binaries. 
Takes " + "offload\ntarget binaries as input and produces bitcode file containing " + "target binaries packaged\nas data.\n"); + + if (Help) { + cl::PrintHelpMessage(); + return 0; + } + + auto reportError = [argv](Error E) { + logAllUnhandledErrors(std::move(E), WithColor::error(errs(), argv[0])); + }; + + if (Triple(Target).getArch() == Triple::UnknownArch) { + reportError(createStringError( + errc::invalid_argument, "'" + Target + "': unsupported target triple")); + return 1; + } + + if (Inputs.size() != OffloadTargets.size()) { + reportError(createStringError( + errc::invalid_argument, + "number of input files and offload targets should match")); + return 1; + } + + // Read device binaries. + SmallVector, 4u> Buffers; + SmallVector Images; + Buffers.reserve(Inputs.size()); + Images.reserve(Inputs.size()); + for (unsigned I = 0; I < Inputs.size(); ++I) { + const std::string &File = Inputs[I]; + ErrorOr> BufOrErr = + MemoryBuffer::getFileOrSTDIN(File); + if (!BufOrErr) { + reportError(createFileError(File, BufOrErr.getError())); + return 1; + } + const std::unique_ptr &Buf = + Buffers.emplace_back(std::move(*BufOrErr)); + Images.emplace_back( + makeArrayRef(Buf->getBufferStart(), Buf->getBufferSize()), + OffloadTargets[I]); + } + + // Create the output file to write the resulting bitcode to. + std::error_code EC; + ToolOutputFile Out(Output, EC, sys::fs::OF_None); + if (EC) { + reportError(createFileError(Output, EC)); + return 1; + } + + // Create a wrapper for device binaries and write its bitcode to the file. + WriteBitcodeToFile(BinaryWrapper(Target).wrapBinaries( + makeArrayRef(Images.data(), Images.size())), + Out.os()); + if (Out.os().has_error()) { + reportError(createFileError(Output, Out.os().error())); + return 1; + } + + // Success. + Out.keep(); + return 0; +}