diff --git a/clang/docs/ClangOffloadPackager.rst b/clang/docs/ClangOffloadPackager.rst new file mode 100644 --- /dev/null +++ b/clang/docs/ClangOffloadPackager.rst @@ -0,0 +1,72 @@ +====================== +Clang Offload Packager +====================== + +.. contents:: + :local: + +.. _clang-offload-packager: + +Introduction +============ + +This tool bundles device files into a single image containing necessary +metadata. We use a custom binary format for bundling all the device images +together. The image format is a small header wrapping around a string map. This +tool creates bundled binaries so that they can be embedded into the host to +create a fat-binary. + +An embedded binary is marked by the ``0x10FF10AD`` magic bytes, followed by a +version. Each created binary contains its own magic bytes. This allows us to +locate all the embedded offloading sections even after they may have been merged +by the linker, such as when using relocatable linking. The format used is +primarily a binary serialization of the following struct. + +.. code-block:: c++ + + struct OffloadingImage { + uint16_t TheImageKind; + uint16_t TheOffloadKind; + uint32_t Flags; + StringMap<StringRef> StringData; + MemoryBufferRef Image; + }; + +Usage +===== + +This tool can be used with the following arguments. Generally information is +passed as a key-value pair to the ``image=`` argument. The ``file`` and +``triple`` arguments are mandatory to make a valid image; ``arch`` is optional. + +.. code-block:: console + + OVERVIEW: A utility for bundling several object files into a single binary. + The output binary can then be embedded into the host section table + to create a fatbinary containing offloading code. 
+ + USAGE: clang-offload-packager [options] + + OPTIONS: + + Generic Options: + + --help - Display available options (--help-hidden for more) + --help-list - Display list of available options (--help-list-hidden for more) + --version - Display the version of this program + + clang-offload-packager options: + + --image=<<key>=<value>,...> - List of key and value arguments. Required + keywords are 'file' and 'triple'. + -o=<file> - Write output to <file>. + +Example +======= + +This tool simply takes many input files from the ``image`` option and creates a +single output file with all the images combined. + +.. code-block:: console + + clang-offload-packager -o out.bin --image=file=input.o,triple=nvptx64,arch=sm_70 diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h --- a/clang/include/clang/Basic/CodeGenOptions.h +++ b/clang/include/clang/Basic/CodeGenOptions.h @@ -276,12 +276,8 @@ /// CUDA runtime back-end for incorporating them into host-side object file. std::string CudaGpuBinaryFileName; - /// List of filenames and metadata passed in using the -fembed-offload-object - /// option to embed device-side offloading objects into the host as a named - /// section. Input passed in as 'filename,kind,triple,arch'. - /// - /// NOTE: This will need to be expanded whenever we want to pass in more - /// metadata, at some point this should be its own clang tool. + /// List of filenames passed in using the -fembed-offload-object option. These + /// are offloading binaries containing device images and metadata. 
std::vector OffloadObjects; /// The name of the file to which the backend should save YAML optimization diff --git a/clang/include/clang/Driver/Action.h b/clang/include/clang/Driver/Action.h --- a/clang/include/clang/Driver/Action.h +++ b/clang/include/clang/Driver/Action.h @@ -74,6 +74,7 @@ OffloadBundlingJobClass, OffloadUnbundlingJobClass, OffloadWrapperJobClass, + OffloadPackagerJobClass, LinkerWrapperJobClass, StaticLibJobClass, @@ -669,6 +670,17 @@ } }; +class OffloadPackagerJobAction : public JobAction { + void anchor() override; + +public: + OffloadPackagerJobAction(ActionList &Inputs, types::ID Type); + + static bool classof(const Action *A) { + return A->getKind() == OffloadPackagerJobClass; + } +}; + class LinkerWrapperJobAction : public JobAction { void anchor() override; diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -151,6 +151,7 @@ mutable std::unique_ptr IfsMerge; mutable std::unique_ptr OffloadBundler; mutable std::unique_ptr OffloadWrapper; + mutable std::unique_ptr OffloadPackager; mutable std::unique_ptr LinkerWrapper; Tool *getClang() const; @@ -162,6 +163,7 @@ Tool *getClangAs() const; Tool *getOffloadBundler() const; Tool *getOffloadWrapper() const; + Tool *getOffloadPackager() const; Tool *getLinkerWrapper() const; mutable bool SanitizerArgsChecked = false; diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1211,33 +1211,16 @@ return; for (StringRef OffloadObject : CGOpts.OffloadObjects) { - SmallVector ObjectFields; - OffloadObject.split(ObjectFields, ','); - - if (ObjectFields.size() != 4) { - auto DiagID = Diags.getCustomDiagID( - DiagnosticsEngine::Error, "Expected at least four arguments '%0'"); - Diags.Report(DiagID) << OffloadObject; - return; - } - llvm::ErrorOr> ObjectOrErr = - 
llvm::MemoryBuffer::getFileOrSTDIN(ObjectFields[0]); + llvm::MemoryBuffer::getFileOrSTDIN(OffloadObject); if (std::error_code EC = ObjectOrErr.getError()) { auto DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, "could not open '%0' for embedding"); - Diags.Report(DiagID) << ObjectFields[0]; + Diags.Report(DiagID) << OffloadObject; return; } - OffloadBinary::OffloadingImage Image{}; - Image.TheImageKind = getImageKind(ObjectFields[0].rsplit(".").second); - Image.TheOffloadKind = getOffloadKind(ObjectFields[1]); - Image.StringData = {{"triple", ObjectFields[2]}, {"arch", ObjectFields[3]}}; - Image.Image = **ObjectOrErr; - - std::unique_ptr OffloadBuffer = OffloadBinary::write(Image); - llvm::embedBufferInModule(*M, *OffloadBuffer, ".llvm.offloading", + llvm::embedBufferInModule(*M, **ObjectOrErr, ".llvm.offloading", Align(OffloadBinary::getAlignment())); } } diff --git a/clang/lib/Driver/Action.cpp b/clang/lib/Driver/Action.cpp --- a/clang/lib/Driver/Action.cpp +++ b/clang/lib/Driver/Action.cpp @@ -45,6 +45,8 @@ return "clang-offload-unbundler"; case OffloadWrapperJobClass: return "clang-offload-wrapper"; + case OffloadPackagerJobClass: + return "clang-offload-packager"; case LinkerWrapperJobClass: return "clang-linker-wrapper"; case StaticLibJobClass: @@ -432,6 +434,12 @@ types::ID Type) : JobAction(OffloadWrapperJobClass, Inputs, Type) {} +void OffloadPackagerJobAction::anchor() {} + +OffloadPackagerJobAction::OffloadPackagerJobAction(ActionList &Inputs, + types::ID Type) + : JobAction(OffloadPackagerJobClass, Inputs, Type) {} + void LinkerWrapperJobAction::anchor() {} LinkerWrapperJobAction::LinkerWrapperJobAction(ActionList &Inputs, diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4382,6 +4382,7 @@ getFinalPhase(Args) == phases::Preprocess)) return HostAction; + ActionList OffloadActions; OffloadAction::DeviceDependences DDeps; const Action::OffloadKind 
OffloadKinds[] = { @@ -4457,6 +4458,9 @@ auto TCAndArch = TCAndArchs.begin(); for (Action *A : DeviceActions) { DDeps.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind); + OffloadAction::DeviceDependences DDep; + DDep.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind); + OffloadActions.push_back(C.MakeAction(DDep, A->getType())); ++TCAndArch; } } @@ -4464,10 +4468,16 @@ if (DeviceOnly) return C.MakeAction(DDeps, types::TY_Nothing); + Action *OffloadPackager = + C.MakeAction(OffloadActions, types::TY_Image); + OffloadAction::DeviceDependences DDep; + DDep.add(*OffloadPackager, *C.getSingleOffloadToolChain(), + nullptr, Action::OFK_None); OffloadAction::HostDependence HDep( *HostAction, *C.getSingleOffloadToolChain(), - /*BoundArch=*/nullptr, DDeps); - return C.MakeAction(HDep, DDeps); + /*BoundArch=*/nullptr, isa(HostAction) ? DDep : DDeps); + return C.MakeAction( + HDep, isa(HostAction) ? DDep : DDeps); } Action *Driver::ConstructPhaseAction( diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -328,6 +328,12 @@ return OffloadWrapper.get(); } +Tool *ToolChain::getOffloadPackager() const { + if (!OffloadPackager) + OffloadPackager.reset(new tools::OffloadPackager(*this)); + return OffloadPackager.get(); +} + Tool *ToolChain::getLinkerWrapper() const { if (!LinkerWrapper) LinkerWrapper.reset(new tools::LinkerWrapper(*this, getLink())); @@ -373,6 +379,8 @@ case Action::OffloadWrapperJobClass: return getOffloadWrapper(); + case Action::OffloadPackagerJobClass: + return getOffloadPackager(); case Action::LinkerWrapperJobClass: return getLinkerWrapper(); } diff --git a/clang/lib/Driver/ToolChains/Clang.h b/clang/lib/Driver/ToolChains/Clang.h --- a/clang/lib/Driver/ToolChains/Clang.h +++ b/clang/lib/Driver/ToolChains/Clang.h @@ -170,6 +170,19 @@ const char *LinkingOutput) const override; }; +/// Offload binary tool. 
+class LLVM_LIBRARY_VISIBILITY OffloadPackager final : public Tool { +public: + OffloadPackager(const ToolChain &TC) + : Tool("Offload::Packager", "clang-offload-packager", TC) {} + + bool hasIntegratedCPP() const override { return false; } + void ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, const InputInfoList &Inputs, + const llvm::opt::ArgList &TCArgs, + const char *LinkingOutput) const override; +}; + /// Linker wrapper tool. class LLVM_LIBRARY_VISIBILITY LinkerWrapper final : public Tool { const Tool *Linker; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6985,22 +6985,9 @@ // Host-side offloading recieves the device object files and embeds it in a // named section including the associated target triple and architecture. - for (const InputInfo Input : HostOffloadingInputs) { - const Action *OffloadAction = Input.getAction(); - const ToolChain *TC = OffloadAction->getOffloadingToolChain(); - const ArgList &TCArgs = - C.getArgsForToolChain(TC, OffloadAction->getOffloadingArch(), - OffloadAction->getOffloadingDeviceKind()); - StringRef File = C.getArgs().MakeArgString(TC->getInputFilename(Input)); - StringRef Arch = (OffloadAction->getOffloadingArch()) - ? 
OffloadAction->getOffloadingArch() - : TCArgs.getLastArgValue(options::OPT_march_EQ); - - CmdArgs.push_back(Args.MakeArgString( - "-fembed-offload-object=" + File + "," + - Action::GetOffloadKindName(OffloadAction->getOffloadingDeviceKind()) + - "," + TC->getTripleString() + "," + Arch)); - } + for (const InputInfo Input : HostOffloadingInputs) + CmdArgs.push_back(Args.MakeArgString("-fembed-offload-object=" + + TC.getInputFilename(Input))); if (Triple.isAMDGPU()) { handleAMDGPUCodeObjectVersionOptions(D, Args, CmdArgs); @@ -8237,6 +8224,42 @@ CmdArgs, Inputs, Output)); } +void OffloadPackager::ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, + const InputInfoList &Inputs, + const llvm::opt::ArgList &Args, + const char *LinkingOutput) const { + ArgStringList CmdArgs; + + // Add the output file name. + assert(Output.isFilename() && "Invalid output."); + CmdArgs.push_back("-o"); + CmdArgs.push_back(Output.getFilename()); + + // Create the inputs to bundle the needed metadata. + for (const InputInfo &Input : Inputs) { + const Action *OffloadAction = Input.getAction(); + const ToolChain *TC = OffloadAction->getOffloadingToolChain(); + const ArgList &TCArgs = + C.getArgsForToolChain(TC, OffloadAction->getOffloadingArch(), + OffloadAction->getOffloadingDeviceKind()); + StringRef File = C.getArgs().MakeArgString(TC->getInputFilename(Input)); + StringRef Arch = (OffloadAction->getOffloadingArch()) + ? 
OffloadAction->getOffloadingArch() + : TCArgs.getLastArgValue(options::OPT_march_EQ); + + CmdArgs.push_back(Args.MakeArgString( + "--image=file=" + File + "," + "triple=" + TC->getTripleString() + "," + + "arch=" + Arch + "," + "kind=" + + Action::GetOffloadKindName(OffloadAction->getOffloadingDeviceKind()))); + } + + C.addCommand(std::make_unique( + JA, *this, ResponseFileSupport::None(), + Args.MakeArgString(getToolChain().GetProgramPath(getShortName())), + CmdArgs, Inputs, Output)); +} + void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, diff --git a/clang/test/Driver/amdgpu-openmp-toolchain-new.c b/clang/test/Driver/amdgpu-openmp-toolchain-new.c --- a/clang/test/Driver/amdgpu-openmp-toolchain-new.c +++ b/clang/test/Driver/amdgpu-openmp-toolchain-new.c @@ -15,7 +15,7 @@ // RUN: %clang -ccc-print-phases --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-PHASES %s -// CHECK-PHASES: 0: input, "[[INPUT:.*]]", c, (host-openmp) +// CHECK-PHASES: 0: input, "[[INPUT:.+]]", c, (host-openmp) // CHECK-PHASES: 1: preprocessor, {0}, cpp-output, (host-openmp) // CHECK-PHASES: 2: compiler, {1}, ir, (host-openmp) // CHECK-PHASES: 3: input, "[[INPUT]]", c, (device-openmp) @@ -24,10 +24,12 @@ // CHECK-PHASES: 6: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, "device-openmp (amdgcn-amd-amdhsa)" {5}, ir // CHECK-PHASES: 7: backend, {6}, assembler, (device-openmp) // CHECK-PHASES: 8: assembler, {7}, object, (device-openmp) -// CHECK-PHASES: 9: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, "device-openmp (amdgcn-amd-amdhsa)" {8}, ir -// CHECK-PHASES: 10: backend, {9}, assembler, (host-openmp) -// CHECK-PHASES: 11: assembler, {10}, object, (host-openmp) -// CHECK-PHASES: 12: clang-linker-wrapper, {11}, image, (host-openmp) +// CHECK-PHASES: 9: offload, "device-openmp 
(amdgcn-amd-amdhsa)" {8}, object +// CHECK-PHASES: 10: clang-offload-packager, {9}, image +// CHECK-PHASES: 11: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, " (x86_64-unknown-linux-gnu)" {10}, ir +// CHECK-PHASES: 12: backend, {11}, assembler, (host-openmp) +// CHECK-PHASES: 13: assembler, {12}, object, (host-openmp) +// CHECK-PHASES: 14: clang-linker-wrapper, {13}, image, (host-openmp) // handling of --libomptarget-amdgpu-bc-path // RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib/libomptarget-amdgpu-gfx803.bc %s 2>&1 | FileCheck %s --check-prefix=CHECK-LIBOMPTARGET @@ -38,9 +40,10 @@ // RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-BINDINGS // RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa --offload-arch=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-BINDINGS -// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[HOST_BC:.*]]" -// CHECK-BINDINGS: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC:.*]]" -// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_BC]]"], output: "[[HOST_OBJ:.*]]" +// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HOST_BC:.+]]" +// CHECK-BINDINGS: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC:.+]]" +// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[DEVICE_BC]]"], output: "[[BINARY:.+]]" +// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[BINARY]]"], output: 
"[[HOST_OBJ:.+]]" // CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out" // RUN: %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR diff --git a/clang/test/Driver/cuda-openmp-driver.cu b/clang/test/Driver/cuda-openmp-driver.cu --- a/clang/test/Driver/cuda-openmp-driver.cu +++ b/clang/test/Driver/cuda-openmp-driver.cu @@ -11,7 +11,8 @@ // BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[PTX_SM_70:.+]]" // BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_70:.+]]"], output: "[[CUBIN_SM_70:.+]]" // BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_70]]", "[[PTX_SM_70:.+]]"], output: "[[FATBIN_SM_70:.+]]" -// BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[FATBIN_SM_35]]", "[[FATBIN_SM_70]]"], output: "[[HOST_OBJ:.+]]" +// BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[FATBIN_SM_35]]", "[[FATBIN_SM_70]]"], output: "[[BINARY:.+]]" +// BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[BINARY]]"], output: "[[HOST_OBJ:.+]]" // BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out" // RUN: %clang -### -nocudalib --offload-new-driver %s 2>&1 | FileCheck -check-prefix RDC %s diff --git a/clang/test/Driver/cuda-phases.cu b/clang/test/Driver/cuda-phases.cu --- a/clang/test/Driver/cuda-phases.cu +++ b/clang/test/Driver/cuda-phases.cu @@ -223,9 +223,9 @@ // // RUN: %clang -### -target powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver \ // RUN: --offload-arch=sm_52 --offload-arch=sm_70 %s 2>&1 | FileCheck --check-prefix=NEW_DRIVER %s -// NEW_DRIVER: 0: input, "[[INPUT:.*]]", cuda, (host-cuda) -// NEW_DRIVER: 1: preprocessor, 
{0}, cuda-cpp-output, (host-cuda) -// NEW_DRIVER: 2: compiler, {1}, ir, (host-cuda) +// NEW_DRIVER: 0: input, "[[INPUT:.+]]", cuda +// NEW_DRIVER: 1: preprocessor, {0}, cuda-cpp-output +// NEW_DRIVER: 2: compiler, {1}, ir // NEW_DRIVER: 3: input, "[[INPUT]]", cuda, (device-cuda, sm_52) // NEW_DRIVER: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_52) // NEW_DRIVER: 5: compiler, {4}, ir, (device-cuda, sm_52) @@ -234,15 +234,18 @@ // NEW_DRIVER: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, object // NEW_DRIVER: 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {6}, assembler // NEW_DRIVER: 10: linker, {8, 9}, cuda-fatbin, (device-cuda, sm_52) -// NEW_DRIVER: 11: input, "[[INPUT]]", cuda, (device-cuda, sm_70) -// NEW_DRIVER: 12: preprocessor, {11}, cuda-cpp-output, (device-cuda, sm_70) -// NEW_DRIVER: 13: compiler, {12}, ir, (device-cuda, sm_70) -// NEW_DRIVER: 14: backend, {13}, assembler, (device-cuda, sm_70) -// NEW_DRIVER: 15: assembler, {14}, object, (device-cuda, sm_70) -// NEW_DRIVER: 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {15}, object -// NEW_DRIVER: 17: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {14}, assembler -// NEW_DRIVER: 18: linker, {16, 17}, cuda-fatbin, (device-cuda, sm_70) -// NEW_DRIVER: 19: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {10}, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {18}, ir -// NEW_DRIVER: 20: backend, {19}, assembler, (host-cuda) -// NEW_DRIVER: 21: assembler, {20}, object, (host-cuda) -// NEW_DRIVER: 22: clang-linker-wrapper, {21}, image, (host-cuda) +// NEW_DRIVER: 11: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {10}, cuda-fatbin +// NEW_DRIVER: 12: input, "[[INPUT]]", cuda, (device-cuda, sm_70) +// NEW_DRIVER: 13: preprocessor, {12}, cuda-cpp-output, (device-cuda, sm_70) +// NEW_DRIVER: 14: compiler, {13}, ir, (device-cuda, sm_70) +// NEW_DRIVER: 15: backend, {14}, assembler, (device-cuda, sm_70) +// NEW_DRIVER: 16: 
assembler, {15}, object, (device-cuda, sm_70) +// NEW_DRIVER: 17: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {16}, object +// NEW_DRIVER: 18: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {15}, assembler +// NEW_DRIVER: 19: linker, {17, 18}, cuda-fatbin, (device-cuda, sm_70) +// NEW_DRIVER: 20: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {19}, cuda-fatbin +// NEW_DRIVER: 21: clang-offload-packager, {11, 20}, image +// NEW_DRIVER: 22: offload, " (powerpc64le-ibm-linux-gnu)" {2}, " (powerpc64le-ibm-linux-gnu)" {21}, ir +// NEW_DRIVER: 23: backend, {22}, assembler, (host-cuda) +// NEW_DRIVER: 24: assembler, {23}, object, (host-cuda) +// NEW_DRIVER: 25: clang-linker-wrapper, {24}, image, (host-cuda) diff --git a/clang/test/Driver/linker-wrapper-image.c b/clang/test/Driver/linker-wrapper-image.c --- a/clang/test/Driver/linker-wrapper-image.c +++ b/clang/test/Driver/linker-wrapper-image.c @@ -2,8 +2,9 @@ // REQUIRES: nvptx-registered-target // REQUIRES: amdgpu-registered-target +// RUN: clang-offload-packager -o %t.out --image=file=%S/Inputs/dummy-elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ -// RUN: -fembed-offload-object=%S/Inputs/dummy-elf.o,openmp,nvptx64-nvida-cuda,sm_70 +// RUN: -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple x86_64-unknown-linux-gnu \ // RUN: -linker-path /usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=OPENMP @@ -28,8 +29,9 @@ // OPENMP-NEXT: ret void // OPENMP-NEXT: } +// RUN: clang-offload-packager -o %t.out --image=file=%S/Inputs/dummy-elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_70 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ -// RUN: -fembed-offload-object=%S/Inputs/dummy-elf.o,cuda,nvptx64-nvida-cuda,sm_70 +// RUN: -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple 
x86_64-unknown-linux-gnu \ // RUN: -linker-path /usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CUDA diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c --- a/clang/test/Driver/linker-wrapper.c +++ b/clang/test/Driver/linker-wrapper.c @@ -2,25 +2,28 @@ // REQUIRES: nvptx-registered-target // REQUIRES: amdgpu-registered-target -// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ -// RUN: -fembed-offload-object=%S/Inputs/dummy-elf.o,openmp,nvptx64-nvida-cuda,sm_70 \ -// RUN: -fembed-offload-object=%S/Inputs/dummy-elf.o,openmp,nvptx64-nvida-cuda,sm_70 +// RUN: clang-offload-packager -o %t.out \ +// RUN: --image=file=%S/Inputs/dummy-elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ +// RUN: --image=file=%S/Inputs/dummy-elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 +// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple x86_64-unknown-linux-gnu --dry-run -linker-path \ // RUN: /usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX_LINK // NVPTX_LINK: nvlink{{.*}}-m64 -o {{.*}}.out -arch sm_70 {{.*}}.o {{.*}}.o -// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ -// RUN: -fembed-offload-object=%S/Inputs/dummy-elf.o,openmp,amdgcn-amd-amdhsa,gfx908 \ -// RUN: -fembed-offload-object=%S/Inputs/dummy-elf.o,openmp,amdgcn-amd-amdhsa,gfx908 +// RUN: clang-offload-packager -o %t.out \ +// RUN: --image=file=%S/Inputs/dummy-elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \ +// RUN: --image=file=%S/Inputs/dummy-elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 +// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple x86_64-unknown-linux-gnu --dry-run -linker-path \ // RUN: /usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU_LINK // AMDGPU_LINK: 
lld{{.*}}-flavor gnu --no-undefined -shared -o {{.*}}.out {{.*}}.o {{.*}}.o -// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ -// RUN: -fembed-offload-object=%S/Inputs/dummy-elf.o,openmp,x86_64-unknown-linux-gnu, \ -// RUN: -fembed-offload-object=%S/Inputs/dummy-elf.o,openmp,x86_64-unknown-linux-gnu, +// RUN: clang-offload-packager -o %t.out \ +// RUN: --image=file=%S/Inputs/dummy-elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \ +// RUN: --image=file=%S/Inputs/dummy-elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu +// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple x86_64-unknown-linux-gnu --dry-run -linker-path \ // RUN: /usr/bin/ld.lld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CPU_LINK @@ -32,42 +35,48 @@ // HOST_LINK: ld.lld{{.*}}-a -b -c {{.*}}.o -o a.out -// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ -// RUN: -fembed-offload-object=%S/Inputs/dummy-bc.bc,openmp,nvptx64-nvida-cuda,sm_70 \ -// RUN: -fembed-offload-object=%S/Inputs/dummy-bc.bc,openmp,nvptx64-nvida-cuda,sm_70 +// RUN: clang-offload-packager -o %t.out \ +// RUN: --image=file=%S/Inputs/dummy-bc.bc,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ +// RUN: --image=file=%S/Inputs/dummy-bc.bc,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 +// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple x86_64-unknown-linux-gnu --dry-run -linker-path \ // RUN: /usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=LTO // LTO: ptxas{{.*}}-m64 -o {{.*}}.cubin -O2 --gpu-name sm_70 {{.*}}.s // LTO-NOT: nvlink -// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ -// RUN: -fembed-offload-object=%S/Inputs/dummy-elf.o,openmp,nvptx64-nvida-cuda,sm_70 \ -// RUN: 
-fembed-offload-object=%S/Inputs/dummy-elf.o,cuda,nvptx64-nvida-cuda,sm_70 +// RUN: clang-offload-packager -o %t.out \ +// RUN: --image=file=%S/Inputs/dummy-elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ +// RUN: --image=file=%S/Inputs/dummy-elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_70 +// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple x86_64-unknown-linux-gnu --dry-run -linker-path \ // RUN: /usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CUDA_OMP_LINK // CUDA_OMP_LINK: nvlink{{.*}}-m64 -o {{.*}}.out -arch sm_70 {{.*}}.o {{.*}}.o -// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t-lib.o \ -// RUN: -fembed-offload-object=%S/Inputs/dummy-elf.o,openmp,nvptx64-nvida-cuda,sm_70 \ -// RUN: -fembed-offload-object=%S/Inputs/dummy-elf.o,openmp,nvptx64-nvida-cuda,sm_52 -// RUN: llvm-ar rcs %t.a %t-lib.o -// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t-obj.o \ -// RUN: -fembed-offload-object=%S/Inputs/dummy-elf.o,openmp,nvptx64-nvida-cuda,sm_70 +// RUN: clang-offload-packager -o %t-lib.out \ +// RUN: --image=file=%S/Inputs/dummy-elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ +// RUN: --image=file=%S/Inputs/dummy-elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_52 +// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t-lib.out +// RUN: llvm-ar rcs %t.a %t.o +// RUN: clang-offload-packager -o %t.out \ +// RUN: --image=file=%S/Inputs/dummy-elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 +// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t-obj.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple x86_64-unknown-linux-gnu --dry-run -linker-path \ // RUN: /usr/bin/ld -- %t.a %t-obj.o -o a.out 2>&1 | FileCheck %s --check-prefix=STATIC-LIBRARY // STATIC-LIBRARY: nvlink{{.*}} -arch sm_70 // 
STATIC-LIBRARY-NOT: nvlink{{.*}} -arch sm_50 +// RUN: clang-offload-packager -o %t.out \ +// RUN: --image=file=%S/Inputs/dummy-elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_70 \ +// RUN: --image=file=%S/Inputs/dummy-elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ +// RUN: --image=file=%S/Inputs/dummy-elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_52 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ -// RUN: -fembed-offload-object=%S/Inputs/dummy-elf.o,cuda,nvptx64-nvida-cuda,sm_70 \ -// RUN: -fembed-offload-object=%S/Inputs/dummy-elf.o,openmp,nvptx64-nvida-cuda,sm_70 \ -// RUN: -fembed-offload-object=%S/Inputs/dummy-elf.o,cuda,nvptx64-nvida-cuda,sm_52 +// RUN: -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --dry-run --host-triple x86_64-unknown-linux-gnu -linker-path \ // RUN: /usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CUDA -// CUDA: nvlink{{.*}}-m64 -o {{.*}}.out -arch sm_70 {{.*}}.o {{.*}}.o // CUDA: nvlink{{.*}}-m64 -o {{.*}}.out -arch sm_52 {{.*}}.o -// CUDA: fatbinary{{.*}}-64 --create {{.*}}.fatbin --image=profile=sm_70,file={{.*}}.out --image=profile=sm_52,file={{.*}}.out +// CUDA: nvlink{{.*}}-m64 -o {{.*}}.out -arch sm_70 {{.*}}.o {{.*}}.o +// CUDA: fatbinary{{.*}}-64 --create {{.*}}.fatbin --image=profile=sm_52,file={{.*}}.out --image=profile=sm_70,file={{.*}}.out diff --git a/clang/test/Driver/openmp-offload-gpu-new.c b/clang/test/Driver/openmp-offload-gpu-new.c --- a/clang/test/Driver/openmp-offload-gpu-new.c +++ b/clang/test/Driver/openmp-offload-gpu-new.c @@ -23,7 +23,7 @@ // RUN: %clang -ccc-print-phases --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-PHASES %s -// CHECK-PHASES: 0: input, "[[INPUT:.*]]", c, (host-openmp) +// CHECK-PHASES: 0: input, "[[INPUT:.+]]", c, (host-openmp) // CHECK-PHASES: 1: preprocessor, {0}, cpp-output, (host-openmp) 
// CHECK-PHASES: 2: compiler, {1}, ir, (host-openmp) // CHECK-PHASES: 3: input, "[[INPUT]]", c, (device-openmp) @@ -32,16 +32,19 @@ // CHECK-PHASES: 6: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, "device-openmp (nvptx64-nvidia-cuda)" {5}, ir // CHECK-PHASES: 7: backend, {6}, assembler, (device-openmp) // CHECK-PHASES: 8: assembler, {7}, object, (device-openmp) -// CHECK-PHASES: 9: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, "device-openmp (nvptx64-nvidia-cuda)" {8}, ir -// CHECK-PHASES: 10: backend, {9}, assembler, (host-openmp) -// CHECK-PHASES: 11: assembler, {10}, object, (host-openmp) -// CHECK-PHASES: 12: clang-linker-wrapper, {11}, image, (host-openmp) +// CHECK-PHASES: 9: offload, "device-openmp (nvptx64-nvidia-cuda)" {8}, object +// CHECK-PHASES: 10: clang-offload-packager, {9}, image +// CHECK-PHASES: 11: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, " (x86_64-unknown-linux-gnu)" {10}, ir +// CHECK-PHASES: 12: backend, {11}, assembler, (host-openmp) +// CHECK-PHASES: 13: assembler, {12}, object, (host-openmp) +// CHECK-PHASES: 14: clang-linker-wrapper, {13}, image, (host-openmp) // RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-BINDINGS // CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[HOST_BC:.*]]" // CHECK-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC:.*]]" // CHECK-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC]]"], output: "[[DEVICE_OBJ:.*]]" -// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_OBJ]]"], output: "[[HOST_OBJ:.*]]" +// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[DEVICE_OBJ]]"], output: "[[BINARY:.*]]" +// CHECK-BINDINGS: 
"x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[BINARY]]"], output: "[[HOST_OBJ:.*]]" // CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out" // RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda --offload-arch=sm_52 --offload-arch=sm_70 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS @@ -50,7 +53,8 @@ // CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_52]]"], output: "[[DEVICE_OBJ_SM_52:.*]]" // CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_70:.*]]" // CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_70]]"], output: "[[DEVICE_OBJ_SM_70:.*]]" -// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_OBJ_SM_52]]", "[[DEVICE_OBJ_SM_70]]"], output: "[[HOST_OBJ:.*]]" +// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[DEVICE_OBJ_SM_52]]", "[[DEVICE_OBJ_SM_70]]"], output: "[[BINARY:.*]]" +// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[BINARY]]"], output: "[[HOST_OBJ:.*]]" // CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out" // RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp \ @@ -62,7 +66,8 @@ // CHECK-NVIDIA-AMDGPU: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[NVIDIA_PTX:.+]]" // CHECK-NVIDIA-AMDGPU: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[NVIDIA_PTX]]"], output: "[[NVIDIA_CUBIN:.+]]" // CHECK-NVIDIA-AMDGPU: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[AMD_BC:.+]]" -// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[NVIDIA_CUBIN]]", 
"[[AMD_BC]]"], output: "[[HOST_OBJ:.+]]" +// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[NVIDIA_CUBIN]]", "[[AMD_BC]]"], output: "[[BINARY:.*]]" +// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[BINARY]]"], output: "[[HOST_OBJ:.+]]" // CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out" // RUN: %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR @@ -73,7 +78,7 @@ // RUN: -nogpulib %s -o openmp-offload-gpu 2>&1 \ // RUN: | FileCheck -check-prefix=DRIVER_EMBEDDING %s -// DRIVER_EMBEDDING: -fembed-offload-object=[[CUBIN:.*\.cubin]],openmp,nvptx64-nvidia-cuda,sm_70 +// DRIVER_EMBEDDING: -fembed-offload-object={{.*}}.out // RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \ // RUN: --offload-host-only -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HOST-ONLY diff --git a/clang/test/Driver/openmp-offload-infer.c b/clang/test/Driver/openmp-offload-infer.c --- a/clang/test/Driver/openmp-offload-infer.c +++ b/clang/test/Driver/openmp-offload-infer.c @@ -23,7 +23,8 @@ // CHECK-NVIDIA-AMDGPU: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[AMD_BC:.+]]" // CHECK-NVIDIA-AMDGPU: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[NVIDIA_PTX:.+]]" // CHECK-NVIDIA-AMDGPU: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[NVIDIA_PTX]]"], output: "[[NVIDIA_CUBIN:.+]]" -// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[AMD_BC]]", "[[NVIDIA_CUBIN]]"], output: "[[HOST_OBJ:.+]]" +// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[AMD_BC]]", "[[NVIDIA_CUBIN]]"], output: 
"[[BINARY:.+]]" +// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[BINARY]]"], output: "[[HOST_OBJ:.+]]" // CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out" // RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp \ @@ -34,7 +35,8 @@ // CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_52]]"], output: "[[DEVICE_OBJ_SM_52:.*]]" // CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_70:.*]]" // CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_70]]"], output: "[[DEVICE_OBJ_SM_70:.*]]" -// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_OBJ_SM_52]]", "[[DEVICE_OBJ_SM_70]]"], output: "[[HOST_OBJ:.*]]" +// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[DEVICE_OBJ_SM_52]]", "[[DEVICE_OBJ_SM_70]]"], output: "[[BINARY:.+]]" +// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[BINARY]]"], output: "[[HOST_OBJ:.*]]" // CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out" // RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp \ diff --git a/clang/test/Frontend/embed-object.c b/clang/test/Frontend/embed-object.c --- a/clang/test/Frontend/embed-object.c +++ b/clang/test/Frontend/embed-object.c @@ -1,7 +1,6 @@ -// RUN: %clang_cc1 -x c -triple x86_64-unknown-linux-gnu -emit-llvm -fembed-offload-object=%S/Inputs/empty.h,,, -o - %s | FileCheck %s +// RUN: %clang_cc1 -x c -triple x86_64-unknown-linux-gnu -emit-llvm -fembed-offload-object=%S/Inputs/empty.h -o - %s | FileCheck %s -// CHECK: @[[OBJECT:.+]] = private constant [120 x i8] c"\10\FF\10\AD{{.*}}", section ".llvm.offloading", align 8 +// CHECK: @[[OBJECT:.+]] = 
private constant [0 x i8] zeroinitializer, section ".llvm.offloading", align 8 // CHECK: @llvm.compiler.used = appending global [1 x ptr] [ptr @[[OBJECT]]], section "llvm.metadata" - void foo(void) {} diff --git a/clang/test/Frontend/embed-object.ll b/clang/test/Frontend/embed-object.ll --- a/clang/test/Frontend/embed-object.ll +++ b/clang/test/Frontend/embed-object.ll @@ -1,10 +1,10 @@ ; RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \ -; RUN: -fembed-offload-object=%S/Inputs/empty.h,,, \ -; RUN: -fembed-offload-object=%S/Inputs/empty.h,,, -x ir %s -o - \ +; RUN: -fembed-offload-object=%S/Inputs/empty.h \ +; RUN: -fembed-offload-object=%S/Inputs/empty.h -x ir %s -o - \ ; RUN: | FileCheck %s -check-prefix=CHECK -; CHECK: @[[OBJECT_1:.+]] = private constant [120 x i8] c"\10\FF\10\AD{{.*}}\00", section ".llvm.offloading", align 8 -; CHECK: @[[OBJECT_2:.+]] = private constant [120 x i8] c"\10\FF\10\AD{{.*}}\00", section ".llvm.offloading", align 8 +; CHECK: @[[OBJECT_1:.+]] = private constant [0 x i8] zeroinitializer, section ".llvm.offloading", align 8 +; CHECK: @[[OBJECT_2:.+]] = private constant [0 x i8] zeroinitializer, section ".llvm.offloading", align 8 ; CHECK: @llvm.compiler.used = appending global [3 x ptr] [ptr @x, ptr @[[OBJECT_1]], ptr @[[OBJECT_2]]], section "llvm.metadata" @x = private constant i8 1 diff --git a/clang/test/lit.cfg.py b/clang/test/lit.cfg.py --- a/clang/test/lit.cfg.py +++ b/clang/test/lit.cfg.py @@ -63,7 +63,7 @@ tool_dirs = [config.clang_tools_dir, config.llvm_tools_dir] tools = [ - 'apinotes-test', 'c-index-test', 'clang-diff', 'clang-format', 'clang-repl', + 'apinotes-test', 'c-index-test', 'clang-diff', 'clang-format', 'clang-repl', 'clang-offload-packager', 'clang-tblgen', 'clang-scan-deps', 'opt', 'llvm-ifs', 'yaml2obj', 'clang-linker-wrapper', ToolSubst('%clang_extdef_map', command=FindTool( 'clang-extdef-mapping'), unresolved='ignore'), diff --git a/clang/tools/CMakeLists.txt b/clang/tools/CMakeLists.txt --- 
a/clang/tools/CMakeLists.txt +++ b/clang/tools/CMakeLists.txt @@ -10,6 +10,7 @@ add_clang_subdirectory(clang-import-test) add_clang_subdirectory(clang-nvlink-wrapper) add_clang_subdirectory(clang-linker-wrapper) +add_clang_subdirectory(clang-offload-packager) add_clang_subdirectory(clang-offload-bundler) add_clang_subdirectory(clang-offload-wrapper) add_clang_subdirectory(clang-scan-deps) diff --git a/clang/tools/clang-offload-packager/CMakeLists.txt b/clang/tools/clang-offload-packager/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/clang/tools/clang-offload-packager/CMakeLists.txt @@ -0,0 +1,28 @@ +set(LLVM_LINK_COMPONENTS + ${LLVM_TARGETS_TO_BUILD} + Object + Support) + +if(NOT CLANG_BUILT_STANDALONE) + set(tablegen_deps intrinsics_gen) +endif() + +add_clang_executable(clang-offload-packager + ClangOffloadPackager.cpp + + DEPENDS + ${tablegen_deps} + ) + +set(CLANG_LINKER_WRAPPER_LIB_DEPS + clangBasic + ) + +add_dependencies(clang clang-offload-packager) + +target_link_libraries(clang-offload-packager + PRIVATE + ${CLANG_LINKER_WRAPPER_LIB_DEPS} + ) + +install(TARGETS clang-offload-packager RUNTIME DESTINATION bin) diff --git a/clang/tools/clang-offload-packager/ClangOffloadPackager.cpp b/clang/tools/clang-offload-packager/ClangOffloadPackager.cpp new file mode 100644 --- /dev/null +++ b/clang/tools/clang-offload-packager/ClangOffloadPackager.cpp @@ -0,0 +1,114 @@ +//===-- clang-offload-packager/ClangOffloadPackager.cpp - file bundler ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// +// +// This tool takes several device object files and bundles them into a single +// binary image using a custom binary format. 
This is intended to be used to +// embed many device files into an application to create a fat binary. +// +//===---------------------------------------------------------------------===// + +#include "clang/Basic/Version.h" + +#include "llvm/Object/Binary.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/OffloadBinary.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/WithColor.h" + +using namespace llvm; +using namespace llvm::object; + +static cl::opt Help("h", cl::desc("Alias for -help"), cl::Hidden); + +static cl::OptionCategory + ClangOffloadPackagerCategory("clang-offload-packager options"); + +static cl::opt OutputFile("o", cl::Required, + cl::desc("Write output to ."), + cl::value_desc("file"), + cl::cat(ClangOffloadPackagerCategory)); + +static cl::list + DeviceImages("image", cl::ZeroOrMore, + cl::desc("List of key and value arguments. 
Required keywords " + "are 'file' and 'triple'."), + cl::value_desc("=,..."), + cl::cat(ClangOffloadPackagerCategory)); + +static void PrintVersion(raw_ostream &OS) { + OS << clang::getClangToolFullVersion("clang-offload-packager") << '\n'; +} + +int main(int argc, const char **argv) { + sys::PrintStackTraceOnErrorSignal(argv[0]); + cl::HideUnrelatedOptions(ClangOffloadPackagerCategory); + cl::SetVersionPrinter(PrintVersion); + cl::ParseCommandLineOptions( + argc, argv, + "A utility for bundling several object files into a single binary.\n" + "The output binary can then be embedded into the host section table\n" + "to create a fatbinary containing offloading code.\n"); + + if (Help) { + cl::PrintHelpMessage(); + return EXIT_SUCCESS; + } + + auto reportError = [argv](Error E) { + logAllUnhandledErrors(std::move(E), WithColor::error(errs(), argv[0])); + return EXIT_FAILURE; + }; + + SmallVector BinaryData; + raw_svector_ostream OS(BinaryData); + for (StringRef Image : DeviceImages) { + StringMap Args; + for (StringRef Arg : llvm::split(Image, ",")) + Args.insert(Arg.split("=")); + + if (!Args.count("triple") || !Args.count("file")) + return reportError(createStringError( + inconvertibleErrorCode(), + "'file' and 'triple' are required image arguments")); + + OffloadBinary::OffloadingImage ImageBinary{}; + std::unique_ptr DeviceImage; + for (const auto &KeyAndValue : Args) { + StringRef Key = KeyAndValue.getKey(); + if (Key == "file") { + llvm::ErrorOr> ObjectOrErr = + llvm::MemoryBuffer::getFileOrSTDIN(KeyAndValue.getValue()); + if (std::error_code EC = ObjectOrErr.getError()) + return reportError(errorCodeToError(EC)); + DeviceImage = std::move(*ObjectOrErr); + ImageBinary.Image = *DeviceImage; + ImageBinary.TheImageKind = getImageKind( + sys::path::extension(KeyAndValue.getValue()).drop_front()); + } else if (Key == "kind") { + ImageBinary.TheOffloadKind = getOffloadKind(KeyAndValue.getValue()); + } else { + ImageBinary.StringData[Key] = KeyAndValue.getValue(); + } 
+ } + std::unique_ptr Buffer = OffloadBinary::write(ImageBinary); + OS << Buffer->getBuffer(); + } + + Expected> OutputOrErr = + FileOutputBuffer::create(OutputFile, BinaryData.size()); + if (!OutputOrErr) + return reportError(OutputOrErr.takeError()); + std::unique_ptr Output = std::move(*OutputOrErr); + std::copy(BinaryData.begin(), BinaryData.end(), Output->getBufferStart()); + if (Error E = Output->commit()) + return reportError(std::move(E)); +}