diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h --- a/clang/include/clang/Basic/CodeGenOptions.h +++ b/clang/include/clang/Basic/CodeGenOptions.h @@ -276,9 +276,12 @@ /// CUDA runtime back-end for incorporating them into host-side object file. std::string CudaGpuBinaryFileName; - /// List of filenames and section name pairs passed in using the - /// -fembed-offload-object option to embed device-side offloading objects into - /// the host as a named section. Input passed in as '<filename>,<section>
' + /// List of filenames and metadata passed in using the -fembed-offload-object + /// option to embed device-side offloading objects into the host as a named + /// section. Input passed in as 'filename,kind,triple,arch'. + /// + /// NOTE: This will need to be expanded whenever we want to pass in more + /// metadata, at some point this should be its own clang tool. std::vector OffloadObjects; /// The name of the file to which the backend should save YAML optimization diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -39,6 +39,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/MC/TargetRegistry.h" +#include "llvm/Object/OffloadBinary.h" #include "llvm/Passes/PassBuilder.h" #include "llvm/Passes/PassPlugin.h" #include "llvm/Passes/StandardInstrumentations.h" @@ -1764,22 +1765,33 @@ return; for (StringRef OffloadObject : CGOpts.OffloadObjects) { - if (OffloadObject.count(',') != 1) - Diags.Report(Diags.getCustomDiagID( - DiagnosticsEngine::Error, "Invalid string pair for embedding '%0'")) - << OffloadObject; - auto FilenameAndSection = OffloadObject.split(','); + SmallVector ObjectFields; + OffloadObject.split(ObjectFields, ','); + + if (ObjectFields.size() != 4) { + auto DiagID = Diags.getCustomDiagID( + DiagnosticsEngine::Error, "Expected at least four arguments '%0'"); + Diags.Report(DiagID) << OffloadObject; + return; + } + llvm::ErrorOr> ObjectOrErr = - llvm::MemoryBuffer::getFileOrSTDIN(FilenameAndSection.first); + llvm::MemoryBuffer::getFileOrSTDIN(ObjectFields[0]); if (std::error_code EC = ObjectOrErr.getError()) { auto DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, "could not open '%0' for embedding"); - Diags.Report(DiagID) << FilenameAndSection.first; + Diags.Report(DiagID) << ObjectFields[0]; return; } - SmallString<128> SectionName( - {".llvm.offloading.", FilenameAndSection.second}); - 
llvm::embedBufferInModule(*M, **ObjectOrErr, SectionName); + OffloadBinary::OffloadingImage Image{}; + Image.TheImageKind = getImageKind(ObjectFields[0].rsplit(".").second); + Image.TheOffloadKind = getOffloadKind(ObjectFields[1]); + Image.StringData = {{"triple", ObjectFields[2]}, {"arch", ObjectFields[3]}}; + Image.Image = **ObjectOrErr; + + std::unique_ptr OffloadBuffer = OffloadBinary::write(Image); + llvm::embedBufferInModule(*M, *OffloadBuffer, ".llvm.offloading", + Align(OffloadBinary::getAlignment())); } } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6970,13 +6970,12 @@ const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP); StringRef File = C.getArgs().MakeArgString(TC->getInputFilename(*InputFile)); - StringRef InputName = Clang::getBaseInputStem(Args, Inputs); - CmdArgs.push_back(Args.MakeArgString( - "-fembed-offload-object=" + File + "," + - Action::GetOffloadKindName(Action::OFK_OpenMP) + "." + - TC->getTripleString() + "." + - TCArgs.getLastArgValue(options::OPT_march_EQ) + "." 
+ InputName)); + CmdArgs.push_back( + Args.MakeArgString("-fembed-offload-object=" + File + "," + + Action::GetOffloadKindName(Action::OFK_OpenMP) + + "," + TC->getTripleString() + "," + + TCArgs.getLastArgValue(options::OPT_march_EQ))); } } diff --git a/clang/test/Driver/openmp-offload-gpu.c b/clang/test/Driver/openmp-offload-gpu.c --- a/clang/test/Driver/openmp-offload-gpu.c +++ b/clang/test/Driver/openmp-offload-gpu.c @@ -345,4 +345,4 @@ // RUN: -fopenmp-new-driver -no-canonical-prefixes -nogpulib %s -o openmp-offload-gpu 2>&1 \ // RUN: | FileCheck -check-prefix=NEW_DRIVER_EMBEDDING %s -// NEW_DRIVER_EMBEDDING: -fembed-offload-object=[[CUBIN:.*\.cubin]],openmp.nvptx64-nvidia-cuda.sm_70 +// NEW_DRIVER_EMBEDDING: -fembed-offload-object=[[CUBIN:.*\.cubin]],openmp,nvptx64-nvidia-cuda,sm_70 diff --git a/clang/test/Frontend/embed-object.c b/clang/test/Frontend/embed-object.c --- a/clang/test/Frontend/embed-object.c +++ b/clang/test/Frontend/embed-object.c @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -x c -triple x86_64-unknown-linux-gnu -emit-llvm -fembed-offload-object=%S/Inputs/empty.h,section +// RUN: %clang_cc1 -x c -triple x86_64-unknown-linux-gnu -emit-llvm -fembed-offload-object=%S/Inputs/empty.h,,, -o - | FileCheck %s -// CHECK: @[[OBJECT:.+]] = private constant [0 x i8] zeroinitializer, section ".llvm.offloading.section" -// CHECK: @llvm.compiler.used = appending global [3 x i8*] [i8* getelementptr inbounds ([0 x i8], [0 x i8]* @[[OBJECT1]]], section "llvm.metadata" +// CHECK: @[[OBJECT:.+]] = private constant [120 x i8] c"\10\FF\10\AD\01{{.*}}\00\00", section ".llvm.offloading", align 8 +// CHECK: @llvm.compiler.used = appending global [1 x i8*] [i8* getelementptr inbounds ([120 x i8], [120 x i8]* @[[OBJECT]], i32 0, i32 0)], section "llvm.metadata" void foo(void) {} diff --git a/clang/test/Frontend/embed-object.ll b/clang/test/Frontend/embed-object.ll --- a/clang/test/Frontend/embed-object.ll +++ b/clang/test/Frontend/embed-object.ll @@ -1,11 +1,9 @@ ; RUN: 
%clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \ -; RUN: -fembed-offload-object=%S/Inputs/empty.h,section1 \ -; RUN: -fembed-offload-object=%S/Inputs/empty.h,section2 -x ir %s -o - \ +; RUN: -fembed-offload-object=%S/Inputs/empty.h,,, -x ir %s -o - \ ; RUN: | FileCheck %s -check-prefix=CHECK -; CHECK: @[[OBJECT1:.+]] = hidden constant [0 x i8] zeroinitializer, section ".llvm.offloading.section1" -; CHECK: @[[OBJECT2:.+]] = hidden constant [0 x i8] zeroinitializer, section ".llvm.offloading.section2" -; CHECK: @llvm.compiler.used = appending global [3 x i8*] [i8* @x, i8* getelementptr inbounds ([0 x i8], [0 x i8]* @[[OBJECT1]], i32 0, i32 0), i8* getelementptr inbounds ([0 x i8], [0 x i8]* @[[OBJECT2]], i32 0, i32 0)], section "llvm.metadata" +; CHECK: @[[OBJECT:.+]] = private constant [120 x i8] c"\10\FF\10\AD{{.*}}\00", section ".llvm.offloading", align 8 +; CHECK: @llvm.compiler.used = appending global [2 x i8*] [i8* @x, i8* getelementptr inbounds ([120 x i8], [120 x i8]* @[[OBJECT]], i32 0, i32 0)], section "llvm.metadata" @x = private constant i8 1 @llvm.compiler.used = appending global [1 x i8*] [i8* @x], section "llvm.metadata" diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -29,6 +29,7 @@ #include "llvm/Object/ArchiveWriter.h" #include "llvm/Object/Binary.h" #include "llvm/Object/ObjectFile.h" +#include "llvm/Object/OffloadBinary.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Errc.h" #include "llvm/Support/FileOutputBuffer.h" @@ -146,8 +147,8 @@ static codegen::RegisterCodeGenFlags CodeGenFlags; /// Magic section string that marks the existence of offloading data. The -/// section string will be formatted as `.llvm.offloading.<triple>.<arch>`. -#define OFFLOAD_SECTION_MAGIC_STR ".llvm.offloading." 
+/// section will contain one or more offloading binaries stored contiguously. +#define OFFLOAD_SECTION_MAGIC_STR ".llvm.offloading" /// Information for a device offloading file extracted from the host. struct DeviceFile { @@ -201,16 +202,6 @@ llvm::errs() << *IC << (std::next(IC) != IE ? " " : "\n"); } -static StringRef getDeviceFileExtension(StringRef DeviceTriple, - bool IsBitcode = false) { - Triple TheTriple(DeviceTriple); - if (TheTriple.isAMDGPU() || IsBitcode) - return "bc"; - if (TheTriple.isNVPTX()) - return "cubin"; - return "o"; -} - std::string getMainExecutable(const char *Name) { void *Ptr = (void *)(intptr_t)&getMainExecutable; auto COWPath = sys::fs::getMainExecutable(Name, Ptr); @@ -296,39 +287,58 @@ StringRef Prefix = sys::path::stem(Obj.getFileName()); SmallVector ToBeStripped; - // Extract data from sections of the form `.llvm.offloading.<triple>.<arch>`. + // Extract offloading binaries from sections with the name `.llvm.offloading`. for (const SectionRef &Sec : Obj.sections()) { Expected Name = Sec.getName(); - if (!Name || !Name->startswith(OFFLOAD_SECTION_MAGIC_STR)) + if (!Name || !Name->equals(OFFLOAD_SECTION_MAGIC_STR)) continue; - SmallVector SectionFields; - Name->split(SectionFields, '.'); - StringRef Kind = SectionFields[3]; - StringRef DeviceTriple = SectionFields[4]; - StringRef Arch = SectionFields[5]; + Expected Contents = Sec.getContents(); + if (!Contents) + return Contents.takeError(); + + uint64_t Offset = 0; + // There could be multiple offloading binaries stored at this section. 
+ while (Offset < Contents->size()) { + std::unique_ptr Buffer = + MemoryBuffer::getMemBuffer(Contents->drop_front(Offset), *Name, + /*RequiresNullTerminator*/ false); + auto BinaryOrErr = OffloadBinary::create(*Buffer); + if (!BinaryOrErr) + return BinaryOrErr.takeError(); + OffloadBinary &Binary = **BinaryOrErr; + + assert(Binary.getVersion() == 1 && "Incompatible device image version"); + + StringRef Kind = + getOffloadKindName(static_cast(Binary.getOffloadKind())); + StringRef Suffix = + getImageKindName(static_cast(Binary.getImageKind())); - if (Expected Contents = Sec.getContents()) { SmallString<128> TempFile; - StringRef DeviceExtension = getDeviceFileExtension( - DeviceTriple, identify_magic(*Contents) == file_magic::bitcode); - if (Error Err = createOutputFile(Prefix + "-" + Kind + "-" + - DeviceTriple + "-" + Arch, - DeviceExtension, TempFile)) + if (Error Err = + createOutputFile(Prefix + "-" + Kind + "-" + Binary.getTriple() + + "-" + Binary.getArch(), + Suffix, TempFile)) return std::move(Err); Expected> OutputOrErr = - FileOutputBuffer::create(TempFile, Sec.getSize()); + FileOutputBuffer::create(TempFile, Binary.getImage().size()); if (!OutputOrErr) return OutputOrErr.takeError(); std::unique_ptr Output = std::move(*OutputOrErr); - std::copy(Contents->begin(), Contents->end(), Output->getBufferStart()); + std::copy(Binary.getImage().bytes_begin(), Binary.getImage().bytes_end(), + Output->getBufferStart()); if (Error E = Output->commit()) return std::move(E); - DeviceFiles.emplace_back(Kind, DeviceTriple, Arch, TempFile); - ToBeStripped.push_back(*Name); + DeviceFiles.emplace_back(Kind, Binary.getTriple(), Binary.getArch(), + TempFile); + + Offset += Binary.getSize(); } + + ToBeStripped.push_back(*Name); } if (ToBeStripped.empty() || !StripSections) @@ -405,42 +415,58 @@ SmallVector ToBeDeleted; - // Extract data from the global string containing a section of the form - // `.llvm.offloading.<triple>.<arch>`. 
+ // Extract offloading data from globals with the `.llvm.offloading` section + // name. for (GlobalVariable &GV : M->globals()) { - if (!GV.hasSection() || - !GV.getSection().startswith(OFFLOAD_SECTION_MAGIC_STR)) + if (!GV.hasSection() || !GV.getSection().equals(OFFLOAD_SECTION_MAGIC_STR)) continue; auto *CDS = dyn_cast(GV.getInitializer()); if (!CDS) continue; - SmallVector SectionFields; - GV.getSection().split(SectionFields, '.'); - StringRef Kind = SectionFields[3]; - StringRef DeviceTriple = SectionFields[4]; - StringRef Arch = SectionFields[5]; - StringRef Contents = CDS->getAsString(); - SmallString<128> TempFile; - StringRef DeviceExtension = getDeviceFileExtension( - DeviceTriple, identify_magic(Contents) == file_magic::bitcode); - if (Error Err = createOutputFile(Prefix + "-" + Kind + "-" + DeviceTriple + - "-" + Arch, - DeviceExtension, TempFile)) - return std::move(Err); - Expected> OutputOrErr = - FileOutputBuffer::create(TempFile, Contents.size()); - if (!OutputOrErr) - return OutputOrErr.takeError(); - std::unique_ptr Output = std::move(*OutputOrErr); - std::copy(Contents.begin(), Contents.end(), Output->getBufferStart()); - if (Error E = Output->commit()) - return std::move(E); + uint64_t Offset = 0; + // There could be multiple offloading binaries stored at this section. 
+ while (Offset < Contents.size()) { + std::unique_ptr Buffer = + MemoryBuffer::getMemBuffer(Contents.drop_front(Offset), GV.getName(), + /*RequiresNullTerminator*/ false); + auto BinaryOrErr = OffloadBinary::create(*Buffer); + if (!BinaryOrErr) + return BinaryOrErr.takeError(); + OffloadBinary &Binary = **BinaryOrErr; + + assert(Binary.getVersion() == 1 && "Incompatible device image version"); + + StringRef Kind = + getOffloadKindName(static_cast(Binary.getOffloadKind())); + StringRef Suffix = + getImageKindName(static_cast(Binary.getImageKind())); + + SmallString<128> TempFile; + if (Error Err = + createOutputFile(Prefix + "-" + Kind + "-" + Binary.getTriple() + + "-" + Binary.getArch(), + Suffix, TempFile)) + return std::move(Err); + + Expected> OutputOrErr = + FileOutputBuffer::create(TempFile, Binary.getImage().size()); + if (!OutputOrErr) + return OutputOrErr.takeError(); + std::unique_ptr Output = std::move(*OutputOrErr); + std::copy(Binary.getImage().bytes_begin(), Binary.getImage().bytes_end(), + Output->getBufferStart()); + if (Error E = Output->commit()) + return std::move(E); - DeviceFiles.emplace_back(Kind, DeviceTriple, Arch, TempFile); + DeviceFiles.emplace_back(Kind, Binary.getTriple(), Binary.getArch(), + TempFile); + + Offset += Binary.getSize(); + } ToBeDeleted.push_back(&GV); } diff --git a/llvm/include/llvm/Object/OffloadBinary.h b/llvm/include/llvm/Object/OffloadBinary.h --- a/llvm/include/llvm/Object/OffloadBinary.h +++ b/llvm/include/llvm/Object/OffloadBinary.h @@ -73,6 +73,7 @@ ImageKind getImageKind() const { return TheEntry->TheImageKind; } OffloadKind getOffloadKind() const { return TheEntry->TheOffloadKind; } + uint32_t getVersion() const { return TheHeader->Version; } uint32_t getFlags() const { return TheEntry->Flags; } uint64_t getSize() const { return TheHeader->Size; } diff --git a/llvm/include/llvm/Transforms/Utils/ModuleUtils.h b/llvm/include/llvm/Transforms/Utils/ModuleUtils.h --- 
a/llvm/include/llvm/Transforms/Utils/ModuleUtils.h +++ b/llvm/include/llvm/Transforms/Utils/ModuleUtils.h @@ -14,6 +14,7 @@ #define LLVM_TRANSFORMS_UTILS_MODULEUTILS_H #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/MemoryBufferRef.h" #include // for std::pair @@ -109,7 +110,8 @@ /// Embed the memory buffer \p Buf into the module \p M as a global using the /// specified section name. -void embedBufferInModule(Module &M, MemoryBufferRef Buf, StringRef SectionName); +void embedBufferInModule(Module &M, MemoryBufferRef Buf, StringRef SectionName, + Align Alignment = Align(1)); class CallInst; namespace VFABI { diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp --- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -265,15 +265,15 @@ } void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf, - StringRef SectionName) { - // Embed the buffer into the module. + StringRef SectionName, Align Alignment) { + // Embed the memory buffer into the module. Constant *ModuleConstant = ConstantDataArray::get( M.getContext(), makeArrayRef(Buf.getBufferStart(), Buf.getBufferSize())); GlobalVariable *GV = new GlobalVariable( - M, ModuleConstant->getType(), true, GlobalValue::ExternalLinkage, - ModuleConstant, SectionName.drop_front()); + M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage, + ModuleConstant, "llvm.embedded.object"); GV->setSection(SectionName); - GV->setVisibility(GlobalValue::HiddenVisibility); + GV->setAlignment(Alignment); appendToCompilerUsed(M, GV); }