Index: lld/ELF/Config.h =================================================================== --- lld/ELF/Config.h +++ lld/ELF/Config.h @@ -199,6 +199,7 @@ bool warnIfuncTextrel; bool warnMissingEntry; bool warnSymbolOrdering; + bool watermark; bool writeAddends; bool zCombreloc; bool zCopyreloc; Index: lld/ELF/Driver.cpp =================================================================== --- lld/ELF/Driver.cpp +++ lld/ELF/Driver.cpp @@ -966,6 +966,7 @@ args.hasFlag(OPT_warn_ifunc_textrel, OPT_no_warn_ifunc_textrel, false); config->warnSymbolOrdering = args.hasFlag(OPT_warn_symbol_ordering, OPT_no_warn_symbol_ordering, true); + config->watermark = args.hasFlag(OPT_watermark, OPT_no_watermark, false); config->zCombreloc = getZFlag(args, "combreloc", "nocombreloc", true); config->zCopyreloc = getZFlag(args, "copyreloc", "nocopyreloc", true); config->zGlobal = hasZOption(args, "global"); Index: lld/ELF/InputFiles.cpp =================================================================== --- lld/ELF/InputFiles.cpp +++ lld/ELF/InputFiles.cpp @@ -745,7 +745,7 @@ auto *nhdr = reinterpret_cast(data.data()); if (data.size() < nhdr->getSize()) fatal(toString(obj) + ": .note.gnu.property: section too short"); - + Elf_Note note(*nhdr); if (nhdr->n_type != NT_GNU_PROPERTY_TYPE_0 || note.getName() != "GNU") { data = data.slice(nhdr->getSize()); @@ -979,12 +979,12 @@ name == ".gnu.linkonce.t.__i686.get_pc_thunk.bx") return &InputSection::discarded; - // If we are creating a new .build-id section, strip existing .build-id - // sections so that the output won't have more than one .build-id. + // If we are creating a new .build-id section or watermark, strip existing + // sections so that the output won't have more than one. // This is not usually a problem because input object files normally don't - // have .build-id sections, but you can create such files by - // "ld.{bfd,gold,lld} -r --build-id", and we want to guard against it. 
- if (name == ".note.gnu.build-id" && config->buildId != BuildIdKind::None) + // have .build-id sections or watermark, but you can create such files by + // "ld.{bfd,gold,lld} -r --build-id/--watermark", and we want to guard against it. + if (name == ".note.gnu.build-id" && config->buildId != BuildIdKind::None || name == ".note.llvm.watermark") return &InputSection::discarded; // The linker merges EH (exception handling) frames and creates a Index: lld/ELF/Options.td =================================================================== --- lld/ELF/Options.td +++ lld/ELF/Options.td @@ -428,6 +428,10 @@ def visual_studio_diagnostics_format : F<"vs-diagnostics">, HelpText<"Format diagnostics for Visual Studio compatibility">; +defm watermark : B<"watermark", + "Enable the computation of a hash for loadable sections", + "Disable the computation of a hash for loadable sections">; + // Aliases def: Separate<["-"], "f">, Alias, HelpText<"Alias for --auxiliary">; def: F<"call_shared">, Alias, HelpText<"Alias for --Bdynamic">; Index: lld/ELF/SyntheticSections.h =================================================================== --- lld/ELF/SyntheticSections.h +++ lld/ELF/SyntheticSections.h @@ -25,6 +25,7 @@ #include "InputSection.h" #include "llvm/ADT/MapVector.h" #include "llvm/MC/StringTableBuilder.h" +#include "llvm/Object/Watermark.h" #include "llvm/Support/Endian.h" #include @@ -174,6 +175,24 @@ uint8_t *hashBuf; }; +// .note.llvm.watermark section.
+class WatermarkSection : public SyntheticSection { + llvm::watermark::Watermarker watermarker; + static const unsigned headerSize = 20; +public: + WatermarkSection(); + void writeTo(uint8_t *buf) override; + size_t getSize() const override { + return headerSize + watermarker.getVersionSize() + + watermarker.getHashSize(); + } + void writeWatermark(llvm::ArrayRef buf); + llvm::watermark::Watermarker& getWatermarker() { return watermarker; } + +private: + uint8_t *watermarkBuf; +}; + // BssSection is used to reserve space for copy relocations and common symbols. // We create three instances of this class for .bss, .bss.rel.ro and "COMMON", // that are used for writable symbols, read-only symbols and common symbols, @@ -1134,6 +1153,7 @@ VersionDefinitionSection *verDef; SyntheticSection *verNeed; VersionTableSection *verSym; + WatermarkSection *watermark; unsigned getNumber() const { return this - &partitions[0] + 1; } }; Index: lld/ELF/SyntheticSections.cpp =================================================================== --- lld/ELF/SyntheticSections.cpp +++ lld/ELF/SyntheticSections.cpp @@ -336,6 +336,25 @@ memcpy(hashBuf, buf.data(), hashSize); } +WatermarkSection::WatermarkSection() +:SyntheticSection(0x00, SHT_NOTE, 4, ".note.llvm.watermark") +{} + +void WatermarkSection::writeTo(uint8_t *buf) { + write32(buf, 5); // Name size + write32(buf + 4, watermarker.getVersionSize() + + watermarker.getHashSize()); // Descriptor size + write32(buf + 8, NT_LLVM_WATERMARK); // Type + memcpy(buf + 12, "LLVM\0\0\0", 8); // Name string + write32(buf + 20, watermarker.getVersion()); // Version + watermarkBuf = buf + 20 + watermarker.getVersionSize(); +} + +void WatermarkSection::writeWatermark(ArrayRef buf) { + assert(buf.size() == watermarker.getHashSize()); + memcpy(watermarkBuf, buf.data(), watermarker.getHashSize()); +} + BssSection::BssSection(StringRef name, uint64_t size, uint32_t alignment) : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_NOBITS, alignment, name) { 
this->bss = true; Index: lld/ELF/Writer.cpp =================================================================== --- lld/ELF/Writer.cpp +++ lld/ELF/Writer.cpp @@ -75,6 +75,7 @@ void writeSections(); void writeSectionsBinary(); void writeBuildId(); + void writeWatermark(); std::unique_ptr &buffer; @@ -385,6 +386,11 @@ add(part.buildId); } + if (config->watermark) { + part.watermark = make(); + add(part.watermark); + } + part.dynStrTab = make(".dynstr", true); part.dynSymTab = make>(*part.dynStrTab); part.dynamic = make>(); @@ -604,9 +610,14 @@ writeSectionsBinary(); } - // Backfill .note.gnu.build-id section content. This is done at last + // Backfill .note.gnu.build-id section content. This is done late // because the content is usually a hash value of the entire output file. writeBuildId(); + + // Backfill the watermark section content. + if (config->watermark) + writeWatermark(); + if (errorCount()) return; @@ -2300,8 +2311,8 @@ // File offsets are not significant for .bss sections other than the first one // in a PT_LOAD. By convention, we keep section offsets monotonically // increasing rather than setting to zero. - if (os->type == SHT_NOBITS) - return off; + if (os->type == SHT_NOBITS) + return off; // If the section is not in a PT_LOAD, we just have to align it. 
if (!os->ptLoad) @@ -2741,6 +2752,43 @@ part.buildId->writeBuildId(buildId); } +template void Writer::writeWatermark() { + if (!mainPart->watermark || !mainPart->watermark->getParent()) + return; + + watermark::Watermarker &w = mainPart->watermark->getWatermarker(); + + std::vector watermarkSegments = + w.extractSegmentInfo(mainPart->phdrs); + + if (watermarkSegments.empty()) + error("failed to compute watermark: no PT_LOAD segments were found"); + + size_t programHeaderTableOffset = 0; + size_t programHeaderTableSize = 0; + + auto It = std::find_if( + mainPart->phdrs.begin(), mainPart->phdrs.end(), + [](const PhdrEntry *pHdr) { return pHdr->p_type == PT_PHDR; }); + + if (It != mainPart->phdrs.end()) { + programHeaderTableOffset = (*It)->p_offset; + programHeaderTableSize = (*It)->p_filesz; + } + + Expected> watermark = w.computeWatermark( + watermarkSegments, sizeof(typename ELFT::Ehdr), programHeaderTableOffset, + programHeaderTableSize, Out::bufferStart); + + if (!watermark) { + error("failed to compute watermark: " + + llvm::toString(watermark.takeError())); + return; + } + + mainPart->watermark->writeWatermark(*watermark); +} + template void createSyntheticSections(); template void createSyntheticSections(); template void createSyntheticSections(); Index: lld/test/ELF/watermark.s =================================================================== --- /dev/null +++ lld/test/ELF/watermark.s @@ -0,0 +1,24 @@ +## Test that a watermark is placed in the correct section with the correct +## alignment when using --watermark. Check also that the watermark can +## be disabled with --no-watermark and that watermark is disabled by default. 
+ +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t +# RUN: ld.lld %t -o %t.default +# RUN: llvm-readelf -S %t.default | FileCheck -check-prefix=NOWATERMARK %s +# RUN: ld.lld --no-watermark %t -o %t.nowatermark +# RUN: llvm-readelf -S %t.nowatermark | FileCheck -check-prefix=NOWATERMARK %s + +# NOWATERMARK-NOT: Name: .note.llvm.watermark + +# RUN: ld.lld --watermark %t -o %t.watermark +# RUN: llvm-readelf -x .note.llvm.watermark %t.watermark | FileCheck --strict-whitespace -check-prefix=CONTENT %s +# RUN: llvm-readelf -S %t.watermark | FileCheck -check-prefix=SECTION %s + +# SECTION: .note.llvm.watermark NOTE {{[0-9a-f]+}} {{[0-9a-f]+}} 000020 00 0 0 4 +# CONTENT: Hex dump of section '.note.llvm.watermark': +# CONTENT-NEXT: 05000000 0c000000 04000000 4c4c564d ............LLVM +# CONTENT-NEXT: 00000000 01000000 f9ceaa42 d8d7016d ...........B...m + +.globl _start +_start: +nop Index: llvm/include/llvm/BinaryFormat/ELF.h =================================================================== --- llvm/include/llvm/BinaryFormat/ELF.h +++ llvm/include/llvm/BinaryFormat/ELF.h @@ -1420,6 +1420,7 @@ // LLVM-specific notes. enum { NT_LLVM_HWASAN_GLOBALS = 3, + NT_LLVM_WATERMARK = 4, }; // GNU note types Index: llvm/include/llvm/Object/Watermark.h =================================================================== --- /dev/null +++ llvm/include/llvm/Object/Watermark.h @@ -0,0 +1,84 @@ +//===- Watermark.h ----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares functions for calculating a watermark of loadable +// sections. Clients must provide a pointer to the ELF file buffer, plus a +// reference to a vector of program headers. 
If there is a .note.llvm.watermark +// section then its offset should be provided. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_WATERMARK_H +#define LLVM_WATERMARK_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" + +#include "llvm/Support/Parallel.h" +#include "llvm/Support/xxhash.h" + +#include +#include +#include + +namespace llvm { +namespace watermark { +/// Structure for the information necessary to include a segment in the +/// watermark computation. +struct Segment { + size_t Offset; + size_t Size; + + Segment(size_t Offset, size_t Size) : Offset(Offset), Size(Size) {} +}; + +class Watermarker { + const uint32_t Version = 1u; + const size_t HashSize = 8; + + template ::value>> + void pHdrToSegment(const PHdr &pHdr, std::vector &segments) { + if (pHdr.p_type == PT_LOAD) + segments.emplace_back(pHdr.p_offset, pHdr.p_filesz); + } + + template ::value>> + void pHdrToSegment(const PHdr pHdr, std::vector &segments) { + if (pHdr->p_type == PT_LOAD) + segments.emplace_back(pHdr->p_offset, pHdr->p_filesz); + } + +public: + /// Extracts the information required to calculate the watermark.
+ template + std::vector extractSegmentInfo(llvm::ArrayRef ProgramHeaders) { + std::vector SegmentInfo; + for (PHdr Header : ProgramHeaders) + pHdrToSegment(Header, SegmentInfo); + + return SegmentInfo; + } + + Watermarker(){}; + size_t getHashSize() const { return HashSize; } + size_t getVersion() const { return Version; } + size_t getVersionSize() const { return sizeof(Version); } + + /// Compute the watermark, omitting the program header table and ELF header + llvm::Expected> + computeWatermark(std::vector &InputSegments, size_t ElfHeaderSize, + size_t ProgramHeaderTableOffset, + size_t ProgramHeaderTableSize, const uint8_t *Data); +}; +} // namespace watermark +} // namespace llvm +#endif // LLVM_WATERMARK_H Index: llvm/lib/Object/CMakeLists.txt =================================================================== --- llvm/lib/Object/CMakeLists.txt +++ llvm/lib/Object/CMakeLists.txt @@ -24,6 +24,7 @@ TapiFile.cpp TapiUniversal.cpp WasmObjectFile.cpp + Watermark.cpp WindowsMachineFlag.cpp WindowsResource.cpp XCOFFObjectFile.cpp Index: llvm/lib/Object/Watermark.cpp =================================================================== --- /dev/null +++ llvm/lib/Object/Watermark.cpp @@ -0,0 +1,96 @@ +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Object/Watermark.h" + +namespace llvm { +namespace watermark { + +static std::vector> split(llvm::ArrayRef Arr, + size_t ChunkSize) { + std::vector> Ret; + while (Arr.size() > ChunkSize) { + Ret.push_back(Arr.take_front(ChunkSize)); + Arr = Arr.drop_front(ChunkSize); + } + if (!Arr.empty()) + Ret.push_back(Arr); + return Ret; +} + +static void computeHash(llvm::MutableArrayRef HashDest, size_t HashSize, + llvm::ArrayRef Data) { + const size_t ChunkSize = 1024 * 1024; + std::vector> InputChunks = split(Data, ChunkSize); + std::vector ChunkHashes(InputChunks.size() * HashSize); + + for_each_n(llvm::parallel::par, (size_t)0, InputChunks.size(), [&](size_t i) { + llvm::support::endian::write64le(ChunkHashes.data() + i * HashSize, 
+ xxHash64(InputChunks[i])); + }); + + llvm::support::endian::write64le(HashDest.data(), xxHash64(ChunkHashes)); +} + +static void omitRangeFromSegments(std::vector &InputSegments, + size_t RangeFirst, size_t RangeSize) { + size_t RangeLast = RangeFirst + RangeSize; + + for (unsigned I = 0; I < InputSegments.size(); I++) { + Segment *pHdr = &InputSegments[I]; + size_t SegmentFirst = pHdr->Offset; + size_t SegmentLast = SegmentFirst + pHdr->Size; + + if (RangeFirst >= SegmentLast || RangeLast <= SegmentFirst) + continue; + + if (RangeFirst >= SegmentFirst) { + size_t SegmentSize = pHdr->Size; + pHdr->Size = RangeFirst - SegmentFirst; + + // Section cleaves a segment. + if (RangeLast < SegmentLast) { + size_t Offset = RangeLast; + size_t Size = SegmentSize - pHdr->Size - RangeSize; + InputSegments.emplace(InputSegments.begin() + I + 1, Offset, Size); + } + } else { // Section ends within segment or at segment end. + pHdr->Offset = std::min(SegmentLast, RangeLast); + pHdr->Size = (RangeLast >= SegmentLast) ? 0 : SegmentLast - RangeLast; + } + } +} + +llvm::Expected> +Watermarker::computeWatermark(std::vector& InputSegments, size_t ElfHeaderSize,size_t ProgramHeaderTableOffset, + size_t ProgramHeaderTableSize, const uint8_t *Data) { + + // Ensure we don't include the program header + // table or ELF header, as these may be + // altered by tools such as objcopy. 
+ omitRangeFromSegments(InputSegments, (size_t) 0, ElfHeaderSize); + + if (ProgramHeaderTableSize > 0) + omitRangeFromSegments(InputSegments, ProgramHeaderTableOffset, + ProgramHeaderTableSize); + + std::vector InputSegmentWatermarks(InputSegments.size() * HashSize); + + for_each_n( + llvm::parallel::par, (size_t) 0, InputSegments.size(), [&](size_t I) { + Segment Seg = InputSegments[I]; + + if (Seg.Size > 0) { + llvm::ArrayRef SegmentData(Data + Seg.Offset, + Seg.Size); + computeHash(InputSegmentWatermarks[I * HashSize], HashSize, SegmentData); + } + }); + + std::vector FinalWatermark(HashSize); + computeHash(FinalWatermark, HashSize, + llvm::ArrayRef(InputSegmentWatermarks.data(), InputSegmentWatermarks.size())); + + return FinalWatermark; +} + +} // namespace watermark +} // namespace llvm Index: llvm/test/Object/watermark.test =================================================================== --- /dev/null +++ llvm/test/Object/watermark.test @@ -0,0 +1,109 @@ +## Ensure that the watermark calculation is dependent on PT_LOAD segments, +## that both the ELF header and program header table can be modified without +## affecting the watermark, and that changing the order of the segments changes +## the watermark.
+ +# RUN: yaml2obj --docnum=1 %s > %t.1 +# RUN: llvm-readobj --compute-watermark %t.1 | FileCheck %s --check-prefix=SAME-WATERMARK +# RUN: yaml2obj --docnum=2 %s > %t.2 +# RUN: llvm-readobj --compute-watermark %t.2 | FileCheck %s --check-prefix=SAME-WATERMARK + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 + Entry: 0x0000000000400000 +Sections: + - Name: .fill1 + Type: SHT_PROGBITS + Size: 4 + Address: 0x100 + Content: aaaaaaaa + - Name: .fill2 + Type: SHT_PROGBITS + Size: 4 + Address: 0x200 + Content: bbbbbbbb +ProgramHeaders: + - Type: PT_LOAD + FileSize: 4 + VAddr: 0x100 + Sections: + - Section: .fill1 + - Type: PT_LOAD + FileSize: 4 + VAddr: 0x200 + Sections: + - Section: .fill2 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_NONE + Machine: EM_X86_64 + Entry: 0x0000000000400000 +Sections: + - Name: .fill1 + Type: SHT_PROGBITS + Content: aaaaaaaa + - Name: .fill2 + Type: SHT_PROGBITS + Content: bbbbbbbb + - Name: .fill3 + Type: SHT_PROGBITS + Content: cccccccc +ProgramHeaders: + - Type: PT_LOAD + FileSize: 4 + Sections: + - Section: .fill1 + - Type: PT_NOTE + FileSize: 4 + Sections: + - Section: .fill3 + - Type: PT_LOAD + FileSize: 4 + Sections: + - Section: .fill2 + +# SAME-WATERMARK: Computed loadable segments watermark { +# SAME-WATERMARK: Version: 1 +# SAME-WATERMARK: Value: 0x1237491EA4CA8E6F + +# RUN: yaml2obj --docnum=3 %s > %t.3 +# RUN: llvm-readobj --compute-watermark %t.3 | FileCheck %s --check-prefix=ORDER-WATERMARK + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 + Entry: 0x0000000000400000 +Sections: + - Name: .fill1 + Type: SHT_PROGBITS + Size: 4 + Address: 0x200 + Content: aaaaaaaa + - Name: .fill2 + Type: SHT_PROGBITS + Size: 4 + Address: 0x100 + Content: bbbbbbbb +ProgramHeaders: + - Type: PT_LOAD + FileSize: 4 + Sections: + - Section: .fill2 + - Type: PT_LOAD + FileSize: 4 + Sections: + - Section: .fill1 + +# 
ORDER-WATERMARK: Computed loadable segments watermark { +# ORDER-WATERMARK: Version: 1 +# ORDER-WATERMARK: Value: 0xF1231786169E8DBB