diff --git a/lld/test/wasm/build-id.test b/lld/test/wasm/build-id.test
new file mode 100644
--- /dev/null
+++ b/lld/test/wasm/build-id.test
@@ -0,0 +1,60 @@
+# RUN: llvm-mc -filetype=obj -triple=wasm32 %p/Inputs/start.s -o %t
+
+# RUN: wasm-ld --build-id %t -o %t2
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=DEFAULT %s
+# RUN: wasm-ld --build-id=fast %t -o %t2
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=DEFAULT %s
+# RUN: wasm-ld --build-id %t -o %t2 --threads=1
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=DEFAULT %s
+
+# RUN: wasm-ld --build-id=sha1 %t -o %t2
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=SHA1 %s
+# RUN: wasm-ld --build-id=sha1 %t -o %t2 --threads=1
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=SHA1 %s
+
+# RUN: wasm-ld --build-id=tree %t -o %t2
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=SHA1 %s
+# RUN: wasm-ld --build-id=tree %t -o %t2 --threads=1
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=SHA1 %s
+
+# RUN: wasm-ld --build-id=uuid %t -o %t2
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=UUID %s
+
+# RUN: wasm-ld --build-id=0x12345678 %t -o %t2
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=HEX %s
+
+# RUN: wasm-ld %t -o %t2
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=NONE %s
+
+# RUN: wasm-ld --build-id=sha1 --build-id=none %t -o %t2
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=NONE %s
+# RUN: wasm-ld --build-id --build-id=none %t -o %t2
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=NONE %s
+# RUN: wasm-ld --build-id=none --build-id %t -o %t2
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=DEFAULT %s
+
+.section .data.foo,"",@
+        .globl foo
+        .hidden foo
+        .p2align 2
+foo:
+        .int32 1
+        .size foo, 4
+
+
+# DEFAULT: Contents of section build_id:
+# DEFAULT-NEXT: 0000 10cdbf99 f76b1f5e ebb2f36a 1bde1d6c .....k.^...j...l
+# DEFAULT-NEXT: 0010 01
+
+# SHA1: Contents of section build_id:
+# SHA1-NEXT: 0000 14ad22e8 54d72438 94af85de 3c5592bd ..".T.$8....<U..
+# SHA1-NEXT: 0010 1b5ec96f 6b .^.ok
+
+# UUID: Contents of section build_id:
+# UUID-NEXT: 0000 10
+
+# HEX: Contents of section build_id:
+# HEX-NEXT: 0000 04123456 78 ..4Vx
+
+
+# NONE-NOT: Contents of section build_id:
diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h
--- a/lld/wasm/Config.h
+++ b/lld/wasm/Config.h
@@ -26,6 +26,11 @@
 // For --unresolved-symbols.
 enum class UnresolvedPolicy { ReportError, Warn, Ignore, ImportDynamic };
 
+// For --build-id. Selects how the ID stored in the build_id custom section
+// is produced: a fast hash of the output, a SHA-1 hash of the output, a
+// user-supplied hex string, or a random UUID. None omits the section.
+enum class BuildIdKind { None, Fast, Sha1, Hexstring, Uuid };
+
 // This struct contains the global configuration for the linker.
 // Most fields are direct mapping from the command line options
 // and such fields have the same name as the corresponding options.
@@ -72,6 +77,7 @@
   llvm::StringRef thinLTOJobs;
   bool ltoDebugPassManager;
   UnresolvedPolicy unresolvedSymbols;
+  BuildIdKind buildId = BuildIdKind::None;
 
   llvm::StringRef entry;
   llvm::StringRef mapFile;
@@ -85,6 +91,7 @@
   llvm::CachePruningPolicy thinLTOCachePolicy;
   std::optional<std::vector<std::string>> features;
   std::optional<std::vector<std::string>> extraFeatures;
+  llvm::SmallVector<uint8_t, 0> buildIdVector;
 
   // The following config options do not directly correspond to any
   // particular command line options.
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -385,6 +385,38 @@
   return errorOrWarn;
 }
 
+// Parse --build-id or --build-id=<style>. We handle "tree" as a
+// synonym for "sha1" because all our hash functions including
+// --build-id=sha1 are actually tree hashes for performance reasons.
+//
+// Only the last --build-id / --build-id= argument is considered. Returns
+// the selected kind together with the user-supplied ID bytes, which are
+// only populated for --build-id=0x<hexstring>. An unrecognized style is
+// reported through error() and treated like "none".
+static std::pair<BuildIdKind, SmallVector<uint8_t, 0>>
+getBuildId(opt::InputArgList &args) {
+  auto *arg = args.getLastArg(OPT_build_id, OPT_build_id_eq);
+  if (!arg)
+    return {BuildIdKind::None, {}};
+
+  if (arg->getOption().getID() == OPT_build_id)
+    return {BuildIdKind::Fast, {}};
+
+  StringRef s = arg->getValue();
+  if (s == "fast")
+    return {BuildIdKind::Fast, {}};
+  if (s == "sha1" || s == "tree")
+    return {BuildIdKind::Sha1, {}};
+  if (s == "uuid")
+    return {BuildIdKind::Uuid, {}};
+  if (s.startswith("0x"))
+    return {BuildIdKind::Hexstring, parseHex(s.substr(2))};
+
+  if (s != "none")
+    error("unknown --build-id style: " + s);
+  return {BuildIdKind::None, {}};
+}
+
 // Initializes Config members by the command line options.
 static void readConfigs(opt::InputArgList &args) {
   config->bsymbolic = args.hasArg(OPT_Bsymbolic);
@@ -519,6 +551,8 @@
 
   if (args.hasArg(OPT_print_map))
     config->mapFile = "-";
+
+  std::tie(config->buildId, config->buildIdVector) = getBuildId(args);
 }
 
 // Some Config members do not directly correspond to any particular
diff --git a/lld/wasm/Options.td b/lld/wasm/Options.td
--- a/lld/wasm/Options.td
+++ b/lld/wasm/Options.td
@@ -42,6 +42,11 @@
 
 def Bstatic: F<"Bstatic">, HelpText<"Do not link against shared libraries">;
 
+def build_id: F<"build-id">, HelpText<"Alias for --build-id=fast">;
+
+def build_id_eq: J<"build-id=">, HelpText<"Generate build ID note">,
+  MetaVarName<"[fast,sha1,uuid,0x<hexstring>]">;
+
 defm color_diagnostics: B<"color-diagnostics",
   "Alias for --color-diagnostics=always",
   "Alias for --color-diagnostics=never">;
diff --git a/lld/wasm/SyntheticSections.h b/lld/wasm/SyntheticSections.h
--- a/lld/wasm/SyntheticSections.h
+++ b/lld/wasm/SyntheticSections.h
@@ -429,6 +429,47 @@
   OutputSection *sec;
 };
 
+// Custom section named "build_id" that carries the build ID of the output.
+// writeBody() emits the ID size followed by placeholder bytes for the ID.
+class BuildIdSection : public SyntheticSection {
+public:
+  BuildIdSection();
+  void writeBody() override;
+  bool isNeeded() const override {
+    return config->buildId != BuildIdKind::None;
+  }
+  // Copies the final build ID (exactly hashSize bytes) over the placeholder.
+  // writeTo() must have run first, since it records where the placeholder is.
+  void writeBuildId(llvm::ArrayRef<uint8_t> buf);
+  void writeTo(uint8_t *buf) override {
+    LLVM_DEBUG(llvm::dbgs()
+               << "BuildId writeto buf " << buf << " offset " << offset
+               << " headersize " << header.size() << '\n');
+    // The actual build ID is derived from a hash of all of the output
+    // sections, so it can't be calculated until they are written. Here
+    // we write the section with the placeholder bytes produced by
+    // writeBody() standing in for the hash.
+    SyntheticSection::writeTo(buf);
+    // Calculate and store the location where the hash will be written.
+    // sizeof(buildIdSectionName) includes the trailing NUL, i.e. it is the
+    // name length plus one byte.
+    // NOTE(review): this assumes the hash size occupies a single ULEB128
+    // byte (hashSize < 128); a longer --build-id=0x<hexstring> would need
+    // the real encoded width here -- confirm.
+    hashPlaceholderPtr = buf + offset + header.size() +
+                         sizeof(buildIdSectionName) /*name string*/ +
+                         1 /* hash size */;
+  }
+
+  // Size of the build ID in bytes, fixed at construction.
+  const uint32_t hashSize;
+
+private:
+  static constexpr char buildIdSectionName[] = "build_id";
+  // Location of the placeholder in the output buffer; set by writeTo().
+  uint8_t *hashPlaceholderPtr = nullptr;
+};
+
 // Linker generated output sections
 struct OutStruct {
   DylinkSection *dylinkSec;
@@ -447,6 +488,7 @@
   NameSection *nameSec;
   ProducersSection *producersSec;
   TargetFeaturesSection *targetFeaturesSec;
+  BuildIdSection *buildIdSec;
 };
 
 extern OutStruct out;
diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp
--- a/lld/wasm/SyntheticSections.cpp
+++ b/lld/wasm/SyntheticSections.cpp
@@ -888,5 +888,48 @@
   sec->writeRelocations(bodyOutputStream);
 }
 
+// Returns the build ID length in bytes for the configured --build-id kind.
+static size_t getHashSize() {
+  switch (config->buildId) {
+  case BuildIdKind::Fast:
+  case BuildIdKind::Uuid:
+    return 16;
+  case BuildIdKind::Sha1:
+    return 20;
+  case BuildIdKind::Hexstring:
+    return config->buildIdVector.size();
+  case BuildIdKind::None:
+    return 0;
+  }
+  // Every enumerator returns above; without this, control could reach the
+  // end of a non-void function if buildId ever holds an unexpected value.
+  llvm_unreachable("unknown BuildIdKind");
+}
+
+BuildIdSection::BuildIdSection()
+    : SyntheticSection(llvm::wasm::WASM_SEC_CUSTOM, buildIdSectionName),
+      hashSize(getHashSize()) {}
+
+// Emits the ID size followed by hashSize placeholder bytes (spaces) that
+// writeBuildId() overwrites later. The placeholder is part of the buffer
+// Writer::writeBuildId() hashes, so changing it changes the fast/sha1 IDs.
+void BuildIdSection::writeBody() {
+  LLVM_DEBUG(llvm::dbgs() << "BuildId writebody\n");
+  // Write hash size
+  auto &os = bodyOutputStream;
+  writeUleb128(os, hashSize, "build id size");
+  writeBytes(os, std::vector<char>(hashSize, ' ').data(), hashSize,
+             "placeholder");
+}
+
+// Overwrites the placeholder in the output buffer with the final build ID.
+void BuildIdSection::writeBuildId(llvm::ArrayRef<uint8_t> buf) {
+  assert(buf.size() == hashSize);
+  assert(hashPlaceholderPtr && "writeTo() must run before writeBuildId()");
+  LLVM_DEBUG(dbgs() << "buildid write " << buf.size() << " "
+                    << hashPlaceholderPtr << '\n');
+  memcpy(hashPlaceholderPtr, buf.data(), hashSize);
+}
+
 } // namespace wasm
 } // namespace lld
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -17,8 +17,10 @@
 #include "SymbolTable.h"
 #include "SyntheticSections.h"
 #include "WriterUtils.h"
+#include "lld/Common/Arrays.h"
 #include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/Strings.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
@@ -30,6 +32,9 @@
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/Parallel.h"
+#include "llvm/Support/RandomNumberGenerator.h"
+#include "llvm/Support/SHA1.h"
+#include "llvm/Support/xxhash.h"
 
 #include <cstdarg>
 #include <map>
@@ -103,6 +108,7 @@
 
   void writeHeader();
   void writeSections();
+  void writeBuildId();
 
   uint64_t fileSize = 0;
 
@@ -219,6 +225,101 @@
   });
 }
 
+// Computes a hash value of Data using a given hash function.
+// In order to utilize multiple cores, we first split data into 1MB
+// chunks, compute a hash for each chunk, and then compute a hash value
+// of the hash values.
+//
+// hashFn must write exactly hashBuf.size() bytes to dest; that size is used
+// both as the per-chunk stride and as the size of the final result.
+
+static void
+computeHash(llvm::MutableArrayRef<uint8_t> hashBuf,
+            llvm::ArrayRef<uint8_t> data,
+            std::function<void(uint8_t *dest, ArrayRef<uint8_t> arr)> hashFn) {
+  std::vector<ArrayRef<uint8_t>> chunks = split(data, 1024 * 1024);
+  std::vector<uint8_t> hashes(chunks.size() * hashBuf.size());
+
+  // Compute hash values.
+  parallelFor(0, chunks.size(), [&](size_t i) {
+    hashFn(hashes.data() + i * hashBuf.size(), chunks[i]);
+  });
+
+  // Write to the final output buffer.
+  hashFn(hashBuf.data(), hashes);
+}
+
+// Fills `output` (16 bytes) with a UUID of the given version: version 5 is
+// derived from SHA-1 over a fixed namespace UUID and `fileHash`; version 4
+// is filled from the system entropy source and ignores `fileHash`.
+static void makeUUID(unsigned version, llvm::ArrayRef<uint8_t> fileHash,
+                     llvm::MutableArrayRef<uint8_t> output) {
+  assert((version == 4 || version == 5) && "Unknown UUID version");
+  assert(output.size() == 16 && "Wrong size for UUID output");
+  if (version == 5) {
+    // Build a valid v5 UUID from a hardcoded (randomly-generated) namespace
+    // UUID, and the computed hash of the output.
+    std::array<uint8_t, 16> namespaceUUID{0xA1, 0xFA, 0x48, 0x2D, 0x0E, 0x22,
+                                          0x03, 0x8D, 0x33, 0x8B, 0x52, 0x1C,
+                                          0xD6, 0xD2, 0x12, 0xB2};
+    SHA1 sha;
+    sha.update(namespaceUUID);
+    sha.update(fileHash);
+    auto s = sha.final();
+    std::copy(static_cast<uint8_t *>(s.begin()),
+              // Explicit cast to make MSVC happy
+              static_cast<uint8_t *>(&s[output.size()]), output.begin());
+  } else if (version == 4) {
+    if (auto ec = llvm::getRandomBytes(output.data(), output.size()))
+      error("entropy source failure: " + ec.message());
+  }
+  // Set the UUID version and variant fields.
+  // The version is the upper nibble of byte 6 (0b0101xxxx or 0b0100xxxx)
+  output[6] = (static_cast<uint8_t>(version) << 4) | (output[6] & 0xF);
+
+  // The variant is DCE 1.1/ISO 11578 (0b10xxxxxx)
+  output[8] &= 0xBF;
+  output[8] |= 0x80;
+}
+
+// Fills in the build ID placeholder after the sections have been written
+// to the output buffer. Does nothing when no build ID was requested.
+void Writer::writeBuildId() {
+  if (!out.buildIdSec->isNeeded())
+    return;
+  if (config->buildId == BuildIdKind::Hexstring) {
+    out.buildIdSec->writeBuildId(config->buildIdVector);
+    return;
+  }
+
+  // Compute a hash of all sections of the output file.
+  size_t hashSize = out.buildIdSec->hashSize;
+  std::vector<uint8_t> buildId(hashSize);
+  llvm::ArrayRef<uint8_t> buf{buffer->getBufferStart(), size_t(fileSize)};
+
+  switch (config->buildId) {
+  case BuildIdKind::Fast: {
+    std::vector<uint8_t> fileHash(8);
+    computeHash(fileHash, buf, [](uint8_t *dest, ArrayRef<uint8_t> arr) {
+      support::endian::write64le(dest, xxHash64(arr));
+    });
+    makeUUID(5, fileHash, buildId);
+    break;
+  }
+  case BuildIdKind::Sha1:
+    computeHash(buildId, buf, [&](uint8_t *dest, ArrayRef<uint8_t> arr) {
+      memcpy(dest, SHA1::hash(arr).data(), hashSize);
+    });
+    break;
+  case BuildIdKind::Uuid:
+    makeUUID(4, {}, buildId);
+    break;
+  default:
+    llvm_unreachable("unknown BuildIdKind");
+  }
+  out.buildIdSec->writeBuildId(buildId);
+}
+
 static void setGlobalPtr(DefinedGlobal *g, uint64_t memoryPtr) {
   LLVM_DEBUG(dbgs() << "setGlobalPtr " << g->getName() << " -> " << memoryPtr << "\n");
   g->global->setPointerValue(memoryPtr);
@@ -456,6 +557,7 @@
   addSection(out.nameSec);
   addSection(out.producersSec);
   addSection(out.targetFeaturesSec);
+  addSection(out.buildIdSec);
 }
 
 void Writer::finalizeSections() {
@@ -1577,6 +1679,7 @@
   out.elemSec = make<ElemSection>();
   out.producersSec = make<ProducersSection>();
   out.targetFeaturesSec = make<TargetFeaturesSection>();
+  out.buildIdSec = make<BuildIdSection>();
 }
 
 void Writer::createSyntheticSectionsPostLayout() {
@@ -1738,6 +1841,7 @@
 
   log("-- writeSections");
   writeSections();
+  writeBuildId();
 
   if (errorCount())
     return;