// ---------------------------------------------------------------------------
// Patch overview. This header sits before the first `diff --git` line, where
// patch tools skip free text; nothing below it has been altered.
//
// Purpose: literal merging in lld-macho previously covered only
// S_CSTRING_LITERALS sections. This patch extends it to the fixed-width
// literal sections S_4BYTE_LITERALS, S_8BYTE_LITERALS and S_16BYTE_LITERALS.
//
// lld/MachO/InputFiles.cpp
//   When config->mergeLiterals is set, word-literal sections take the same
//   path as cstring sections: fatal() if the section has relocations, then a
//   single subsection at offset 0. Only the cstring branch calls
//   splitIntoPieces().
//
// lld/MachO/InputSection.h, InputSection.cpp
//   Adds the WordLiteralKind enumerator, the WordLiteralInputSection class,
//   the isWordLiteralSection(flags) predicate and the "__literals" section
//   name constant. WordLiteralInputSection::getOffset(off) passes the bytes
//   at data + off to the parent WordLiteralSection, choosing the 4-, 8- or
//   16-byte lookup from the section type; getFileOffset(off) adds
//   parent->fileOff to that result.
//
// lld/MachO/SyntheticSections.h, SyntheticSections.cpp
//   Adds WordLiteralSection, constructed with segment_names::text and
//   section_names::literals. addInput() sets the input's parent and isFinal,
//   raises the output alignment to at least the input's, and records each
//   value with emplace(value, map.size()), so a value keeps the index it got
//   on first insertion. Output layout is all 16-byte values, then the 8-byte
//   ones, then the 4-byte ones; getSize() and the getLiteral*Offset() helpers
//   follow that layout. writeTo() copies each map key to its slot with memcpy
//   and performs no endianness conversion.
//   Also adds assert(isFinal || target->usesThunks()) to a
//   getVA(uint32_t stubsIndex) accessor.
//
// lld/MachO/Writer.cpp
//   Dispatches each InputSection to its output section by kind (concat,
//   cstring, word literal; anything else is llvm_unreachable) and then sets
//   osec->inputOrder to the minimum of its current value and p.index(). This
//   replaces the removed assignments that only ran for the first input.
//   createSyntheticSections() creates in.wordLiteralSection only when
//   config->mergeLiterals is set, otherwise leaves it nullptr.
//
// lld/test/MachO/literal-merging.s (new), lld/test/MachO/mattrs.ll
//   The new test checks the merged section contents, the literal pointers,
//   the symbol addresses and the section header. mattrs.ll replaces two
//   hard-coded rip-relative displacements with [[#]] and adds -save-temps to
//   its first RUN line.
//
// NOTE(review): this copy of the patch appears to have lost its line breaks
//   and every `<...>` span (e.g. `make()`, `cast(isec)`,
//   `reinterpret_cast(buf)`, a bare `#include`). Restore it from the
//   upstream revision before trying to apply it.
// NOTE(review): addInput() and getLiteral{4,8,16}Offset() read values through
//   reinterpret_cast at data + offset. Presumably the input buffer is assumed
//   to be suitably aligned -- confirm, or read the bytes via memcpy.
// NOTE(review): addInput() iterates data.size() / width entries, so trailing
//   bytes of a section whose size is not a multiple of the literal width
//   would be skipped silently -- confirm that cannot happen or diagnose it.
// NOTE(review): the offset lookups use unordered_map::at(). Confirm how an
//   offset whose bytes were never recorded by addInput() is meant to fail.
// NOTE(review): -save-temps on the mattrs.ll RUN line looks like a possible
//   debugging leftover -- confirm it is intended.
// ---------------------------------------------------------------------------
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -265,16 +265,25 @@ auto *buf = reinterpret_cast(mb.getBufferStart()); for (const Section &sec : sections) { - if (config->mergeLiterals && sectionType(sec.flags) == S_CSTRING_LITERALS) { + if (config->mergeLiterals && + (sectionType(sec.flags) == S_CSTRING_LITERALS || + isWordLiteralSection(sec.flags))) { if (sec.nreloc) fatal(toString(this) + " contains relocations in " + sec.segname + "," + sec.sectname + ", so LLD cannot do literal merging. Try re-running with " "--no-literal-merge."); - auto *isec = make(); - parseSection(this, buf, sec, isec); - isec->splitIntoPieces(); // FIXME: parallelize this? + InputSection *isec; + if (sectionType(sec.flags) == S_CSTRING_LITERALS) { + isec = make(); + parseSection(this, buf, sec, isec); + // FIXME: parallelize this? + cast(isec)->splitIntoPieces(); + } else { + isec = make(); + parseSection(this, buf, sec, isec); + } subsections.push_back({{0, isec}}); } else { auto *isec = make(); diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -27,6 +27,7 @@ enum Kind { ConcatKind, CStringLiteralKind, + WordLiteralKind, }; Kind kind() const { return sectionKind; } @@ -141,6 +142,17 @@ std::vector pieces; }; +class WordLiteralInputSection : public InputSection { +public: + WordLiteralInputSection() : InputSection(WordLiteralKind) {} + uint64_t getFileOffset(uint64_t off) const override; + uint64_t getOffset(uint64_t off) const override; + + static bool classof(const InputSection *isec) { + return isec->kind() == WordLiteralKind; + } +}; + inline uint8_t sectionType(uint32_t flags) { return flags & llvm::MachO::SECTION_TYPE; } @@ -164,6 +176,12 @@ llvm::MachO::S_ATTR_DEBUG; } +inline bool isWordLiteralSection(uint32_t flags) { + return sectionType(flags) == llvm::MachO::S_4BYTE_LITERALS || + sectionType(flags) == 
llvm::MachO::S_8BYTE_LITERALS || + sectionType(flags) == llvm::MachO::S_16BYTE_LITERALS; +} + bool isCodeSection(const InputSection *); extern std::vector inputSections; @@ -192,6 +210,7 @@ constexpr const char const_[] = "__const"; constexpr const char lazySymbolPtr[] = "__la_symbol_ptr"; constexpr const char lazyBinding[] = "__lazy_binding"; +constexpr const char literals[] = "__literals"; constexpr const char moduleInitFunc[] = "__mod_init_func"; constexpr const char moduleTermFunc[] = "__mod_term_func"; constexpr const char nonLazySymbolPtr[] = "__nl_symbol_ptr"; diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -127,6 +127,25 @@ return piece.outSecOff + addend; } +uint64_t WordLiteralInputSection::getFileOffset(uint64_t off) const { + return parent->fileOff + getOffset(off); +} + +uint64_t WordLiteralInputSection::getOffset(uint64_t off) const { + auto *osec = cast(parent); + const uint8_t *buf = data.data(); + switch (sectionType(flags)) { + case S_4BYTE_LITERALS: + return osec->getLiteral4Offset(buf + off); + case S_8BYTE_LITERALS: + return osec->getLiteral8Offset(buf + off); + case S_16BYTE_LITERALS: + return osec->getLiteral16Offset(buf + off); + default: + llvm_unreachable("invalid literal section type"); + } +} + bool macho::isCodeSection(const InputSection *isec) { uint32_t type = isec->flags & SECTION_TYPE; if (type != S_REGULAR && type != S_COALESCED) diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -22,6 +22,8 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include + namespace llvm { class DWARFUnit; } // namespace llvm @@ -297,6 +299,7 @@ // have a corresponding entry in the LazyPointerSection. 
bool addEntry(Symbol *); uint64_t getVA(uint32_t stubsIndex) const { + assert(isFinal || target->usesThunks()); // ConcatOutputSection::finalize() can seek the address of a // stub before its address is assigned. Before __stubs is // finalized, return a contrived out-of-range address. @@ -531,9 +534,61 @@ llvm::StringTableBuilder builder; }; +/* + * This section contains deduplicated literal values. The 16-byte values are + * laid out first, followed by the 8- and then the 4-byte ones. + */ +class WordLiteralSection : public SyntheticSection { +public: + using UInt128 = std::pair; + // I don't think the standard guarantees the size of a pair, so let's make + // sure it's exact -- that way we can construct it via `mmap`. + static_assert(sizeof(UInt128) == 16, ""); + + WordLiteralSection(); + void addInput(WordLiteralInputSection *); + void writeTo(uint8_t *buf) const override; + + uint64_t getSize() const override { + return literal16Map.size() * 16 + literal8Map.size() * 8 + + literal4Map.size() * 4; + } + + bool isNeeded() const override { + return !literal16Map.empty() || !literal4Map.empty() || + !literal8Map.empty(); + } + + uint64_t getLiteral16Offset(const uint8_t *buf) const { + return literal16Map.at(*reinterpret_cast(buf)) * 16; + } + + uint64_t getLiteral8Offset(const uint8_t *buf) const { + return literal16Map.size() * 16 + + literal8Map.at(*reinterpret_cast(buf)) * 8; + } + + uint64_t getLiteral4Offset(const uint8_t *buf) const { + return literal16Map.size() * 16 + literal8Map.size() * 8 + + literal4Map.at(*reinterpret_cast(buf)) * 4; + } + +private: + template struct Hasher { + llvm::hash_code operator()(T v) const { return llvm::hash_value(v); } + }; + // We're using unordered_map instead of DenseMap here because we need to + // support all possible integer values -- there are no suitable tombstone + // values for DenseMap. 
+ std::unordered_map> literal16Map; + std::unordered_map literal8Map; + std::unordered_map literal4Map; +}; + struct InStruct { MachHeaderSection *header = nullptr; CStringSection *cStringSection = nullptr; + WordLiteralSection *wordLiteralSection = nullptr; RebaseSection *rebase = nullptr; BindingSection *binding = nullptr; WeakBindingSection *weakBinding = nullptr; diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -1103,6 +1103,63 @@ } } +// This section is actually emitted as __TEXT,__const by ld64, but clang may +// emit input sections of that name, and LLD doesn't currently support mixing +// synthetic and concat-type OutputSections. To work around this, I've given +// our merged-literals section a different name. +WordLiteralSection::WordLiteralSection() + : SyntheticSection(segment_names::text, section_names::literals) {} + +void WordLiteralSection::addInput(WordLiteralInputSection *isec) { + isec->parent = this; + align = std::max(align, isec->align); + // We do all processing of the InputSection here, so it will be effectively + // finalized. 
+ isec->isFinal = true; + const uint8_t *buf = isec->data.data(); + switch (sectionType(isec->flags)) { + case S_4BYTE_LITERALS: { + for (size_t i = 0, e = isec->data.size() / 4; i < e; ++i) { + uint32_t value = *reinterpret_cast(buf + i * 4); + literal4Map.emplace(value, literal4Map.size()); + } + break; + } + case S_8BYTE_LITERALS: { + for (size_t i = 0, e = isec->data.size() / 8; i < e; ++i) { + uint64_t value = *reinterpret_cast(buf + i * 8); + literal8Map.emplace(value, literal8Map.size()); + } + break; + } + case S_16BYTE_LITERALS: { + for (size_t i = 0, e = isec->data.size() / 16; i < e; ++i) { + UInt128 value = *reinterpret_cast(buf + i * 16); + literal16Map.emplace(value, literal16Map.size()); + } + break; + } + default: + llvm_unreachable("invalid literal section type"); + } +} + +void WordLiteralSection::writeTo(uint8_t *buf) const { + // Note that we don't attempt to do any endianness conversion in addInput(), + // so we don't do it here either -- just write out the original value, + // byte-for-byte. 
+ for (const auto &p : literal16Map) + memcpy(buf + p.second * 16, &p.first, 16); + buf += literal16Map.size() * 16; + + for (const auto &p : literal8Map) + memcpy(buf + p.second * 8, &p.first, 8); + buf += literal8Map.size() * 8; + + for (const auto &p : literal4Map) + memcpy(buf + p.second * 4, &p.first, 4); +} + void macho::createSyntheticSymbols() { auto addHeaderSymbol = [](const char *name) { symtab->addSynthetic(name, in.header->isec, /*value=*/0, diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -828,19 +828,24 @@ InputSection *isec = p.value(); if (isec->shouldOmitFromOutput()) continue; + OutputSection *osec; if (auto *concatIsec = dyn_cast(isec)) { NamePair names = maybeRenameSection({isec->segname, isec->name}); - ConcatOutputSection *&osec = concatOutputSections[names]; - if (osec == nullptr) { - osec = make(names.second); - osec->inputOrder = p.index(); - } - osec->addInput(concatIsec); + ConcatOutputSection *&concatOsec = concatOutputSections[names]; + if (concatOsec == nullptr) + concatOsec = make(names.second); + concatOsec->addInput(concatIsec); + osec = concatOsec; } else if (auto *cStringIsec = dyn_cast(isec)) { - if (in.cStringSection->inputs.empty()) - in.cStringSection->inputOrder = p.index(); in.cStringSection->addInput(cStringIsec); + osec = in.cStringSection; + } else if (auto *litIsec = dyn_cast(isec)) { + in.wordLiteralSection->addInput(litIsec); + osec = in.wordLiteralSection; + } else { + llvm_unreachable("unhandled InputSection type"); } + osec->inputOrder = std::min(osec->inputOrder, static_cast(p.index())); } // Once all the inputs are added, we can finalize the output section @@ -1015,6 +1020,8 @@ void macho::createSyntheticSections() { in.header = make(); in.cStringSection = config->mergeLiterals ? make() : nullptr; + in.wordLiteralSection = + config->mergeLiterals ? 
make() : nullptr; in.rebase = make(); in.binding = make(); in.weakBinding = make(); diff --git a/lld/test/MachO/literal-merging.s b/lld/test/MachO/literal-merging.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/literal-merging.s @@ -0,0 +1,110 @@ +# REQUIRES: x86 +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/qux.s -o %t/qux.o +# RUN: %lld -dylib %t/test.o %t/qux.o -o %t/test +# RUN: llvm-objdump --macho --section="__TEXT,__literals" --section="__DATA,ptrs" --syms %t/test | FileCheck %s +# RUN: llvm-readobj --section-headers %t/test | FileCheck %s --check-prefix=HEADER + +# CHECK: Contents of (__TEXT,__literals) section +# CHECK-NEXT: [[#%.16x,DEADBEEF16:]] ef be ad de ef be ad de ef be ad de ef be ad de +# CHECK-NEXT: [[#%.16x,FEEDFACE16:]] ce fa ed fe ce fa ed fe ce fa ed fe ce fa ed fe +# CHECK-NEXT: [[#%.16x,DEADBEEF8:]] ef be ad de ef be ad de ce fa ed fe ce fa ed fe +# CHECK-NEXT: [[#%.16x,DEADBEEF4:]] ef be ad de ce fa ed fe +# CHECK-NEXT: Contents of (__DATA,ptrs) section +# CHECK-NEXT: 0000000000001000 0x[[#%x,DEADBEEF16]] +# CHECK-NEXT: 0000000000001008 0x[[#%x,DEADBEEF16]] +# CHECK-NEXT: 0000000000001010 0x[[#%x,FEEDFACE16]] +# CHECK-NEXT: 0000000000001018 0x[[#%x,DEADBEEF16]] +# CHECK-NEXT: 0000000000001020 0x[[#%x,DEADBEEF8]] +# CHECK-NEXT: 0000000000001028 0x[[#%x,DEADBEEF8]] +# CHECK-NEXT: 0000000000001030 0x[[#%x,DEADBEEF8 + 8]] +# CHECK-NEXT: 0000000000001038 0x[[#%x,DEADBEEF8]] +# CHECK-NEXT: 0000000000001040 0x[[#%x,DEADBEEF4]] +# CHECK-NEXT: 0000000000001048 0x[[#%x,DEADBEEF4]] +# CHECK-NEXT: 0000000000001050 0x[[#%x,DEADBEEF4 + 4]] +# CHECK-NEXT: 0000000000001058 0x[[#%x,DEADBEEF4]] + +## Make sure the symbol addresses are correct too. 
+# CHECK: SYMBOL TABLE: +# CHECK-DAG: [[#DEADBEEF16]] g O __TEXT,__literals _qux16 +# CHECK-DAG: [[#DEADBEEF8]] g O __TEXT,__literals _qux8 +# CHECK-DAG: [[#DEADBEEF4]] g O __TEXT,__literals _qux4 + +## Make sure we set the right alignment and flags. +# HEADER: Name: __literals +# HEADER-NEXT: Segment: __TEXT +# HEADER-NEXT: Address: +# HEADER-NEXT: Size: +# HEADER-NEXT: Offset: +# HEADER-NEXT: Alignment: 4 +# HEADER-NEXT: RelocationOffset: +# HEADER-NEXT: RelocationCount: 0 +# HEADER-NEXT: Type: Regular +# HEADER-NEXT: Attributes [ (0x0) +# HEADER-NEXT: ] +# HEADER-NEXT: Reserved1: 0x0 +# HEADER-NEXT: Reserved2: 0x0 +# HEADER-NEXT: Reserved3: 0x0 + +#--- test.s +.literal4 +.p2align 2 +L._foo4: + .long 0xdeadbeef +L._bar4: + .long 0xdeadbeef +L._baz4: + .long 0xfeedface + +.literal8 +L._foo8: + .quad 0xdeadbeefdeadbeef +L._bar8: + .quad 0xdeadbeefdeadbeef +L._baz8: + .quad 0xfeedfacefeedface + +.literal16 +L._foo16: + .quad 0xdeadbeefdeadbeef + .quad 0xdeadbeefdeadbeef +L._bar16: + .quad 0xdeadbeefdeadbeef + .quad 0xdeadbeefdeadbeef +L._baz16: + .quad 0xfeedfacefeedface + .quad 0xfeedfacefeedface + +.section __DATA,ptrs,literal_pointers +.quad L._foo16 +.quad L._bar16 +.quad L._baz16 +.quad _qux16 + +.quad L._foo8 +.quad L._bar8 +.quad L._baz8 +.quad _qux8 + +.quad L._foo4 +.quad L._bar4 +.quad L._baz4 +.quad _qux4 + +#--- qux.s +.globl _qux4, _qux8, _qux16 + +.literal4 +.p2align 2 +_qux4: + .long 0xdeadbeef + +.literal8 +_qux8: + .quad 0xdeadbeefdeadbeef + +.literal16 +_qux16: + .quad 0xdeadbeefdeadbeef + .quad 0xdeadbeefdeadbeef diff --git a/lld/test/MachO/mattrs.ll b/lld/test/MachO/mattrs.ll --- a/lld/test/MachO/mattrs.ll +++ b/lld/test/MachO/mattrs.ll @@ -3,7 +3,7 @@ ;; Verify that LTO behavior can be tweaked using -mattr. 
-; RUN: %lld -mcpu haswell -mllvm -mattr=+fma %t.o -o %t.dylib -dylib +; RUN: %lld -save-temps -mcpu haswell -mllvm -mattr=+fma %t.o -o %t.dylib -dylib ; RUN: llvm-objdump -d --section="__text" --no-leading-addr --no-show-raw-insn %t.dylib | FileCheck %s --check-prefix=FMA ; RUN: %lld -mcpu haswell -mllvm -mattr=-fma %t.o -o %t.dylib -dylib @@ -11,14 +11,14 @@ ; FMA: <_foo>: ; FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 -; FMA-NEXT: vfmsub213ss 7(%rip), %xmm1, %xmm0 +; FMA-NEXT: vfmsub213ss [[#]](%rip), %xmm1, %xmm0 ; FMA-NEXT: vfnmadd132ss %xmm1, %xmm1, %xmm0 ; FMA-NEXT: retq ; NO-FMA: <_foo>: ; NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 ; NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 -; NO-FMA-NEXT: vmovss 16(%rip), %xmm2 +; NO-FMA-NEXT: vmovss [[#]](%rip), %xmm2 ; NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0 ; NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 ; NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0