diff --git a/lld/MachO/ConcatOutputSection.h b/lld/MachO/ConcatOutputSection.h --- a/lld/MachO/ConcatOutputSection.h +++ b/lld/MachO/ConcatOutputSection.h @@ -29,22 +29,22 @@ explicit ConcatOutputSection(StringRef name) : OutputSection(ConcatKind, name) {} - const InputSection *firstSection() const { return inputs.front(); } - const InputSection *lastSection() const { return inputs.back(); } + const ConcatInputSection *firstSection() const { return inputs.front(); } + const ConcatInputSection *lastSection() const { return inputs.back(); } // These accessors will only be valid after finalizing the section uint64_t getSize() const override { return size; } uint64_t getFileSize() const override { return fileSize; } - void addInput(InputSection *input); + void addInput(ConcatInputSection *input); void finalize() override; bool needsThunks() const; uint64_t estimateStubsInRangeVA(size_t callIdx) const; void writeTo(uint8_t *buf) const override; - std::vector inputs; - std::vector thunks; + std::vector inputs; + std::vector thunks; static bool classof(const OutputSection *sec) { return sec->kind() == ConcatKind; @@ -69,8 +69,8 @@ struct ThunkInfo { // These denote the active thunk: - Defined *sym = nullptr; // private-extern symbol for active thunk - InputSection *isec = nullptr; // input section for active thunk + Defined *sym = nullptr; // private-extern symbol for active thunk + ConcatInputSection *isec = nullptr; // input section for active thunk // The following values are cumulative across all thunks on this function uint32_t callSiteCount = 0; // how many calls to the real function? diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp --- a/lld/MachO/ConcatOutputSection.cpp +++ b/lld/MachO/ConcatOutputSection.cpp @@ -25,7 +25,7 @@ using namespace lld; using namespace lld::macho; -void ConcatOutputSection::addInput(InputSection *input) { +void ConcatOutputSection::addInput(ConcatInputSection *input) { if (inputs.empty()) { align = input->align; flags = input->flags; @@ -154,7 +154,7 @@ uint64_t ConcatOutputSection::estimateStubsInRangeVA(size_t callIdx) const { uint64_t branchRange = target->branchRange; size_t endIdx = inputs.size(); - InputSection *isec = inputs[callIdx]; + ConcatInputSection *isec = inputs[callIdx]; uint64_t isecVA = isec->getVA(); // Tally the non-stub functions which still have call sites // remaining to process, which yields the maximum number @@ -188,7 +188,7 @@ void ConcatOutputSection::finalize() { uint64_t isecAddr = addr; uint64_t isecFileOff = fileOff; - auto finalizeOne = [&](InputSection *isec) { + auto finalizeOne = [&](ConcatInputSection *isec) { isecAddr = alignTo(isecAddr, isec->align); isecFileOff = alignTo(isecFileOff, isec->align); isec->outSecOff = isecAddr - addr; @@ -199,7 +199,7 @@ }; if (!needsThunks()) { - for (InputSection *isec : inputs) + for (ConcatInputSection *isec : inputs) finalizeOne(isec); size = isecAddr - addr; fileSize = isecFileOff - fileOff; @@ -221,7 +221,7 @@ ++callIdx) { if (finalIdx == callIdx) finalizeOne(inputs[finalIdx++]); - InputSection *isec = inputs[callIdx]; + ConcatInputSection *isec = inputs[callIdx]; assert(isec->isFinal); uint64_t isecVA = isec->getVA(); // Assign addresses up-to the forward branch-range limit @@ -290,7 +290,7 @@ // unfinalized inputs[finalIdx]. fatal(Twine(__FUNCTION__) + ": FIXME: thunk range overrun"); } - thunkInfo.isec = make(); + thunkInfo.isec = make(); thunkInfo.isec->name = isec->name; thunkInfo.isec->segname = isec->segname; thunkInfo.isec->parent = this; diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h --- a/lld/MachO/Config.h +++ b/lld/MachO/Config.h @@ -105,6 +105,7 @@ bool emitEncryptionInfo = false; bool timeTraceEnabled = false; bool dataConst = false; + bool dedupLiterals = true; uint32_t headerPad; uint32_t dylibCompatibilityVersion = 0; uint32_t dylibCurrentVersion = 0; diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -529,7 +529,7 @@ if (common == nullptr) continue; - auto *isec = make(); + auto *isec = make(); isec->file = common->getFile(); isec->name = section_names::common; isec->segname = segment_names::data; @@ -1037,6 +1037,7 @@ config->implicitDylibs = !args.hasArg(OPT_no_implicit_dylibs); config->emitFunctionStarts = !args.hasArg(OPT_no_function_starts); config->emitBitcodeBundle = args.hasArg(OPT_bitcode_bundle); + config->dedupLiterals = args.hasArg(OPT_deduplicate_literals); // FIXME: Add a commandline flag for this too. config->zeroModTime = getenv("ZERO_AR_DATE"); diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h --- a/lld/MachO/InputFiles.h +++ b/lld/MachO/InputFiles.h @@ -39,7 +39,7 @@ namespace macho { struct PlatformInfo; -class InputSection; +class ConcatInputSection; class Symbol; struct Reloc; enum class RefState : uint8_t; @@ -103,7 +103,7 @@ llvm::DWARFUnit *compileUnit = nullptr; const uint32_t modTime; - std::vector debugSections; + std::vector debugSections; private: template void parse(); diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -53,6 +53,7 @@ #include "OutputSegment.h" #include "SymbolTable.h" #include "Symbols.h" +#include "SyntheticSections.h" #include "Target.h" #include "lld/Common/DWARF.h" @@ -240,37 +241,55 @@ InputFile::InputFile(Kind kind, const InterfaceFile &interface) : id(idCount++), fileKind(kind), name(saver.save(interface.getPath())) {} +template +static void parseSection(ObjFile *file, const uint8_t *buf, const Section &sec, + InputSection *isec) { + isec->file = file; + isec->name = + StringRef(sec.sectname, strnlen(sec.sectname, sizeof(sec.sectname))); + isec->segname = + StringRef(sec.segname, strnlen(sec.segname, sizeof(sec.segname))); + isec->data = {isZeroFill(sec.flags) ? nullptr : buf + sec.offset, + static_cast(sec.size)}; + if (sec.align >= 32) + error("alignment " + std::to_string(sec.align) + " of section " + + isec->name + " is too large"); + else + isec->align = 1 << sec.align; + isec->flags = sec.flags; +} + template void ObjFile::parseSections(ArrayRef
sections) { subsections.reserve(sections.size()); auto *buf = reinterpret_cast(mb.getBufferStart()); for (const Section &sec : sections) { - InputSection *isec = make(); - isec->file = this; - isec->name = - StringRef(sec.sectname, strnlen(sec.sectname, sizeof(sec.sectname))); - isec->segname = - StringRef(sec.segname, strnlen(sec.segname, sizeof(sec.segname))); - isec->data = {isZeroFill(sec.flags) ? nullptr : buf + sec.offset, - static_cast(sec.size)}; - if (sec.align >= 32) - error("alignment " + std::to_string(sec.align) + " of section " + - isec->name + " is too large"); - else - isec->align = 1 << sec.align; - isec->flags = sec.flags; - - if (!(isDebugSection(isec->flags) && - isec->segname == segment_names::dwarf)) { + if (config->dedupLiterals && sectionType(sec.flags) == S_CSTRING_LITERALS) { + if (sec.nreloc) + fatal(toString(this) + " contains relocations in " + sec.segname + "," + + sec.sectname + + ", so LLD cannot deduplicate literals. Try re-running without " + "--deduplicate-literals."); + + auto *isec = make(); + parseSection(this, buf, sec, isec); + isec->splitIntoPieces(); // FIXME: parallelize this? subsections.push_back({{0, isec}}); } else { - // Instead of emitting DWARF sections, we emit STABS symbols to the - // object files that contain them. We filter them out early to avoid - // parsing their relocations unnecessarily. But we must still push an - // empty map to ensure the indices line up for the remaining sections. - subsections.push_back({}); - debugSections.push_back(isec); + auto *isec = make(); + parseSection(this, buf, sec, isec); + if (!(isDebugSection(isec->flags) && + isec->segname == segment_names::dwarf)) { + subsections.push_back({{0, isec}}); + } else { + // Instead of emitting DWARF sections, we emit STABS symbols to the + // object files that contain them. We filter them out early to avoid + // parsing their relocations unnecessarily. But we must still push an + // empty map to ensure the indices line up for the remaining sections. + subsections.push_back({}); + debugSections.push_back(isec); + } } } } @@ -601,20 +620,22 @@ j + 1 < symbolIndices.size() ? nList[symbolIndices[j + 1]].n_value - sym.n_value : isec->data.size() - symbolOffset; - // There are 3 cases where we do not need to create a new subsection: + // There are 4 cases where we do not need to create a new subsection: // 1. If the input file does not use subsections-via-symbols. // 2. Multiple symbols at the same address only induce one subsection. // (The symbolOffset == 0 check covers both this case as well as // the first loop iteration.) // 3. Alternative entry points do not induce new subsections. + // 4. If we have a literal section (e.g. __cstring and __literal4). if (!subsectionsViaSymbols || symbolOffset == 0 || - sym.n_desc & N_ALT_ENTRY) { + sym.n_desc & N_ALT_ENTRY || !isa(isec)) { symbols[symIndex] = createDefined(sym, name, isec, symbolOffset, symbolSize); continue; } + auto *concatIsec = cast(isec); - auto *nextIsec = make(*isec); + auto *nextIsec = make(*concatIsec); nextIsec->data = isec->data.slice(symbolOffset); nextIsec->numRefs = 0; nextIsec->wasCoalesced = false; @@ -637,7 +658,7 @@ OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName, StringRef sectName) : InputFile(OpaqueKind, mb) { - InputSection *isec = make(); + ConcatInputSection *isec = make(); isec->file = this; isec->name = sectName.take_front(16); isec->segname = segName.take_front(16); diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -14,6 +14,7 @@ #include "lld/Common/LLVM.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/CachedHashString.h" #include "llvm/BinaryFormat/MachO.h" namespace lld { @@ -24,11 +25,21 @@ class InputSection { public: + enum Kind { + ConcatKind, + CStringLiteralKind, + }; + + Kind kind() const { return sectionKind; } virtual ~InputSection() = default; virtual uint64_t getSize() const { return data.size(); } uint64_t getFileSize() const; - uint64_t getFileOffset() const; - uint64_t getVA() const; + // Translates \p off -- an offset relative to this InputSection -- into an + // offset from the beginning of its parent OutputSection. + virtual uint64_t getOffset(uint64_t off) const = 0; + // The offset from the beginning of the file. + virtual uint64_t getFileOffset(uint64_t off) const = 0; + uint64_t getVA(uint64_t off) const; void writeTo(uint8_t *buf); @@ -37,8 +48,6 @@ StringRef segname; OutputSection *parent = nullptr; - uint64_t outSecOff = 0; - uint64_t outSecFileOff = 0; uint32_t align = 1; uint32_t flags = 0; @@ -62,6 +71,79 @@ ArrayRef data; std::vector relocs; + +protected: + explicit InputSection(Kind kind) : sectionKind(kind) {} + +private: + Kind sectionKind; +}; + +// ConcatInputSections are combined into (Concat)OutputSections through simple +// concatentation, in contrast with literal sections which may have their +// contents merged before output. +class ConcatInputSection : public InputSection { +public: + ConcatInputSection() : InputSection(ConcatKind) {} + uint64_t getFileOffset(uint64_t off) const override; + uint64_t getOffset(uint64_t off) const override { return outSecOff + off; } + uint64_t getVA() const { return InputSection::getVA(0); } + + static bool classof(const InputSection *isec) { + return isec->kind() == ConcatKind; + } + + uint64_t outSecOff = 0; + uint64_t outSecFileOff = 0; +}; + +// We allocate a lot of these and binary search on them, so they should be as +// compact as possible. Hence the use of 32 rather than 64 bits for the hash. +struct StringPiece { + // Offset from the start of the containing input section. + uint32_t inSecOff; + uint32_t hash; + // Offset from the start of the containing output section. + uint64_t outSecOff; + + StringPiece(uint64_t off, uint32_t hash) : inSecOff(off), hash(hash) {} +}; + +// CStringInputSections are composed of multiple null-terminated string +// literals, which we represent using StringPieces. These literals can be +// deduplicated and tail-merged, so translating offsets between the input and +// outputs sections is more complicated. +// +// NOTE: One significant difference between LLD and ld64 is that we merge all +// cstring literals, even those referenced directly by non-private symbols. +// ld64 is more conservative and does not do that. This was mostly done for +// implementation simplicity; if we find programs that need the more +// conservative behavior we can certainly implement that. +class CStringInputSection : public InputSection { +public: + CStringInputSection() : InputSection(CStringLiteralKind) {} + uint64_t getFileOffset(uint64_t off) const override; + uint64_t getOffset(uint64_t off) const override; + // Find the StringPiece that contains this offset. + const StringPiece &getStringPiece(uint64_t off) const; + // Split at each null byte. + void splitIntoPieces(); + + // Returns i'th piece as a CachedHashStringRef. This function is very hot when + // string merging is enabled, so we want to inline. + LLVM_ATTRIBUTE_ALWAYS_INLINE + llvm::CachedHashStringRef getCachedHashStringRef(size_t i) const { + size_t begin = pieces[i].inSecOff; + size_t end = + (pieces.size() - 1 == i) ? data.size() : pieces[i + 1].inSecOff; + return {toStringRef(data.slice(begin, end - begin)), pieces[i].hash}; + } + + static bool classof(const InputSection *isec) { + return isec->kind() == CStringLiteralKind; + } + + std::vector pieces; }; inline uint8_t sectionType(uint32_t flags) { @@ -97,6 +179,7 @@ constexpr const char authPtr[] = "__auth_ptr"; constexpr const char binding[] = "__binding"; constexpr const char bitcodeBundle[] = "__bundle"; +constexpr const char cString[] = "__cstring"; constexpr const char cfString[] = "__cfstring"; constexpr const char codeSignature[] = "__code_signature"; constexpr const char common[] = "__common"; diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -15,6 +15,7 @@ #include "Writer.h" #include "lld/Common/Memory.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/xxhash.h" using namespace llvm; using namespace llvm::MachO; @@ -24,15 +25,17 @@ std::vector macho::inputSections; -uint64_t InputSection::getFileOffset() const { - return parent->fileOff + outSecFileOff; +uint64_t ConcatInputSection::getFileOffset(uint64_t off) const { + return parent->fileOff + outSecFileOff + off; } uint64_t InputSection::getFileSize() const { return isZeroFill(flags) ? 0 : getSize(); } -uint64_t InputSection::getVA() const { return parent->addr + outSecOff; } +uint64_t InputSection::getVA(uint64_t off) const { + return parent->addr + getOffset(off); +} static uint64_t resolveSymbolVA(const Symbol *sym, uint8_t type) { const RelocAttrs &relocAttrs = target->getRelocAttrs(type); @@ -62,18 +65,18 @@ const Reloc &minuend = relocs[++i]; uint64_t minuendVA; if (const Symbol *toSym = minuend.referent.dyn_cast()) - minuendVA = toSym->getVA(); + minuendVA = toSym->getVA() + minuend.addend; else { auto *referentIsec = minuend.referent.get(); assert(!referentIsec->shouldOmitFromOutput()); - minuendVA = referentIsec->getVA(); + minuendVA = referentIsec->getVA(minuend.addend); } - referentVA = minuendVA - fromSym->getVA() + minuend.addend; + referentVA = minuendVA - fromSym->getVA(); } else if (auto *referentSym = r.referent.dyn_cast()) { if (target->hasAttr(r.type, RelocAttrBits::LOAD) && !referentSym->isInGot()) target->relaxGotLoad(loc, r.type); - referentVA = resolveSymbolVA(referentSym, r.type); + referentVA = resolveSymbolVA(referentSym, r.type) + r.addend; if (isThreadLocalVariables(flags)) { // References from thread-local variable sections are treated as offsets @@ -85,12 +88,45 @@ } } else if (auto *referentIsec = r.referent.dyn_cast()) { assert(!referentIsec->shouldOmitFromOutput()); - referentVA = referentIsec->getVA(); + referentVA = referentIsec->getVA(r.addend); } - target->relocateOne(loc, r, referentVA + r.addend, getVA() + r.offset); + target->relocateOne(loc, r, referentVA, getVA(r.offset)); + } +} + +void CStringInputSection::splitIntoPieces() { + size_t off = 0; + StringRef s = toStringRef(data); + while (!s.empty()) { + size_t end = s.find(0); + if (end == StringRef::npos) + fatal(toString(this) + ": string is not null terminated"); + size_t size = end + 1; + pieces.emplace_back(off, xxHash64(s.substr(0, size))); + s = s.substr(size); + off += size; } } +const StringPiece &CStringInputSection::getStringPiece(uint64_t off) const { + if (off >= data.size()) + fatal(toString(this) + ": offset is outside the section"); + + auto it = + partition_point(pieces, [=](StringPiece p) { return p.inSecOff <= off; }); + return it[-1]; +} + +uint64_t CStringInputSection::getFileOffset(uint64_t off) const { + return parent->fileOff + getOffset(off); +} + +uint64_t CStringInputSection::getOffset(uint64_t off) const { + const StringPiece &piece = getStringPiece(off); + uint64_t addend = off - piece.inSecOff; + return piece.outSecOff + addend; +} + bool macho::isCodeSection(const InputSection *isec) { uint32_t type = isec->flags & SECTION_TYPE; if (type != S_REGULAR && type != S_COALESCED) diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td --- a/lld/MachO/Options.td +++ b/lld/MachO/Options.td @@ -50,6 +50,7 @@ def time_trace_granularity_eq: Joined<["--"], "time-trace-granularity=">, HelpText<"Minimum time granularity (in microseconds) traced by time profiler">; def time_trace_file_eq: Joined<["--"], "time-trace-file=">, HelpText<"Specify time trace output file">; +def deduplicate_literals: Flag<["--"], "deduplicate-literals">, HelpText<"Enable literal deduplication">; // This is a complete Options.td compiled from Apple's ld(1) manpage // dated 2018-03-07 and cross checked with ld64 source code in repo diff --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp --- a/lld/MachO/Symbols.cpp +++ b/lld/MachO/Symbols.cpp @@ -68,7 +68,7 @@ // expedient to return a contrived out-of-range address. return TargetInfo::outOfRangeVA; } - return isec->getVA() + value; + return isec->getVA(value); } uint64_t Defined::getFileOffset() const { @@ -77,7 +77,7 @@ " does not have a file offset"); return 0; } - return isec->getFileOffset() + value; + return isec->getFileOffset(value); } uint64_t DylibSymbol::getVA() const { diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -18,6 +18,7 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/SetVector.h" +#include "llvm/MC/StringTableBuilder.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -154,7 +155,7 @@ Location(const InputSection *isec, uint64_t offset) : isec(isec), offset(offset) {} - uint64_t getVA() const { return isec->getVA() + offset; } + uint64_t getVA() const { return isec->getVA(offset); } }; // Stores rebase opcodes, which tell dyld where absolute addresses have been @@ -326,7 +327,7 @@ // to cache an address to the image loader it uses. Note that unlike the other // synthetic sections, which are OutputSections, the ImageLoaderCacheSection is // an InputSection that gets merged into the __data OutputSection. -class ImageLoaderCacheSection : public InputSection { +class ImageLoaderCacheSection : public ConcatInputSection { public: ImageLoaderCacheSection(); uint64_t getSize() const override { return target->wordSize; } @@ -515,8 +516,24 @@ uint64_t xarSize; }; +class CStringSection : public SyntheticSection { +public: + CStringSection(); + void addInput(CStringInputSection *); + uint64_t getSize() const override { return builder.getSize(); } + void finalize() override; + bool isNeeded() const override { return !inputs.empty(); } + void writeTo(uint8_t *buf) const override { builder.write(buf); } + + std::vector inputs; + +private: + llvm::StringTableBuilder builder; +}; + struct InStruct { MachHeaderSection *header = nullptr; + CStringSection *cStringSection = nullptr; RebaseSection *rebase = nullptr; BindingSection *binding = nullptr; WeakBindingSection *weakBinding = nullptr; diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -48,11 +48,10 @@ SyntheticSection::SyntheticSection(const char *segname, const char *name) : OutputSection(SyntheticKind, name), segname(segname) { - isec = make(); + isec = make(); isec->segname = segname; isec->name = name; isec->parent = this; - isec->outSecOff = 0; syntheticSections.push_back(this); } @@ -202,10 +201,10 @@ os << static_cast(REBASE_OPCODE_SET_TYPE_IMM | REBASE_TYPE_POINTER); llvm::sort(locations, [](const Location &a, const Location &b) { - return a.isec->getVA() < b.isec->getVA(); + return a.isec->getVA(a.offset) < b.isec->getVA(b.offset); }); for (const Location &loc : locations) - encodeRebase(loc.isec->parent, loc.isec->outSecOff + loc.offset, lastRebase, + encodeRebase(loc.isec->parent, loc.isec->getOffset(loc.offset), lastRebase, os); if (lastRebase.consecutiveCount != 0) encodeDoRebase(lastRebase, os); @@ -367,7 +366,7 @@ lastBinding.ordinal = ordinal; } encodeBinding(b.dysym, b.target.isec->parent, - b.target.isec->outSecOff + b.target.offset, b.addend, + b.target.isec->getOffset(b.target.offset), b.addend, /*isWeakBinding=*/false, lastBinding, os); } if (!bindings.empty()) @@ -396,7 +395,7 @@ }); for (const WeakBindingEntry &b : bindings) encodeBinding(b.symbol, b.target.isec->parent, - b.target.isec->outSecOff + b.target.offset, b.addend, + b.target.isec->getOffset(b.target.offset), b.addend, /*isWeakBinding=*/true, lastBinding, os); if (!bindings.empty() || !definitions.empty()) os << static_cast(BIND_OPCODE_DONE); @@ -1076,6 +1075,63 @@ remove(xarPath); } +// Mergeable cstring literals are found under the __TEXT,__cstring section. In +// contrast to ELF, which puts strings that need different alignments into +// different sections, clang's Mach-O backend puts them all in one section. +// Strings that need to be aligned have the .p2align directive emitted before +// them, which simply translates into zero padding in the object file. +// +// I *think* ld64 extracts the desired per-string alignment from this data by +// preserving each string's offset from the last section-aligned address. I'm +// not entirely certain since it doesn't seem consistent about doing this, and +// in fact doesn't seem to be correct in general: we can in fact can induce ld64 +// to produce a crashing binary just by linking in an additional object file +// that only contains a duplicate cstring at a different alignment. See PR50563 +// for details. +// +// In practice, the cstrings we've seen so far that require special aligment are +// all accessed by x86_64 SIMD operations -- x86_64 requires SIMD accesses to be +// 16-byte-aligned. So for now, I'm just aligning all strings to 16 bytes on +// x86_64. This is indeed wasteful, but implementation-wise it's simpler than +// preserving per-string alignment+offsets. It also avoids the aforementioned +// crash after deduplication of differently-aligned strings. Finally, the +// overhead is not huge: using 16-byte alignment (vs no alignment) is only a +// 0.5% size overhead when linking chromium_framework. +CStringSection::CStringSection() + : SyntheticSection(segment_names::text, section_names::cString), + builder(StringTableBuilder::RAW, + /*Alignment=*/target->cpuType == CPU_TYPE_X86_64 ? 16 : 1) { + align = target->cpuType == CPU_TYPE_X86_64 ? 16 : 1; + flags = S_CSTRING_LITERALS; +} + +void CStringSection::addInput(CStringInputSection *isec) { + isec->parent = this; + inputs.push_back(isec); +} + +void CStringSection::finalize() { + // Add all string pieces to the string table builder to create section + // contents. + for (const CStringInputSection *isec : inputs) + for (size_t i = 0, e = isec->pieces.size(); i != e; ++i) + builder.add(isec->getCachedHashStringRef(i)); + + // Fix the string table content. After this, the contents will never change. + builder.finalizeInOrder(); + + // finalize() fixed tail-optimized strings, so we can now get + // offsets of strings. Get an offset for each string and save it + // to a corresponding SectionPiece for easy access. + for (CStringInputSection *isec : inputs) { + for (size_t i = 0, e = isec->pieces.size(); i != e; ++i) { + isec->pieces[i].outSecOff = + builder.getOffset(isec->getCachedHashStringRef(i)); + isec->isFinal = true; + } + } +} + void macho::createSyntheticSymbols() { auto addHeaderSymbol = [](const char *name) { symtab->addSynthetic(name, in.header->isec, /*value=*/0, diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp --- a/lld/MachO/UnwindInfoSection.cpp +++ b/lld/MachO/UnwindInfoSection.cpp @@ -214,7 +214,7 @@ static void relocateCompactUnwind(ConcatOutputSection *compactUnwindSection, std::vector> &cuVector) { - for (const InputSection *isec : compactUnwindSection->inputs) { + for (const ConcatInputSection *isec : compactUnwindSection->inputs) { assert(isec->parent == compactUnwindSection); uint8_t *buf = @@ -238,7 +238,7 @@ if (referentIsec->shouldOmitFromOutput()) referentVA = UINT64_MAX; // Tombstone value else - referentVA = referentIsec->getVA() + r.addend; + referentVA = referentIsec->getVA(r.addend); } writeAddress(buf + r.offset, referentVA, r.length); diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -863,15 +863,23 @@ InputSection *isec = p.value(); if (isec->shouldOmitFromOutput()) continue; - NamePair names = maybeRenameSection({isec->segname, isec->name}); - ConcatOutputSection *&osec = concatOutputSections[names]; - if (osec == nullptr) { - osec = make(names.second); - osec->inputOrder = p.index(); + if (auto *concatIsec = dyn_cast(isec)) { + NamePair names = maybeRenameSection({isec->segname, isec->name}); + ConcatOutputSection *&osec = concatOutputSections[names]; + if (osec == nullptr) { + osec = make(names.second); + osec->inputOrder = p.index(); + } + osec->addInput(concatIsec); + } else if (auto *cStringIsec = dyn_cast(isec)) { + if (in.cStringSection->inputs.empty()) + in.cStringSection->inputOrder = p.index(); + in.cStringSection->addInput(cStringIsec); } - osec->addInput(isec); } + // Once all the inputs are added, we can finalize the output section + // properties and create the corresponding output segments. for (const auto &it : concatOutputSections) { StringRef segname = it.first.first; ConcatOutputSection *osec = it.second; @@ -885,13 +893,14 @@ for (SyntheticSection *ssec : syntheticSections) { auto it = concatOutputSections.find({ssec->segname, ssec->name}); - if (it == concatOutputSections.end()) { - if (ssec->isNeeded()) + if (ssec->isNeeded()) { + if (it == concatOutputSections.end()) { getOrCreateOutputSegment(ssec->segname)->addOutputSection(ssec); - } else { - error("section from " + toString(it->second->firstSection()->file) + - " conflicts with synthetic section " + ssec->segname + "," + - ssec->name); + } else { + fatal("section from " + toString(it->second->firstSection()->file) + + " conflicts with synthetic section " + ssec->segname + "," + + ssec->name); + } } } @@ -1040,6 +1049,7 @@ void macho::createSyntheticSections() { in.header = make(); + in.cStringSection = config->dedupLiterals ? make() : nullptr; in.rebase = make(); in.binding = make(); in.weakBinding = make(); diff --git a/lld/test/MachO/cstring-dedup.s b/lld/test/MachO/cstring-dedup.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/cstring-dedup.s @@ -0,0 +1,107 @@ +# REQUIRES: x86 +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/more-foo.s -o %t/more-foo.o +# RUN: %lld -dylib --deduplicate-literals %t/test.o %t/more-foo.o -o %t/test +# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/test | \ +# RUN: FileCheck %s --check-prefix=STR --implicit-check-not foo --implicit-check-not bar +# RUN: llvm-objdump --macho --section="__DATA,ptrs" --syms %t/test | FileCheck %s +# RUN: llvm-readobj --section-headers %t/test | FileCheck %s --check-prefix=HEADER + +## Make sure we only have 3 deduplicated strings in __cstring, and that they +## are 16-byte-aligned. +# STR: Contents of (__TEXT,__cstring) section +# STR: {{.*}}0 foo +# STR: {{.*}}0 barbaz +# STR: {{.*}}0 {{$}} + +## Make sure both symbol and section relocations point to the right thing. +# CHECK: Contents of (__DATA,ptrs) section +# CHECK-NEXT: __TEXT:__cstring:foo +# CHECK-NEXT: __TEXT:__cstring:foo +# CHECK-NEXT: __TEXT:__cstring:foo +# CHECK-NEXT: __TEXT:__cstring:foo +# CHECK-NEXT: __TEXT:__cstring:foo +# CHECK-NEXT: __TEXT:__cstring:foo +# CHECK-NEXT: __TEXT:__cstring:barbaz +# CHECK-NEXT: __TEXT:__cstring:baz +# CHECK-NEXT: __TEXT:__cstring:barbaz +# CHECK-NEXT: __TEXT:__cstring:baz +# CHECK-NEXT: __TEXT:__cstring:{{$}} +# CHECK-NEXT: __TEXT:__cstring:{{$}} + +## Make sure the symbol addresses are correct too. +# CHECK: SYMBOL TABLE: +# CHECK-DAG: [[#%.16x,FOO:]] l O __TEXT,__cstring _local_foo1 +# CHECK-DAG: [[#FOO]] l O __TEXT,__cstring _local_foo2 +# CHECK-DAG: [[#FOO]] g O __TEXT,__cstring _globl_foo1 +# CHECK-DAG: [[#FOO]] g O __TEXT,__cstring _globl_foo2 +# CHECK-DAG: [[#%.16x,BAR:]] l O __TEXT,__cstring _bar1 +# CHECK-DAG: [[#BAR]] l O __TEXT,__cstring _bar2 +# CHECK-DAG: [[#%.16x,ZERO:]] l O __TEXT,__cstring _zero1 +# CHECK-DAG: [[#ZERO]] l O __TEXT,__cstring _zero2 + +## Make sure we set the right alignment and flags. +# HEADER: Name: __cstring +# HEADER-NEXT: Segment: __TEXT +# HEADER-NEXT: Address: +# HEADER-NEXT: Size: +# HEADER-NEXT: Offset: +# HEADER-NEXT: Alignment: 4 +# HEADER-NEXT: RelocationOffset: +# HEADER-NEXT: RelocationCount: 0 +# HEADER-NEXT: Type: CStringLiterals +# HEADER-NEXT: Attributes [ (0x0) +# HEADER-NEXT: ] +# HEADER-NEXT: Reserved1: 0x0 +# HEADER-NEXT: Reserved2: 0x0 +# HEADER-NEXT: Reserved3: 0x0 + +#--- test.s +.cstring +.p2align 2 +_local_foo1: + .asciz "foo" +_local_foo2: + .asciz "foo" +L_.foo1: + .asciz "foo" +L_.foo2: + .asciz "foo" + +_bar1: + .ascii "bar" +_baz1: + .asciz "baz" +_bar2: + .ascii "bar" +_baz2: + .asciz "baz" + +_zero1: + .asciz "" +_zero2: + .asciz "" + +.section __DATA,ptrs,literal_pointers +.quad L_.foo1 +.quad L_.foo2 +.quad _local_foo1 +.quad _local_foo2 +.quad _globl_foo1 +.quad _globl_foo2 +.quad _bar1 +.quad _baz1 +.quad _bar2 +.quad _baz2 +.quad _zero1 +.quad _zero2 + +#--- more-foo.s +.globl _globl_foo1, _globl_foo2 +.cstring +.p2align 4 +_globl_foo1: + .asciz "foo" +_globl_foo2: + .asciz "foo" diff --git a/lld/test/MachO/invalid/cstring-dedup.s b/lld/test/MachO/invalid/cstring-dedup.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/invalid/cstring-dedup.s @@ -0,0 +1,21 @@ +# REQUIRES: x86 +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/not-terminated.s -o %t/not-terminated.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/relocs.s -o %t/relocs.o + +# RUN: not %lld -dylib --deduplicate-literals %t/not-terminated.o 2>&1 | FileCheck %s --check-prefix=TERM +# RUN: not %lld -dylib --deduplicate-literals %t/relocs.o 2>&1 | FileCheck %s --check-prefix=RELOCS + +# TERM: not-terminated.o:(__cstring): string is not null terminated +# RELOCS: relocs.o contains relocations in __TEXT,__cstring, so LLD cannot deduplicate literals. Try re-running without --deduplicate-literals. + +#--- not-terminated.s +.cstring +.asciz "foo" +.ascii "oh no" + +#--- relocs.s +.cstring +_str: +.asciz "foo" +.quad _str diff --git a/lld/test/MachO/invalid/reserved-section-name.s b/lld/test/MachO/invalid/reserved-section-name.s --- a/lld/test/MachO/invalid/reserved-section-name.s +++ b/lld/test/MachO/invalid/reserved-section-name.s @@ -8,7 +8,12 @@ .section __DATA_CONST,__got .space 1 +.data +_foo: +.space 1 + .text _main: - mov $0, %rax +## make sure the GOT will be needed + pushq _foo@GOTPCREL(%rip) ret diff --git a/lld/test/MachO/subsections-section-relocs.s b/lld/test/MachO/subsections-section-relocs.s --- a/lld/test/MachO/subsections-section-relocs.s +++ b/lld/test/MachO/subsections-section-relocs.s @@ -39,10 +39,13 @@ ## References to this generate a section relocation ## N.B.: ld64 doesn't actually reorder symbols in __cstring based on the order -## file. Only our implementation does. However, I'm not sure how else to -## test section relocations that target an address inside a relocated -## symbol: using a non-__cstring section would cause llvm-mc to emit a -## symbol relocation instead using the nearest symbol. +## file. Our implementation only does does so if --no-literal-merge is +## specified. I'm not sure how else to test section relocations that +## target an address inside a relocated symbol: using a non-__cstring +## section would cause llvm-mc to emit a symbol relocation instead using +## the nearest symbol. It might be more consistent for LLD to disable +## symbol-based cstring reordering altogether and leave this functionality +## untested, at least until we find a real-world use case... L_.str: .asciz "Private symbol" diff --git a/lld/test/MachO/x86-64-relocs.s b/lld/test/MachO/x86-64-relocs.s --- a/lld/test/MachO/x86-64-relocs.s +++ b/lld/test/MachO/x86-64-relocs.s @@ -4,7 +4,7 @@ # RUN: llvm-objdump --section-headers --syms -d %t | FileCheck %s # CHECK-LABEL: Sections: -# CHECK: __cstring {{[0-9a-z]+}} [[#%x, CSTRING_ADDR:]] +# CHECK: __data {{[0-9a-z]+}} [[#%x, DATA_ADDR:]] # CHECK-LABEL: SYMBOL TABLE: # CHECK: [[#%x, F_ADDR:]] {{.*}} _f @@ -13,15 +13,15 @@ ## Test X86_64_RELOC_BRANCH # CHECK: callq 0x[[#%x, F_ADDR]] <_f> ## Test extern (symbol) X86_64_RELOC_SIGNED -# CHECK: leaq [[#%u, STR_OFF:]](%rip), %rsi -# CHECK-NEXT: [[#%x, CSTRING_ADDR - STR_OFF]] +# CHECK: leaq [[#%u, LOCAL_OFF:]](%rip), %rsi +# CHECK-NEXT: [[#%x, DATA_ADDR - LOCAL_OFF]] ## Test non-extern (section) X86_64_RELOC_SIGNED -# CHECK: leaq [[#%u, LSTR_OFF:]](%rip), %rsi -# CHECK-NEXT: [[#%x, CSTRING_ADDR + 22 - LSTR_OFF]] +# CHECK: leaq [[#%u, PRIVATE_OFF:]](%rip), %rsi +# CHECK-NEXT: [[#%x, DATA_ADDR + 8 - PRIVATE_OFF]] # RUN: llvm-objdump --section=__const --full-contents %t | FileCheck %s --check-prefix=NONPCREL # NONPCREL: Contents of section __DATA_CONST,__const: -# NONPCREL-NEXT: 100001000 18040000 01000000 18040000 01000000 +# NONPCREL-NEXT: 100001000 08200000 01000000 08200000 01000000 .section __TEXT,__text .globl _main, _f @@ -31,36 +31,22 @@ ret _f: - movl $0x2000004, %eax # write() syscall - mov $1, %rdi # stdout - leaq _str(%rip), %rsi # Generates a X86_64_RELOC_SIGNED pcrel symbol relocation - mov $21, %rdx # length of str - syscall - - movl $0x2000004, %eax # write() syscall - mov $1, %rdi # stdout - leaq L_.str(%rip), %rsi # Generates a X86_64_RELOC_SIGNED pcrel section relocation - mov $15, %rdx # length of str - syscall - - movl $0x2000004, %eax # write() syscall - mov $1, %rdi # stdout - movq L_.ptr_1_to_str(%rip), %rsi - mov $15, %rdx # length of str - syscall + leaq _local(%rip), %rsi # Generates a X86_64_RELOC_SIGNED pcrel symbol relocation + leaq L_.private(%rip), %rsi # Generates a X86_64_RELOC_SIGNED pcrel section relocation + movq L_.ptr_1(%rip), %rsi ret -.section __TEXT,__cstring +.data ## References to this generate a symbol relocation -_str: - .asciz "Local defined symbol\n" +_local: + .quad 123 ## References to this generate a section relocation -L_.str: - .asciz "Private symbol\n" +L_.private: + .quad 123 .section __DATA,__const ## These generate X86_64_RELOC_UNSIGNED non-pcrel section relocations -L_.ptr_1_to_str: - .quad L_.str -L_.ptr_2_to_str: - .quad L_.str +L_.ptr_1: + .quad L_.private +L_.ptr_2: + .quad L_.private