diff --git a/lld/MachO/ConcatOutputSection.h b/lld/MachO/ConcatOutputSection.h --- a/lld/MachO/ConcatOutputSection.h +++ b/lld/MachO/ConcatOutputSection.h @@ -50,6 +50,15 @@ return sec->kind() == ConcatKind; } + void foldIdenticalSections(); + void segregate( + size_t begin, size_t end, + std::function equals); + size_t findBoundary(size_t begin, size_t end); + void forEachClassRange(size_t begin, size_t end, + std::function func); + void forEachClass(std::function func); + private: void mergeFlags(InputSection *input); diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp --- a/lld/MachO/ConcatOutputSection.cpp +++ b/lld/MachO/ConcatOutputSection.cpp @@ -16,7 +16,9 @@ #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "llvm/BinaryFormat/MachO.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/ScopedPrinter.h" +#include "llvm/Support/TimeProfiler.h" #include @@ -206,6 +208,7 @@ return; } + TimeTraceScope timeScope("Place branch-range-extension thunks"); uint64_t branchRange = target->branchRange; uint64_t stubsInRangeVA = TargetInfo::outOfRangeVA; size_t thunkSize = target->thunkSize; @@ -359,3 +362,181 @@ flags |= input->flags; flags &= pureMask; } + +// ICF = Identical C(ode|OMDAT) Folding +// +// FIXME: implement address-significance tables for MachO object files +// FIXME: don't merge functions with distinct EH LSDA & personality +// * mark symbols as preemptible +// * account for EH LSDA & personality differences +// * filter isecs ... +// * live +// * ! keep unique +// * ! writeable +// * ! init/fini + +static unsigned icfPass = 0; +static std::atomic icfRepeat = {false}; + +// Compare everything except relocation target data. +static bool equalsConstant(const InputSection *ia, const InputSection *ib) { + return ia->flags == ib->flags && ia->data == ib->data && + ia->relocs.size() == ib->relocs.size() && + std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), + [&](const Reloc &ra, const Reloc &rb) { + return ra.type == rb.type && ra.pcrel == rb.pcrel && + ra.length == rb.length && ra.offset == rb.offset && + ra.addend == rb.addend && + ra.referent.is() == + rb.referent.is(); + }); +} + +// Compare only relocation target data. +static bool equalsVariable(const InputSection *ia, const InputSection *ib) { + return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), + [&](const Reloc &ra, const Reloc &rb) { + if (ra.referent == rb.referent) + return true; + if (ra.referent.is()) { + const auto *sa = ra.referent.get(); + const auto *sb = rb.referent.get(); + if (const auto *da = dyn_cast(sa)) + if (const auto *db = dyn_cast(sb)) + return da->isec->icfEquivalenceClass[icfPass % 2] == + db->isec->icfEquivalenceClass[icfPass % 2]; + return false; + } else { + assert(false && "section reloc for ICF"); + const auto *sa = ra.referent.get(); + const auto *sb = rb.referent.get(); + return (sa->icfEquivalenceClass[icfPass % 2] == + sb->icfEquivalenceClass[icfPass % 2]); + } + }); +} + +// Find the first InputSection after begin that has a different class from +// Begin. +size_t ConcatOutputSection::findBoundary(size_t begin, size_t end) { + for (size_t i = begin + 1; i < end; ++i) + if (inputs[begin]->icfEquivalenceClass[icfPass % 2] != + inputs[i]->icfEquivalenceClass[icfPass % 2]) + return i; + return end; +} + +void ConcatOutputSection::forEachClassRange( + size_t begin, size_t end, std::function func) { + while (begin < end) { + size_t mid = findBoundary(begin, end); + func(begin, mid); + begin = mid; + } +} + +// Call FUNC on each class group. +void ConcatOutputSection::forEachClass( + std::function func) { + // If the number of sections are too small to use threading, + // call FUNC sequentially. + if (inputs.size() < 1024) { + forEachClassRange(0, inputs.size(), func); + ++icfPass; + return; + } + + // Shard into non-overlapping intervals, and call FUNC in parallel. + // The sharding must be completed before any calls to FUNC are made + // so that FUNC can modify the Chunks in its shard without causing + // data races. + const size_t shards = 256; + size_t step = inputs.size() / shards; + size_t boundaries[shards + 1]; + boundaries[0] = 0; + boundaries[shards] = inputs.size(); + parallelForEachN(1, shards, [&](size_t i) { + boundaries[i] = findBoundary((i - 1) * step, inputs.size()); + }); + parallelForEachN(1, shards + 1, [&](size_t i) { + if (boundaries[i - 1] < boundaries[i]) { + forEachClassRange(boundaries[i - 1], boundaries[i], func); + } + }); + ++icfPass; +} + +void ConcatOutputSection::foldIdenticalSections() { + TimeTraceScope timeScope("Fold Identical COMDAT Sections"); + // Combine hashes of referent sections into each + for (icfPass = 0; icfPass < 2; ++icfPass) { + parallelForEach(inputs, [&](InputSection *isec) { + uint64_t hash = isec->icfEquivalenceClass[icfPass % 2]; + for (const Reloc &r : isec->relocs) { + if (auto *sym = r.referent.dyn_cast()) + if (auto *defined = dyn_cast(sym)) + // FIXME: what about InputSection relocs? + hash += defined->isec->icfEquivalenceClass[icfPass % 2]; + } + // Set MSB to 1 to avoid collisions with non-hashed classes. + isec->icfEquivalenceClass[(icfPass + 1) % 2] = hash | (1ull << 63); + }); + } + + llvm::stable_sort(inputs, [](const InputSection *a, const InputSection *b) { + return a->icfEquivalenceClass[0] < b->icfEquivalenceClass[0]; + }); + forEachClass( + [&](size_t begin, size_t end) { segregate(begin, end, equalsConstant); }); + + // Split groups by comparing relocations until convergence is obtained. + do { + icfRepeat = false; + forEachClass([&](size_t begin, size_t end) { + segregate(begin, end, equalsVariable); + }); + } while (icfRepeat); + + log("ICF needed " + Twine(icfPass) + " iterations"); + // Merge sections in the same classes. + forEachClass([&](size_t begin, size_t end) { + if (end - begin > 1) + for (size_t i = begin + 1; i < end; ++i) + inputs[begin]->foldIdentical(inputs[i]); + }); + + size_t oldSize = inputs.size(); + inputs.erase(std::remove_if(inputs.begin(), inputs.end(), + [](const InputSection *isec) -> bool { + return isec->shouldOmitFromOutput(); + }), + inputs.end()); + log("ICF kept " + std::to_string(inputs.size()) + " removed " + + std::to_string(oldSize - inputs.size()) + " of " + + std::to_string(oldSize)); +} + +// Split an equivalence class into smaller classes. +void ConcatOutputSection::segregate( + size_t begin, size_t end, + std::function equals) { + while (begin < end) { + // Divide [Begin, End) into two. Let Mid be the start index of the + // second group. + auto bound = std::stable_partition( + inputs.begin() + begin + 1, inputs.begin() + end, + [&](InputSection *isec) { return equals(inputs[begin], isec); }); + size_t mid = bound - inputs.begin(); + + // Split [Begin, End) into [Begin, Mid) and [Mid, End). We use Mid as an + // equivalence class ID because every group ends with a unique index. + for (size_t i = begin; i < mid; ++i) + inputs[i]->icfEquivalenceClass[(icfPass + 1) % 2] = mid; + + // If we created a group, we need to iterate the main loop again. + if (mid != end) + icfRepeat = true; + + begin = mid; + } +} diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h --- a/lld/MachO/Config.h +++ b/lld/MachO/Config.h @@ -51,6 +51,13 @@ dynamic_lookup, }; +enum class ICFLevel { + unknown, + none, + safe, + all, +}; + struct SectionAlign { llvm::StringRef segName; llvm::StringRef sectName; @@ -114,6 +121,7 @@ NamespaceKind namespaceKind = NamespaceKind::twolevel; UndefinedSymbolTreatment undefinedSymbolTreatment = UndefinedSymbolTreatment::error; + ICFLevel icfLevel = ICFLevel::none; llvm::MachO::HeaderFileType outputType; std::vector systemLibraryRoots; std::vector librarySearchPaths; diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -516,13 +516,13 @@ isec->data = {nullptr, static_cast(common->size)}; isec->flags = S_ZEROFILL; inputSections.push_back(isec); - - replaceSymbol(sym, sym->getName(), isec->file, isec, /*value=*/0, - /*size=*/0, - /*isWeakDef=*/false, - /*isExternal=*/true, common->privateExtern, - /*isThumb=*/false, - /*isReferencedDynamically=*/false); + isec->defined = replaceSymbol( + sym, sym->getName(), isec->file, isec, /*value=*/0, + /*size=*/0, + /*isWeakDef=*/false, + /*isExternal=*/true, common->privateExtern, + /*isThumb=*/false, + /*isReferencedDynamically=*/false); } } @@ -659,6 +659,21 @@ return treatment; } +static ICFLevel getICFLevel(const ArgList &args) { + StringRef icfLevelStr = args.getLastArgValue(OPT_icf); + auto icfLevel = StringSwitch(icfLevelStr) + .Cases("none", "", ICFLevel::none) + .Case("all", ICFLevel::all) + .Case("safe", ICFLevel::safe) + .Default(ICFLevel::unknown); + if (icfLevel == ICFLevel::unknown) { + warn(Twine("unknown --icf OPTION '") + icfLevelStr + + "', defaulting to 'none'"); + icfLevel = ICFLevel::none; + } + return icfLevel; +} + static void warnIfDeprecatedOption(const Option &opt) { if (!opt.getGroup().isValid()) return; @@ -1024,6 +1039,8 @@ config->undefinedSymbolTreatment = getUndefinedSymbolTreatment(args); + config->icfLevel = getICFLevel(args); + if (config->outputType == MH_EXECUTE) config->entry = symtab->addUndefined(args.getLastArgValue(OPT_e, "_main"), /*file=*/nullptr, diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -488,18 +488,20 @@ if (isWeakDefCanBeHidden) isPrivateExtern = true; - return symtab->addDefined(name, isec->file, isec, value, size, - sym.n_desc & N_WEAK_DEF, isPrivateExtern, - sym.n_desc & N_ARM_THUMB_DEF, - sym.n_desc & REFERENCED_DYNAMICALLY); + isec->defined = symtab->addDefined(name, isec->file, isec, value, size, + sym.n_desc & N_WEAK_DEF, isPrivateExtern, + sym.n_desc & N_ARM_THUMB_DEF, + sym.n_desc & REFERENCED_DYNAMICALLY); + return isec->defined; } assert(!isWeakDefCanBeHidden && "weak_def_can_be_hidden on already-hidden symbol?"); - return make( + isec->defined = make( name, isec->file, isec, value, size, sym.n_desc & N_WEAK_DEF, /*isExternal=*/false, /*isPrivateExtern=*/false, sym.n_desc & N_ARM_THUMB_DEF, sym.n_desc & REFERENCED_DYNAMICALLY); + return isec->defined; } // Absolute symbols are defined symbols that do not have an associated @@ -612,10 +614,18 @@ } auto *nextIsec = make(*isec); - nextIsec->data = isec->data.slice(symbolOffset); nextIsec->numRefs = 0; nextIsec->canOmitFromOutput = false; - isec->data = isec->data.slice(0, symbolOffset); +#if 0 + if (isZeroFill(isec->flags)) { + nextIsec->data = {nullptr, isec->data.size() - symbolOffset}; + isec->data = {nullptr, symbolOffset}; + } else +#endif + { + nextIsec->data = isec->data.slice(symbolOffset); + isec->data = isec->data.slice(0, symbolOffset); + } // By construction, the symbol will be at offset zero in the new // subsection. diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -20,6 +20,7 @@ class InputFile; class OutputSection; +class Defined; class InputSection { public: @@ -47,6 +48,9 @@ // How many symbols refer to this InputSection. uint32_t numRefs = 0; + // Equivalence-class ID for ICF (Identical Code Folding) + uint64_t icfEquivalenceClass[2] = {0, 0}; + // True if this InputSection could not be written to the output file. // With subsections_via_symbols, most symbol have its own InputSection, // and for weak symbols (e.g. from inline functions), only the @@ -58,6 +62,12 @@ bool shouldOmitFromOutput() const { return canOmitFromOutput && numRefs == 0; } + bool isEligibleForICF() const; + void hashForICF(); + void foldIdentical(InputSection *redundant); + + // InputSection *replica = this; // ... or the surving copy after ICF + Defined *defined = nullptr; // symbol that references this for ICF logs ArrayRef data; std::vector relocs; diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -15,6 +15,7 @@ #include "Writer.h" #include "lld/Common/Memory.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/xxhash.h" using namespace llvm; using namespace llvm::MachO; @@ -45,6 +46,65 @@ return sym->getVA(); } +bool InputSection::isEligibleForICF() const { + if (shouldOmitFromOutput()) + return false; + if (in.unwindInfo->funcHasPersonality(this)) + return false; + switch (sectionType(flags)) { + case S_REGULAR: + return true; + case S_CSTRING_LITERALS: + case S_4BYTE_LITERALS: + case S_8BYTE_LITERALS: + case S_16BYTE_LITERALS: + case S_LITERAL_POINTERS: + return false; + case S_ZEROFILL: + case S_GB_ZEROFILL: + case S_NON_LAZY_SYMBOL_POINTERS: + case S_LAZY_SYMBOL_POINTERS: + case S_SYMBOL_STUBS: + case S_MOD_INIT_FUNC_POINTERS: + case S_MOD_TERM_FUNC_POINTERS: + case S_COALESCED: + case S_INTERPOSING: + case S_DTRACE_DOF: + case S_LAZY_DYLIB_SYMBOL_POINTERS: + case S_THREAD_LOCAL_REGULAR: + case S_THREAD_LOCAL_ZEROFILL: + case S_THREAD_LOCAL_VARIABLES: + case S_THREAD_LOCAL_VARIABLE_POINTERS: + case S_THREAD_LOCAL_INIT_FUNCTION_POINTERS: + return false; + default: + llvm_unreachable("Section type"); + } +} + +void InputSection::hashForICF() { + assert(data.data()); // zeroFill section data has nullptr with non-zero size + assert(icfEquivalenceClass[0] == 0); // don't overwrite a unique ID! + // Turn-on the top bit to guarantee that valid hashes have no + // collisions with the small-integer unique IDs for ICF-inelgible sections + icfEquivalenceClass[0] = xxHash64(data) | (1ull << 63); +} + +void InputSection::foldIdentical(InputSection *copy) { + StringRef thisName = defined ? defined->getName() : "???"; + StringRef thatName = copy->defined ? copy->defined->getName() : "???"; + log("ICF: " + thisName + " replaces " + thatName + ", saves " + + to_string(getSize())); + align = std::max(align, copy->align); + // copy->replica = this; + copy->canOmitFromOutput = true; + if (copy->defined) { + numRefs++; + copy->defined->isec = this; + copy->numRefs--; + } +} + void InputSection::writeTo(uint8_t *buf) { assert(!shouldOmitFromOutput()); diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td --- a/lld/MachO/Options.td +++ b/lld/MachO/Options.td @@ -46,10 +46,15 @@ def no_lto_legacy_pass_manager : Flag<["--"], "no-lto-legacy-pass-manager">, HelpText<"Use the new pass manager in LLVM">, Group; -def time_trace: Flag<["--"], "time-trace">, HelpText<"Record time trace">; +def time_trace: Flag<["--"], "time-trace">, + HelpText<"Record time trace">, + Group; def time_trace_granularity_eq: Joined<["--"], "time-trace-granularity=">, - HelpText<"Minimum time granularity (in microseconds) traced by time profiler">; -def time_trace_file_eq: Joined<["--"], "time-trace-file=">, HelpText<"Specify time trace output file">; + HelpText<"Minimum time granularity (in microseconds) traced by time profiler">, + Group; +def time_trace_file_eq: Joined<["--"], "time-trace-file=">, + HelpText<"Specify time trace output file">, + Group; // This is a complete Options.td compiled from Apple's ld(1) manpage // dated 2018-03-07 and cross checked with ld64 source code in repo @@ -254,6 +259,10 @@ HelpText<"Disable infra for branches beyond the maximum branch distance.">, Flags<[HelpHidden]>, Group; +def icf: Separate<["-"], "icf">, + HelpText<"Enable identical code folding at level (default: none)">, + MetaVarName<"[none,safe,all]">, + Group; def grp_version : OptionGroup<"version">, HelpText<"VERSION TARGETING">; diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp --- a/lld/MachO/SymbolTable.cpp +++ b/lld/MachO/SymbolTable.cpp @@ -89,6 +89,8 @@ Defined *defined = replaceSymbol( s, name, file, isec, value, size, isWeakDef, /*isExternal=*/true, isPrivateExtern, isThumb, isReferencedDynamically); + if (isec) + isec->defined = defined; defined->overridesWeakDef = overridesWeakDef; return defined; } diff --git a/lld/MachO/UnwindInfoSection.h b/lld/MachO/UnwindInfoSection.h --- a/lld/MachO/UnwindInfoSection.h +++ b/lld/MachO/UnwindInfoSection.h @@ -20,11 +20,14 @@ namespace lld { namespace macho { +struct Referent; + class UnwindInfoSection : public SyntheticSection { public: bool isNeeded() const override { return compactUnwindSection != nullptr; } uint64_t getSize() const override { return unwindInfoSize; } virtual void prepareRelocations(InputSection *) = 0; + virtual bool funcHasPersonality(const InputSection *) = 0; void setCompactUnwindSection(ConcatOutputSection *cuSection) { compactUnwindSection = cuSection; diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp --- a/lld/MachO/UnwindInfoSection.cpp +++ b/lld/MachO/UnwindInfoSection.cpp @@ -23,6 +23,8 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/BinaryFormat/MachO.h" +#include + using namespace llvm; using namespace llvm::MachO; using namespace lld; @@ -114,6 +116,13 @@ template class UnwindInfoSectionImpl : public UnwindInfoSection { public: void prepareRelocations(InputSection *) override; + bool funcHasPersonality(const InputSection *isec) override { + bool hasPersonality = funcsWithPersonality.contains(isec); + if (hasPersonality && isec->defined) + log("ICF get personality " + + (isec->defined ? isec->defined->getName() : "???")); + return hasPersonality; + } void finalize() override; void writeTo(uint8_t *buf) const override; @@ -124,6 +133,7 @@ std::vector personalities; SmallDenseMap, Symbol *> personalityTable; + DenseSet funcsWithPersonality; std::vector lsdaEntries; // Map of function offset (from the image base) to an index within the LSDA // array. @@ -146,12 +156,22 @@ assert(!isec->shouldOmitFromOutput() && "__compact_unwind section should not be omitted"); - for (Reloc &r : isec->relocs) { + for (size_t i = 0; i < isec->relocs.size(); ++i) { + Reloc &r = isec->relocs[i]; assert(target->hasAttr(r.type, RelocAttrBits::UNSIGNED)); if (r.offset % sizeof(CompactUnwindEntry) != offsetof(CompactUnwindEntry, personality)) continue; + Reloc &rFunc = isec->relocs[++i]; + assert(r.offset == + rFunc.offset + offsetof(CompactUnwindEntry, personality)); + if (const auto *fIsec = rFunc.referent.get()) { + const auto *fSym = fIsec->defined; + log("ICF sec personality " + (fSym ? fSym->getName() : "???")); + funcsWithPersonality.insert(fIsec); + } + if (auto *s = r.referent.dyn_cast()) { if (auto *undefined = dyn_cast(s)) { treatUndefinedSymbol(*undefined); diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -860,6 +860,32 @@ } } + ConcatOutputSection *textSection = concatOutputSections.lookup( + maybeRenameSection({segment_names::text, section_names::text})); + if (config->icfLevel != ICFLevel::none && textSection) { + // ICF operates within a single output section comprising many + // input sections, i.e., within a ConcatOutputSection. The ICF + // algorithm relies on pre-computed hashes of InputSection::data + // for the ConcatOutputSection::inputs and all sections referenced + // by their relocs. We could recursively traverse the relocs to + // find every referenced InputSection, but that precludes easy + // parallelization. Therefore, we hash every InputSection here + // where we have them all accessible as a simple vector. + std::vector hashable; + // If an InputSection is ineligible for ICF, we give it a unique + // ID to force it into its own unfoldable equivalence class. + uint64_t icfInvalidId = 0; + for (InputSection *isec : inputSections) { + if (isec->parent == textSection && isec->isEligibleForICF()) + hashable.push_back(isec); + else + isec->icfEquivalenceClass[0] = ++icfInvalidId; + } + parallelForEach(hashable, [](InputSection *isec) { isec->hashForICF(); }); + // FIXME: should we fold more than __TEXT,__text ? + textSection->foldIdenticalSections(); + } + // dyld requires __LINKEDIT segment to always exist (even if empty). linkEditSegment = getOrCreateOutputSegment(segment_names::linkEdit); } diff --git a/lld/test/MachO/icf.s b/lld/test/MachO/icf.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/icf.s @@ -0,0 +1,62 @@ +# REQUIRES: x86 +# RUN: rm -rf %t; mkdir %t + +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/main.o +# RUN: %lld -lSystem -icf all -o %t/main %t/main.o +# RUN: llvm-objdump -d --syms %t/main | FileCheck %s + +## Check that _a and _b fold into a single function, as evidenced by +## sharing a text address, and that _c does not fold, as evidenced +## by its appearance in the disassembly. + +# CHECK-LABEL: SYMBOL TABLE: +# CHECK: [[#%x,AB:]] g F __TEXT,__text _a +# CHECK: [[#%x,AB]] g F __TEXT,__text _b +# CHECK: [[#%x,C:]] g F __TEXT,__text _c +# CHECK: [[#%x,D:]] g F __TEXT,__text _d + +# CHECK-LABEL: Disassembly of section __TEXT,__text: +# CHECK-NOT: [[#]] <_a>: +# CHECK: [[#%x,AB]] <_b>: +# CHECK: [[#%x,C]] <_c>: +# CHECK: [[#%x,D]] <_d>: + +.subsections_via_symbols +.text + +.globl _a +.p2align 2 +_a: + movq 8(%rip), %rsi + callq _d + ret + +.globl _b +.p2align 2 +_b: + movq 8(%rip), %rsi + callq _d + retq + +.globl _c +.p2align 2 +_c: + movq 4(%rip), %rsi + callq _d + ret + +.globl _d +.p2align 2 +_d: + movq 8(%rip), %rsi + callq _d + ret + +.globl _main +.p2align 2 +_main: + callq _a + callq _b + callq _c + callq _d + ret