diff --git a/lld/MachO/ConcatOutputSection.h b/lld/MachO/ConcatOutputSection.h --- a/lld/MachO/ConcatOutputSection.h +++ b/lld/MachO/ConcatOutputSection.h @@ -31,6 +31,7 @@ const ConcatInputSection *firstSection() const { return inputs.front(); } const ConcatInputSection *lastSection() const { return inputs.back(); } + bool isNeeded() const override { return !inputs.empty(); } // These accessors will only be valid after finalizing the section uint64_t getSize() const override { return size; } @@ -50,6 +51,8 @@ return sec->kind() == ConcatKind; } + static ConcatOutputSection *getOrCreateForInput(const InputSection *); + private: void finalizeFlags(InputSection *input); @@ -79,6 +82,12 @@ uint8_t sequence = 0; // how many thunks created so-far? }; +NamePair maybeRenameSection(NamePair key); + +// Output sections are added to output segments in iteration order +// of ConcatOutputSection, so must have deterministic iteration order. +extern llvm::MapVector concatOutputSections; + extern llvm::DenseMap thunkMap; } // namespace macho diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp --- a/lld/MachO/ConcatOutputSection.cpp +++ b/lld/MachO/ConcatOutputSection.cpp @@ -24,7 +24,10 @@ using namespace lld; using namespace lld::macho; +MapVector macho::concatOutputSections; + void ConcatOutputSection::addInput(ConcatInputSection *input) { + assert(input->parent == this); if (inputs.empty()) { align = input->align; flags = input->getFlags(); @@ -33,7 +36,6 @@ finalizeFlags(input); } inputs.push_back(input); - input->parent = this; } // Branch-range extension can be implemented in two ways, either through ... @@ -357,3 +359,22 @@ break; } } + +ConcatOutputSection * +ConcatOutputSection::getOrCreateForInput(const InputSection *isec) { + NamePair names = maybeRenameSection({isec->getSegName(), isec->getName()}); + ConcatOutputSection *&osec = concatOutputSections[names]; + if (!osec) + osec = make(names.second); + return osec; +} + +NamePair macho::maybeRenameSection(NamePair key) { + auto newNames = config->sectionRenameMap.find(key); + if (newNames != config->sectionRenameMap.end()) + return newNames->second; + auto newName = config->segmentRenameMap.find(key.first); + if (newName != config->segmentRenameMap.end()) + return std::make_pair(newName->second, key.second); + return key; +} diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -577,6 +577,7 @@ // any CommonSymbols. static void replaceCommonSymbols() { TimeTraceScope timeScope("Replace common symbols"); + ConcatOutputSection *osec = nullptr; for (Symbol *sym : symtab->getSymbols()) { auto *common = dyn_cast(sym); if (common == nullptr) @@ -589,6 +590,9 @@ auto *isec = make( segment_names::data, section_names::common, common->getFile(), data, common->align, S_ZEROFILL); + if (!osec) + osec = ConcatOutputSection::getOrCreateForInput(isec); + isec->parent = osec; inputSections.push_back(isec); // FIXME: CommonSymbol should store isReferencedDynamically, noDeadStrip @@ -1037,6 +1041,7 @@ int inputOrder = 0; for (const InputFile *file : inputFiles) { for (const SubsectionMap &map : file->subsections) { + ConcatOutputSection *osec = nullptr; for (const SubsectionEntry &entry : map) { if (auto *isec = dyn_cast(entry.isec)) { if (isec->isCoalescedWeak()) @@ -1047,6 +1052,9 @@ continue; } isec->outSecOff = inputOrder++; + if (!osec) + osec = ConcatOutputSection::getOrCreateForInput(isec); + isec->parent = osec; inputSections.push_back(isec); } else if (auto *isec = dyn_cast(entry.isec)) { if (in.cStringSection->inputOrder == UnspecifiedInputOrder) diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp --- a/lld/MachO/ICF.cpp +++ b/lld/MachO/ICF.cpp @@ -52,22 +52,18 @@ // // Summary of segments & sections: // -// Since folding never occurs across output-section boundaries, -// ConcatOutputSection is the natural input for ICF. -// // The __TEXT segment is readonly at the MMU. Some sections are already // deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal*) and some are // synthetic and inherently free of duplicates (__TEXT,__stubs & -// __TEXT,__unwind_info). We only run ICF on __TEXT,__text. One might hope ICF -// could work on __TEXT,__concat, but doing so induces many test failures. +// __TEXT,__unwind_info). Note that we don't yet run ICF on __TEXT,__const, +// because doing so induces many test failures. // // The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and // thus ineligible for ICF. // // The __DATA_CONST segment is read/write at the MMU, but is logically const to -// the application after dyld applies fixups to pointer data. Some sections are -// deduplicated elsewhere (__DATA_CONST,__cfstring), and some are synthetic -// (__DATA_CONST,__got). There are no ICF opportunities here. +// the application after dyld applies fixups to pointer data. We currently +// fold only the __DATA_CONST,__cfstring section. // // The __DATA segment is read/write at the MMU, and as application-writeable // data, none of its sections are eligible for ICF. @@ -84,12 +80,13 @@ // Compare everything except the relocation referents static bool equalsConstant(const ConcatInputSection *ia, const ConcatInputSection *ib) { + // We can only fold within the same OutputSection. + if (ia->parent != ib->parent) + return false; if (ia->data.size() != ib->data.size()) return false; if (ia->data != ib->data) return false; - if (ia->getFlags() != ib->getFlags()) - return false; if (ia->relocs.size() != ib->relocs.size()) return false; auto f = [&](const Reloc &ra, const Reloc &rb) { @@ -323,8 +320,6 @@ // relocs to find every referenced InputSection, but that precludes easy // parallelization. Therefore, we hash every InputSection here where we have // them all accessible as simple vectors. - std::vector codeSections; - std::vector cfStringSections; // ICF can't fold functions with unwind info DenseSet functionsWithUnwindInfo = @@ -338,32 +333,22 @@ // coexist with equivalence-class IDs, this is not necessary, but might help // someone keep the numbers straight in case we ever need to debug the // ICF::segregate() + std::vector hashable; uint64_t icfUniqueID = inputSections.size(); for (ConcatInputSection *isec : inputSections) { + // FIXME: consider non-code __text sections as hashable? bool isHashable = (isCodeSection(isec) || isCfStringSection(isec)) && !isec->shouldOmitFromOutput() && !functionsWithUnwindInfo.contains(isec) && isec->isHashableForICF(); - if (isHashable) { - if (isCodeSection(isec)) - codeSections.push_back(isec); - else { - assert(isCfStringSection(isec)); - cfStringSections.push_back(isec); - } - } else { + if (isHashable) + hashable.push_back(isec); + else isec->icfEqClass[0] = ++icfUniqueID; - } } - std::vector hashable(codeSections); - hashable.insert(hashable.end(), cfStringSections.begin(), - cfStringSections.end()); parallelForEach(hashable, [](ConcatInputSection *isec) { isec->hashForICF(); }); // Now that every input section is either hashed or marked as unique, run the // segregation algorithm to detect foldable subsections. - // We dedup cfStringSections first since code sections may refer to them, but - // not vice-versa. - ICF(cfStringSections).run(); - ICF(codeSections).run(); + ICF(hashable).run(); } diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "InputSection.h" +#include "ConcatOutputSection.h" #include "Config.h" #include "InputFiles.h" #include "OutputSegment.h" diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -15,6 +15,7 @@ #include "OutputSection.h" #include "OutputSegment.h" #include "Target.h" +#include "Writer.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Hashing.h" diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -15,7 +15,6 @@ #include "OutputSegment.h" #include "SymbolTable.h" #include "Symbols.h" -#include "Writer.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" @@ -596,6 +595,8 @@ in.got->addEntry(stubBinder); + in.imageLoaderCache->parent = + ConcatOutputSection::getOrCreateForInput(in.imageLoaderCache); inputSections.push_back(in.imageLoaderCache); // Since this isn't in the symbol table or in any input file, the noDeadStrip // argument doesn't matter. It's kept alive by ImageLoaderCacheSection() diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp --- a/lld/MachO/UnwindInfoSection.cpp +++ b/lld/MachO/UnwindInfoSection.cpp @@ -144,6 +144,7 @@ void UnwindInfoSectionImpl::addInput(ConcatInputSection *isec) { assert(isec->getSegName() == segment_names::ld && isec->getName() == section_names::compactUnwind); + isec->parent = compactUnwindSection; compactUnwindSection->addInput(isec); } diff --git a/lld/MachO/Writer.h b/lld/MachO/Writer.h --- a/lld/MachO/Writer.h +++ b/lld/MachO/Writer.h @@ -9,8 +9,6 @@ #ifndef LLD_MACHO_WRITER_H #define LLD_MACHO_WRITER_H -#include "Config.h" - #include namespace lld { @@ -29,8 +27,6 @@ template void writeResult(); -NamePair maybeRenameSection(NamePair key); - void createSyntheticSections(); // Add bindings for symbols that need weak or non-lazy bindings. diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -76,10 +76,6 @@ LCUuid *uuidCommand = nullptr; OutputSegment *linkEditSegment = nullptr; - - // Output sections are added to output segments in iteration order - // of ConcatOutputSection, so must have deterministic iteration order. - MapVector concatOutputSections; }; // LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information. @@ -879,16 +875,6 @@ } } -NamePair macho::maybeRenameSection(NamePair key) { - auto newNames = config->sectionRenameMap.find(key); - if (newNames != config->sectionRenameMap.end()) - return newNames->second; - auto newName = config->segmentRenameMap.find(key.first); - if (newName != config->segmentRenameMap.end()) - return std::make_pair(newName->second, key.second); - return key; -} - template void Writer::createOutputSections() { TimeTraceScope timeScope("Create output sections"); // First, create hidden sections @@ -919,10 +905,7 @@ for (ConcatInputSection *isec : inputSections) { if (isec->shouldOmitFromOutput()) continue; - NamePair names = maybeRenameSection({isec->getSegName(), isec->getName()}); - ConcatOutputSection *&osec = concatOutputSections[names]; - if (!osec) - osec = make(names.second); + ConcatOutputSection *osec = cast(isec->parent); osec->addInput(isec); osec->inputOrder = std::min(osec->inputOrder, static_cast(isec->outSecOff)); @@ -934,7 +917,8 @@ StringRef segname = it.first.first; ConcatOutputSection *osec = it.second; assert(segname != segment_names::ld); - getOrCreateOutputSegment(segname)->addOutputSection(osec); + if (osec->isNeeded()) + getOrCreateOutputSegment(segname)->addOutputSection(osec); } for (SyntheticSection *ssec : syntheticSections) { diff --git a/lld/test/MachO/icf.s b/lld/test/MachO/icf.s --- a/lld/test/MachO/icf.s +++ b/lld/test/MachO/icf.s @@ -25,15 +25,17 @@ # CHECK: [[#%x,SR]] g F __TEXT,__text _sr2 # CHECK: [[#%x,MR:]] g F __TEXT,__text _mr1 # CHECK: [[#%x,MR]] g F __TEXT,__text _mr2 +# CHECK: [[#%x,K1:]] g O __TEXT,__foo _k1 +# CHECK: [[#%x,A:]] g F __TEXT,__text _k2 ### FIXME: Mutually-recursive functions with identical bodies (see below) # COM: [[#%x,XR:]] g F __TEXT,__text _xr1 # COM: [[#%x,XR]] g F __TEXT,__text _xr2 # CHECK-LABEL: Disassembly of section __TEXT,__text: # CHECK: [[#%x,MAIN]] <_main>: -# CHECK-NEXT: callq 0x[[#%x,A]] <_a3> -# CHECK-NEXT: callq 0x[[#%x,A]] <_a3> -# CHECK-NEXT: callq 0x[[#%x,A]] <_a3> +# CHECK-NEXT: callq 0x[[#%x,A]] <_k2> +# CHECK-NEXT: callq 0x[[#%x,A]] <_k2> +# CHECK-NEXT: callq 0x[[#%x,A]] <_k2> # CHECK-NEXT: callq 0x[[#%x,B]] <_b> # CHECK-NEXT: callq 0x[[#%x,B2]] <_b2> # CHECK-NEXT: callq 0x[[#%x,C]] <_c> @@ -44,6 +46,8 @@ # CHECK-NEXT: callq 0x[[#%x,H]] <_h> # CHECK-NEXT: callq 0x[[#%x,I]] <_i> # CHECK-NEXT: callq 0x[[#%x,J]] <_j> +# CHECK-NEXT: callq 0x[[#%x,K1]] <_k1> +# CHECK-NEXT: callq 0x[[#%x,A]] <_k2> # CHECK-NEXT: callq 0x[[#%x,SR]] <_sr2> # CHECK-NEXT: callq 0x[[#%x,SR]] <_sr2> # CHECK-NEXT: callq 0x[[#%x,MR]] <_mr2> @@ -232,8 +236,38 @@ ret .cfi_endproc +### No fold: _k1 is in a different section from _a1 +.section __TEXT,__foo +.globl _k1 +.p2align 2 +_k1: + callq _d + mov ___nan@GOTPCREL(%rip), %rax + callq ___isnan + movabs $_abs1a, %rdx + movl $0, %eax + ret + nopl (%rax) + +### Fold: _k2 is in a section that gets renamed and output as __text +.section __TEXT,__StaticInit +.globl _k2 +.p2align 2 +_k2: + callq _d + mov ___nan@GOTPCREL(%rip), %rax + callq ___isnan + movabs $_abs1a, %rdx + movl $0, %eax + ret +## For some reason, llvm-mc generates different nop encodings when adding +## padding for __StaticInit vs __text functions. So we explicitly specify the +## nop here to make sure this function can be folded with _a1. + nopl (%rax) + ### Fold: Simple recursion +.text .globl _sr1 .p2align 2 _sr1: @@ -311,6 +345,8 @@ callq _h callq _i callq _j + callq _k1 + callq _k2 callq _sr1 callq _sr2 callq _mr1