diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -1915,6 +1915,8 @@ if (config->deadStrip) markLive(); + objc::checkCategories(); + // ICF assumes that all literals have been folded already, so we must run // foldIdenticalLiterals before foldIdenticalSections. foldIdenticalLiterals(); diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -1271,13 +1271,10 @@ } } if (personalityAddrOff != 0) { - auto personalityRelocIt = - llvm::find_if(isec->relocs, [=](const macho::Reloc &r) { - return r.offset == personalityAddrOff; - }); - if (personalityRelocIt == isec->relocs.end()) + const auto *personalityReloc = isec->getRelocAt(personalityAddrOff); + if (!personalityReloc) reader.failOn(off, "Failed to locate relocation for personality symbol"); - cie.personalitySymbol = personalityRelocIt->referent.get(); + cie.personalitySymbol = personalityReloc->referent.get(); } return cie; } diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -55,6 +55,8 @@ // Return the source line corresponding to an address, or the empty string. // Format: Source.cpp:123 (/path/to/Source.cpp:123) std::string getSourceLocation(uint64_t off) const; + // Return the relocation at \p off, if it exists. This does a linear search. + const Reloc *getRelocAt(uint32_t off) const; // Whether the data at \p off in this InputSection is live. virtual bool isLive(uint64_t off) const = 0; virtual void markLive(uint64_t off) = 0; @@ -218,6 +220,10 @@ return toStringRef(data.slice(begin, end - begin)); } + StringRef getStringRefAtOffset(uint64_t off) const { + return getStringRef(getStringPieceIndex(off)); + } + // Returns i'th piece as a CachedHashStringRef. This function is very hot when // string merging is enabled, so we want to inline. 
LLVM_ATTRIBUTE_ALWAYS_INLINE @@ -232,6 +238,9 @@ bool deduplicateLiterals = false; std::vector pieces; + +private: + size_t getStringPieceIndex(uint64_t off) const; }; class WordLiteralInputSection final : public InputSection { diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -135,6 +135,14 @@ return {}; } +const Reloc *InputSection::getRelocAt(uint32_t off) const { + auto it = llvm::find_if( + relocs, [=](const macho::Reloc &r) { return r.offset == off; }); + if (it == relocs.end()) + return nullptr; + return &*it; +} + void ConcatInputSection::foldIdentical(ConcatInputSection *copy) { align = std::max(align, copy->align); copy->live = false; @@ -259,6 +267,15 @@ return const_cast(this)->getStringPiece(off); } +size_t CStringInputSection::getStringPieceIndex(uint64_t off) const { + if (off >= data.size()) + fatal(toString(this) + ": offset is outside the section"); + + auto it = + partition_point(pieces, [=](StringPiece p) { return p.inSecOff <= off; }); + return std::distance(pieces.begin(), it) - 1; +} + uint64_t CStringInputSection::getOffset(uint64_t off) const { const StringPiece &piece = getStringPiece(off); uint64_t addend = off - piece.inSecOff; diff --git a/lld/MachO/Layout.h b/lld/MachO/Layout.h new file mode 100644 --- /dev/null +++ b/lld/MachO/Layout.h @@ -0,0 +1,53 @@ +//===- Layout.h -----------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Convenience macros for obtaining offsets of members in structs. 
+// +// Usage: +// +// #define FOR_EACH_FOO_FIELD(DO) \ +// DO(Ptr, bar) \ +// DO(uint32_t, baz) +// CREATE_LAYOUT_CLASS(Foo, FOR_EACH_FOO_FIELD); +// #undef FOR_EACH_FOO_FIELD +// +// This will generate the equivalent of (Ptr is the target's word type) +// +// struct FooLayout { +// uint32_t barOffset = 0; +// uint32_t bazOffset = sizeof(Ptr); +// uint32_t totalSize = sizeof(struct { Ptr bar; uint32_t baz; }); +// }; + +#define _OFFSET_FOR_FIELD(type, name) uint32_t name##Offset; +#define _INIT_OFFSET(type, name) name##Offset = offsetof(Layout<Ptr>, name); +#define _LAYOUT_ENTRY(type, name) type name; + +#define CREATE_LAYOUT_CLASS(className, FOR_EACH_FIELD) \ + struct className##Layout { \ + FOR_EACH_FIELD(_OFFSET_FOR_FIELD) \ + uint32_t totalSize; \ + \ + className##Layout(size_t wordSize) { \ + if (wordSize == 8) \ + init<uint64_t>(); \ + else { \ + assert(wordSize == 4); \ + init<uint32_t>(); \ + } \ + } \ + \ + private: \ + template <class Ptr> void init() { \ + FOR_EACH_FIELD(_INIT_OFFSET); \ + totalSize = sizeof(Layout<Ptr>); \ + } \ + template <class Ptr> struct Layout { \ + FOR_EACH_FIELD(_LAYOUT_ENTRY) \ + }; \ + } diff --git a/lld/MachO/ObjC.h b/lld/MachO/ObjC.h --- a/lld/MachO/ObjC.h +++ b/lld/MachO/ObjC.h @@ -20,6 +20,9 @@ constexpr const char ehtype[] = "_OBJC_EHTYPE_$_"; constexpr const char ivar[] = "_OBJC_IVAR_$_"; +// Check for duplicate method names within related categories / classes.
+void checkCategories(); + +} // namespace objc bool hasObjCSection(llvm::MemoryBufferRef); diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp --- a/lld/MachO/ObjC.cpp +++ b/lld/MachO/ObjC.cpp @@ -9,10 +9,12 @@ #include "ObjC.h" #include "InputFiles.h" #include "InputSection.h" +#include "Layout.h" #include "OutputSegment.h" #include "Target.h" #include "lld/Common/ErrorHandler.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/BinaryFormat/MachO.h" #include "llvm/Bitcode/BitcodeReader.h" @@ -66,3 +68,240 @@ return false; } } + +namespace { + +#define FOR_EACH_CATEGORY_FIELD(DO) \ + DO(Ptr, name) \ + DO(Ptr, klass) \ + DO(Ptr, instanceMethods) \ + DO(Ptr, classMethods) \ + DO(Ptr, protocols) \ + DO(Ptr, instanceProps) \ + DO(Ptr, classProps) + +CREATE_LAYOUT_CLASS(Category, FOR_EACH_CATEGORY_FIELD); + +#undef FOR_EACH_CATEGORY_FIELD + +#define FOR_EACH_CLASS_FIELD(DO) \ + DO(Ptr, metaClass) \ + DO(Ptr, superClass) \ + DO(Ptr, methodCache) \ + DO(Ptr, vtable) \ + DO(Ptr, roData) + +CREATE_LAYOUT_CLASS(Class, FOR_EACH_CLASS_FIELD); + +#undef FOR_EACH_CLASS_FIELD + +#define FOR_EACH_RO_CLASS_FIELD(DO) \ + DO(uint32_t, flags) \ + DO(uint32_t, instanceStart) \ + DO(Ptr, instanceSize) \ + DO(Ptr, ivarLayout) \ + DO(Ptr, name) \ + DO(Ptr, baseMethods) \ + DO(Ptr, baseProtocols) \ + DO(Ptr, ivars) \ + DO(Ptr, weakIvarLayout) \ + DO(Ptr, baseProperties) + +CREATE_LAYOUT_CLASS(ROClass, FOR_EACH_RO_CLASS_FIELD); + +#undef FOR_EACH_RO_CLASS_FIELD + +#define FOR_EACH_LIST_HEADER(DO) \ + DO(uint32_t, size) \ + DO(uint32_t, count) + +CREATE_LAYOUT_CLASS(ListHeader, FOR_EACH_LIST_HEADER); + +#undef FOR_EACH_LIST_HEADER + +#define FOR_EACH_METHOD(DO) \ + DO(Ptr, name) \ + DO(Ptr, type) \ + DO(Ptr, impl) + +CREATE_LAYOUT_CLASS(Method, FOR_EACH_METHOD); + +#undef FOR_EACH_METHOD + +} + +enum MethodContainerKind { + MCK_Class, + MCK_Category, +}; + +struct MethodContainer { + MethodContainerKind kind; + const ConcatInputSection *isec; +}; + +enum MethodKind { + MK_Instance, +
MK_Static, +}; + +struct ObjcClass { + DenseMap<CachedHashStringRef, MethodContainer> instanceMethods; + DenseMap<CachedHashStringRef, MethodContainer> classMethods; +}; + +class ObjcCategoryChecker { +public: + ObjcCategoryChecker(); + void parseCategory(const ConcatInputSection *catIsec); + +private: + void parseClass(const Defined *classSym); + void parseMethods(const ConcatInputSection *methodsIsec, + const Symbol *methodContainerSym, + const ConcatInputSection *containerIsec, + MethodContainerKind, MethodKind); + + CategoryLayout catLayout; + ClassLayout classLayout; + ROClassLayout roClassLayout; + ListHeaderLayout listHeaderLayout; + MethodLayout methodLayout; + + DenseMap<const Symbol *, ObjcClass> classMap; +}; + +ObjcCategoryChecker::ObjcCategoryChecker() + : catLayout(target->wordSize), classLayout(target->wordSize), + roClassLayout(target->wordSize), listHeaderLayout(target->wordSize), + methodLayout(target->wordSize) {} + +// \p r must point to an offset within a cstring section. +static StringRef getReferentString(const Reloc &r) { + return cast<CStringInputSection>(r.referent.get<InputSection *>()) + ->getStringRefAtOffset(r.addend); +} + +// Record each method of \p methodsIsec under the class \p methodContainerSym, +// remembering \p containerIsec as its definer. Warn if a method of the same +// kind and name has already been recorded for that class. +void ObjcCategoryChecker::parseMethods(const ConcatInputSection *methodsIsec, + const Symbol *methodContainerSym, + const ConcatInputSection *containerIsec, + MethodContainerKind mcKind, + MethodKind mKind) { + auto &klass = classMap[methodContainerSym]; + for (const Reloc &r : methodsIsec->relocs) { + if ((r.offset - listHeaderLayout.totalSize) % methodLayout.totalSize != + methodLayout.nameOffset) + continue; + + CachedHashStringRef s(getReferentString(r)); + // +load is special: the runtime invokes every implementation, including + // those supplied by categories of the same class, so several definitions + // never shadow one another and must not be reported as a conflict. + if (mKind == MK_Static && s.val() == "load") + continue; + + auto &methodMap = + mKind == MK_Instance ? klass.instanceMethods : klass.classMethods; + // A single try_emplace probe both records a first definition and detects + // a duplicate, replacing separate count() / operator[] / lookup() probes. + auto insertion = + methodMap.try_emplace(s, MethodContainer{mcKind, containerIsec}); + if (insertion.second) + continue; + + // We have a duplicate; generate a warning message. 
+ const MethodContainer &mc = insertion.first->second; + const Reloc *nameReloc = nullptr; + if (mc.kind == MCK_Category) { + nameReloc = mc.isec->getRelocAt(catLayout.nameOffset); + } else { + assert(mc.kind == MCK_Class); + auto *roIsec = mc.isec->getRelocAt(classLayout.roDataOffset) + ->getReferentInputSection(); + nameReloc = roIsec->getRelocAt(roClassLayout.nameOffset); + } + StringRef containerName = getReferentString(*nameReloc); + StringRef methPrefix = mKind == MK_Instance ? "-" : "+"; + + // We should only ever encounter collisions when parsing category methods + // (since the Class struct is parsed before any of its categories). + assert(mcKind == MCK_Category); + StringRef newCatName = + getReferentString(*containerIsec->getRelocAt(catLayout.nameOffset)); + + StringRef containerType = mc.kind == MCK_Category ? "category" : "class"; + warn("method '" + methPrefix + s.val() + + "' has conflicting definitions:\n>>> defined in category " + + newCatName + " from " + toString(containerIsec->getFile()) + + "\n>>> defined in " + containerType + " " + containerName + " from " + + toString(mc.isec->getFile())); + } +} + +// Parse the category at \p catIsec: first parse the class it extends (if that +// class is defined in our inputs and not yet seen), then record the +// category's class and instance methods, warning about any duplicates. +void ObjcCategoryChecker::parseCategory(const ConcatInputSection *catIsec) { + auto *classReloc = catIsec->getRelocAt(catLayout.klassOffset); + if (!classReloc) + return; + + auto *classSym = classReloc->referent.get<Symbol *>(); + if (auto *d = dyn_cast<Defined>(classSym)) + if (!classMap.count(d)) + parseClass(d); + + if (const auto *r = catIsec->getRelocAt(catLayout.classMethodsOffset)) { + parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()), + classSym, catIsec, MCK_Category, MK_Static); + } + + if (const auto *r = catIsec->getRelocAt(catLayout.instanceMethodsOffset)) { + parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()), + classSym, catIsec, MCK_Category, MK_Instance); + } +} + +void ObjcCategoryChecker::parseClass(const Defined *classSym) { + // Given a Class struct, get its corresponding Methods struct + auto getMethodsIsec = + [&](const InputSection *classIsec) -> 
ConcatInputSection * { + if (const auto *r = classIsec->getRelocAt(classLayout.roDataOffset)) { + const auto *roIsec = + cast(r->getReferentInputSection()); + if (const auto *r = roIsec->getRelocAt(roClassLayout.baseMethodsOffset)) { + if (auto *methodsIsec = + cast_or_null(r->getReferentInputSection())) + return methodsIsec; + } + } + return nullptr; + }; + + auto *classIsec = cast(classSym->isec); + + // Parse instance methods. + if (auto *instanceMethodsIsec = getMethodsIsec(classIsec)) + parseMethods(instanceMethodsIsec, classSym, classIsec, MCK_Class, + MK_Instance); + + // Class methods are contained in the metaclass. + if (const auto *r = classSym->isec->getRelocAt(classLayout.metaClassOffset)) + if (auto *classMethodsIsec = getMethodsIsec( + cast(r->getReferentInputSection()))) + parseMethods(classMethodsIsec, classSym, classIsec, MCK_Class, MK_Static); +} + +void objc::checkCategories() { + ObjcCategoryChecker checker; + for (auto *isec : inputSections) { + if (isec->getName() == section_names::objcCatList) + for (const Reloc &r : isec->relocs) { + auto *catIsec = cast(r.getReferentInputSection()); + checker.parseCategory(catIsec); + } + } +} diff --git a/lld/MachO/Relocations.h b/lld/MachO/Relocations.h --- a/lld/MachO/Relocations.h +++ b/lld/MachO/Relocations.h @@ -67,6 +67,8 @@ int64_t addend, llvm::PointerUnion referent) : type(type), pcrel(pcrel), length(length), offset(offset), addend(addend), referent(referent) {} + + InputSection *getReferentInputSection() const; }; bool validateSymbolRelocation(const Symbol *, const InputSection *, diff --git a/lld/MachO/Relocations.cpp b/lld/MachO/Relocations.cpp --- a/lld/MachO/Relocations.cpp +++ b/lld/MachO/Relocations.cpp @@ -21,6 +21,16 @@ static_assert(sizeof(void *) != 8 || sizeof(Reloc) == 24, "Try to minimize Reloc's size; we create many instances"); +InputSection *Reloc::getReferentInputSection() const { + if (const auto *sym = referent.dyn_cast()) { + if (const auto *d = dyn_cast(sym)) + return 
d->isec; + return nullptr; + } else { + return referent.get(); + } +} + bool macho::validateSymbolRelocation(const Symbol *sym, const InputSection *isec, const Reloc &r) { const RelocAttrs &relocAttrs = target->getRelocAttrs(r.type); diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp --- a/lld/MachO/UnwindInfoSection.cpp +++ b/lld/MachO/UnwindInfoSection.cpp @@ -8,6 +8,7 @@ #include "UnwindInfoSection.h" #include "InputSection.h" +#include "Layout.h" #include "OutputSection.h" #include "OutputSegment.h" #include "SymbolTable.h" @@ -88,41 +89,18 @@ // TODO(gkm): how do we align the 2nd-level pages? -// The offsets of various fields in the on-disk representation of each compact -// unwind entry. -struct CompactUnwindOffsets { - uint32_t functionAddress; - uint32_t functionLength; - uint32_t encoding; - uint32_t personality; - uint32_t lsda; - - CompactUnwindOffsets(size_t wordSize) { - if (wordSize == 8) - init(); - else { - assert(wordSize == 4); - init(); - } - } +// The various fields in the on-disk representation of each compact unwind +// entry. +#define FOR_EACH_CU_FIELD(DO) \ + DO(Ptr, functionAddress) \ + DO(uint32_t, functionLength) \ + DO(compact_unwind_encoding_t, encoding) \ + DO(Ptr, personality) \ + DO(Ptr, lsda) -private: - template void init() { - functionAddress = offsetof(Layout, functionAddress); - functionLength = offsetof(Layout, functionLength); - encoding = offsetof(Layout, encoding); - personality = offsetof(Layout, personality); - lsda = offsetof(Layout, lsda); - } +CREATE_LAYOUT_CLASS(CompactUnwind, FOR_EACH_CU_FIELD); - template struct Layout { - Ptr functionAddress; - uint32_t functionLength; - compact_unwind_encoding_t encoding; - Ptr personality; - Ptr lsda; - }; -}; +#undef FOR_EACH_CU_FIELD // LLD's internal representation of a compact unwind entry. struct CompactUnwindEntry { @@ -148,7 +126,7 @@ // lengthy definition of UnwindInfoSection. 
class UnwindInfoSectionImpl final : public UnwindInfoSection { public: - UnwindInfoSectionImpl() : cuOffsets(target->wordSize) {} + UnwindInfoSectionImpl() : cuLayout(target->wordSize) {} uint64_t getSize() const override { return unwindInfoSize; } void prepare() override; void finalize() override; @@ -162,7 +140,7 @@ uint64_t unwindInfoSize = 0; std::vector symbolsVec; - CompactUnwindOffsets cuOffsets; + CompactUnwindLayout cuLayout; std::vector> commonEncodings; EncodingMap commonEncodingIndexes; // The entries here will be in the same order as their originating symbols @@ -261,7 +239,7 @@ // compact unwind entries that references them, and thus appear as section // relocs. There is no need to prepare them. We only prepare relocs for // personality functions. - if (r.offset != cuOffsets.personality) + if (r.offset != cuLayout.personalityOffset) continue; if (auto *s = r.referent.dyn_cast()) { @@ -373,17 +351,13 @@ auto buf = reinterpret_cast(d->unwindEntry->data.data()) - target->wordSize; cu.functionLength = - support::endian::read32le(buf + cuOffsets.functionLength); - cu.encoding = support::endian::read32le(buf + cuOffsets.encoding); + support::endian::read32le(buf + cuLayout.functionLengthOffset); + cu.encoding = support::endian::read32le(buf + cuLayout.encodingOffset); for (const Reloc &r : d->unwindEntry->relocs) { - if (r.offset == cuOffsets.personality) { + if (r.offset == cuLayout.personalityOffset) cu.personality = r.referent.get(); - } else if (r.offset == cuOffsets.lsda) { - if (auto *referentSym = r.referent.dyn_cast()) - cu.lsda = cast(referentSym)->isec; - else - cu.lsda = r.referent.get(); - } + else if (r.offset == cuLayout.lsdaOffset) + cu.lsda = r.getReferentInputSection(); } }); } diff --git a/lld/test/MachO/lit.local.cfg b/lld/test/MachO/lit.local.cfg --- a/lld/test/MachO/lit.local.cfg +++ b/lld/test/MachO/lit.local.cfg @@ -24,3 +24,5 @@ config.substitutions.append(('%lld', lld + ' -lSystem -fatal_warnings')) 
config.substitutions.append(('%no-lsystem-lld', lld + ' -fatal_warnings')) config.substitutions.append(('%no-fatal-warnings-lld', lld + ' -lSystem')) + +config.suffixes.add('.m') diff --git a/lld/test/MachO/objc-category-conflicts.m b/lld/test/MachO/objc-category-conflicts.m new file mode 100644 --- /dev/null +++ b/lld/test/MachO/objc-category-conflicts.m @@ -0,0 +1,81 @@ +# REQUIRES: x86 +# RUN: rm -rf %t; split-file %s %t +# COM: /usr/bin/clang -S %t/cat1.m -o %t/cat1.s +# COM: /usr/bin/clang -S %t/cat2.m -o %t/cat2.s +# COM: /usr/bin/clang -S %t/klass.m -o %t/klass.s +# RUN: /usr/bin/clang -c %t/cat1.m -o %t/cat1.o +# RUN: /usr/bin/clang -c %t/cat2.m -o %t/cat2.o +# RUN: /usr/bin/clang -c %t/klass.m -o %t/klass.o + +# COM: ld -dylib -lSystem -syslibroot $(xcrun -show-sdk-path) -framework Foundation \ +# COM: %t/klass.o -o %t/libklass.dylib -arch x86_64 -platform_version macos 13.0 13.0 + +# COM: ld -dylib -lSystem -syslibroot $(xcrun -show-sdk-path) -framework Foundation \ +# COM: %t/libklass.dylib %t/cat1.o %t/cat2.o -o %t/out -arch x86_64 -platform_version macos 13.0 13.0 + +# COM: ld -dylib -lSystem -syslibroot $(xcrun -show-sdk-path) -framework Foundation \ +# COM: %t/klass.o %t/cat1.o %t/cat2.o -o %t/out -arch x86_64 -platform_version macos 13.0 13.0 + +# RUN: ld64.lld -dylib -lSystem -syslibroot $(xcrun -show-sdk-path) -framework Foundation \ +# RUN: %t/klass.o %t/cat1.o %t/cat2.o -o %t/out -arch x86_64 -platform_version macos 13.0 13.0 2>&1 | \ +# RUN: FileCheck %s + +# CHECK-DAG: warning: method '-m1' has conflicting definitions: +# CHECK-DAG: warning: method '+s1' has conflicting definitions: +# CHECK-DAG: warning: method '-m2' has conflicting definitions: +# CHECK-DAG: warning: method '+s2' has conflicting definitions: + +#--- klass.h + +#import <Foundation/Foundation.h> + +@interface Foo : NSObject +-(void) m1; ++(void) s1; +@end + +#--- klass.m + +#import "klass.h" + +@implementation Foo +-(void) m1 {} ++(void) s1 {} +@end + +#--- cat1.m + +#import <Foundation/Foundation.h> +#import "klass.h" + +@interface Foo(Cat1) +-(void) m1; +-(void) m2; + ++(void) s1; ++(void) s2; +@end + +@implementation Foo(Cat1) +-(void) m1 {} +-(void) m2 {} + ++(void) s1 {} ++(void) s2 {} +@end + +#--- cat2.m + +#import <Foundation/Foundation.h> +#import "klass.h" + +@interface Foo(Cat2) +-(void) m2; + ++(void) s2; +@end + 
+@implementation Foo(Cat2) +-(void) m2 {} ++(void) s2 {} +@end