Index: wasm/InputChunks.h =================================================================== --- wasm/InputChunks.h +++ wasm/InputChunks.h @@ -130,6 +130,8 @@ StringRef getName() const override { return Function->SymbolName; } StringRef getDebugName() const override { return Function->DebugName; } uint32_t getComdat() const override { return Function->Comdat; } + const ArrayRef getFunctionBody() const { return Function->Body; } + uint32_t getFunctionInputOffset() const { return getInputSectionOffset(); } uint32_t getFunctionIndex() const { return FunctionIndex.getValue(); } bool hasFunctionIndex() const { return FunctionIndex.hasValue(); } void setFunctionIndex(uint32_t Index); Index: wasm/InputChunks.cpp =================================================================== --- wasm/InputChunks.cpp +++ wasm/InputChunks.cpp @@ -89,6 +89,8 @@ break; case R_WEBASSEMBLY_TABLE_INDEX_I32: case R_WEBASSEMBLY_MEMORY_ADDR_I32: + case R_WEBASSEMBLY_FUNCTION_OFFSET_I32: + case R_WEBASSEMBLY_SECTION_OFFSET_I32: ExistingValue = static_cast(read32le(Loc)); write32le(Loc, Value); break; @@ -124,7 +126,9 @@ case R_WEBASSEMBLY_MEMORY_ADDR_LEB: case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: case R_WEBASSEMBLY_MEMORY_ADDR_I32: - writeSleb128(OS, Rel.Addend, "reloc addend"); + case R_WEBASSEMBLY_FUNCTION_OFFSET_I32: + case R_WEBASSEMBLY_SECTION_OFFSET_I32: + writeSleb128(OS, File->calcNewAddend(Rel), "reloc addend"); break; } } Index: wasm/InputFiles.h =================================================================== --- wasm/InputFiles.h +++ wasm/InputFiles.h @@ -95,6 +95,7 @@ uint32_t calcNewIndex(const WasmRelocation &Reloc) const; uint32_t calcNewValue(const WasmRelocation &Reloc) const; + uint32_t calcNewAddend(const WasmRelocation &Reloc) const; uint32_t calcExpectedValue(const WasmRelocation &Reloc) const; const WasmSection *CodeSection = nullptr; @@ -110,12 +111,14 @@ std::vector Functions; std::vector Globals; std::vector CustomSections; + llvm::DenseMap CustomSectionsByIndex; 
ArrayRef getSymbols() const { return Symbols; } Symbol *getSymbol(uint32_t Index) const { return Symbols[Index]; } FunctionSymbol *getFunctionSymbol(uint32_t Index) const; DataSymbol *getDataSymbol(uint32_t Index) const; GlobalSymbol *getGlobalSymbol(uint32_t Index) const; + SectionSymbol *getSectionSymbol(uint32_t Index) const; private: Symbol *createDefined(const WasmSymbol &Sym); Index: wasm/InputFiles.cpp =================================================================== --- wasm/InputFiles.cpp +++ wasm/InputFiles.cpp @@ -16,6 +16,7 @@ #include "lld/Common/Memory.h" #include "llvm/Object/Binary.h" #include "llvm/Object/Wasm.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE "lld" @@ -42,6 +43,12 @@ return MBRef; } +static size_t getFunctionCodeOffset(const ArrayRef FunctionBody) { + unsigned Count; + llvm::decodeULEB128(FunctionBody.data(), &Count); + return Count; +} + void ObjFile::dumpInfo() const { log("info for: " + getName() + "\n Symbols : " + Twine(Symbols.size()) + @@ -60,6 +67,25 @@ return Symbols[Reloc.Index]->getOutputSymbolIndex(); } +// Relocations can contain an addend for combined sections. This function takes +// a relocation and returns the updated addend by offset in the output section. +uint32_t ObjFile::calcNewAddend(const WasmRelocation &Reloc) const { + switch (Reloc.Type) { + case R_WEBASSEMBLY_MEMORY_ADDR_LEB: + case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: + case R_WEBASSEMBLY_MEMORY_ADDR_I32: + case R_WEBASSEMBLY_FUNCTION_OFFSET_I32: + return Reloc.Addend; + case R_WEBASSEMBLY_SECTION_OFFSET_I32: + if (auto *Sym = dyn_cast(getSectionSymbol(Reloc.Index))) { + return Sym->Section->OutputOffset + Reloc.Addend; + } + return 0; + default: + llvm_unreachable("unexpected relocation type"); + } +} + // Calculate the value we expect to find at the relocation location. // This is used as a sanity check before applying a relocation to a given // location. It is useful for catching bugs in the compiler and linker. 
@@ -80,6 +106,16 @@ return Segment.Data.Offset.Value.Int32 + Sym.Info.DataRef.Offset + Reloc.Addend; } + case R_WEBASSEMBLY_FUNCTION_OFFSET_I32: + if (auto *Sym = dyn_cast(getFunctionSymbol(Reloc.Index))) { + size_t FunctionCodeOffset = + getFunctionCodeOffset(Sym->Function->getFunctionBody()); + return Sym->Function->getFunctionInputOffset() + FunctionCodeOffset + + Reloc.Addend; + } + return 0; + case R_WEBASSEMBLY_SECTION_OFFSET_I32: + return Reloc.Addend; case R_WEBASSEMBLY_TYPE_INDEX_LEB: return Reloc.Index; case R_WEBASSEMBLY_FUNCTION_INDEX_LEB: @@ -110,6 +146,18 @@ return getFunctionSymbol(Reloc.Index)->getFunctionIndex(); case R_WEBASSEMBLY_GLOBAL_INDEX_LEB: return getGlobalSymbol(Reloc.Index)->getGlobalIndex(); + case R_WEBASSEMBLY_FUNCTION_OFFSET_I32: + if (auto *Sym = dyn_cast(getFunctionSymbol(Reloc.Index))) { + size_t FunctionCodeOffset = + getFunctionCodeOffset(Sym->Function->getFunctionBody()); + return Sym->Function->OutputOffset + FunctionCodeOffset + Reloc.Addend; + } + return 0; + case R_WEBASSEMBLY_SECTION_OFFSET_I32: + if (auto *Sym = dyn_cast(getSectionSymbol(Reloc.Index))) { + return Sym->Section->OutputOffset + Reloc.Addend; + } + return 0; default: llvm_unreachable("unknown relocation type"); } @@ -147,14 +195,19 @@ // Find the code and data sections. Wasm objects can have at most one code // and one data section. 
+ uint32_t SectionIndex = 0; for (const SectionRef &Sec : WasmObj->sections()) { const WasmSection &Section = WasmObj->getWasmSection(Sec); - if (Section.Type == WASM_SEC_CODE) + if (Section.Type == WASM_SEC_CODE) { CodeSection = &Section; - else if (Section.Type == WASM_SEC_DATA) + } else if (Section.Type == WASM_SEC_DATA) { DataSection = &Section; - else if (Section.Type == WASM_SEC_CUSTOM) + } else if (Section.Type == WASM_SEC_CUSTOM) { CustomSections.emplace_back(make(Section, this)); + CustomSections.back()->copyRelocations(Section); + CustomSectionsByIndex[SectionIndex] = CustomSections.back(); + } + SectionIndex++; } TypeMap.resize(getWasmObj()->types().size()); @@ -215,6 +268,10 @@ return cast(Symbols[Index]); } +SectionSymbol *ObjFile::getSectionSymbol(uint32_t Index) const { + return cast(Symbols[Index]); +} + DataSymbol *ObjFile::getDataSymbol(uint32_t Index) const { return cast(Symbols[Index]); } @@ -253,14 +310,20 @@ return make(Name, Flags, this, Seg, Offset, Size); return Symtab->addDefinedData(Name, Flags, this, Seg, Offset, Size); } - case WASM_SYMBOL_TYPE_GLOBAL: + case WASM_SYMBOL_TYPE_GLOBAL: { InputGlobal *Global = Globals[Sym.Info.ElementIndex - WasmObj->getNumImportedGlobals()]; if (Sym.isBindingLocal()) return make(Name, Flags, this, Global); return Symtab->addDefinedGlobal(Name, Flags, this, Global); } - llvm_unreachable("unkown symbol kind"); + case WASM_SYMBOL_TYPE_SECTION: { + InputSection *Section = CustomSectionsByIndex[Sym.Info.ElementIndex]; + assert(Sym.isBindingLocal()); + return make(Name, Flags, Section, this); + } + } + llvm_unreachable("unknown symbol kind"); } Symbol *ObjFile::createUndefined(const WasmSymbol &Sym) { @@ -274,8 +337,10 @@ return Symtab->addUndefinedData(Name, Flags, this); case WASM_SYMBOL_TYPE_GLOBAL: return Symtab->addUndefinedGlobal(Name, Flags, this, Sym.GlobalType); + case WASM_SYMBOL_TYPE_SECTION: + llvm_unreachable("undefined section symbols must be defined"); } - llvm_unreachable("unkown symbol kind"); 
+ llvm_unreachable("unknown symbol kind"); } void ArchiveFile::parse() { Index: wasm/OutputSections.h =================================================================== --- wasm/OutputSections.h +++ wasm/OutputSections.h @@ -127,6 +127,8 @@ return Header.size() + NameData.size() + PayloadSize; } void writeTo(uint8_t *Buf) override; + uint32_t numRelocations() const override; + void writeRelocations(raw_ostream &OS) const override; protected: size_t PayloadSize; Index: wasm/OutputSections.cpp =================================================================== --- wasm/OutputSections.cpp +++ wasm/OutputSections.cpp @@ -224,3 +224,15 @@ parallelForEach(InputSections, [&](const InputSection *Section) { Section->writeTo(Buf); }); } + +uint32_t CustomSection::numRelocations() const { + uint32_t Count = 0; + for (const InputSection *InputSect : InputSections) + Count += InputSect->NumRelocations(); + return Count; +} + +void CustomSection::writeRelocations(raw_ostream &OS) const { + for (const InputSection *S : InputSections) + S->writeRelocations(OS); +} Index: wasm/Symbols.h =================================================================== --- wasm/Symbols.h +++ wasm/Symbols.h @@ -30,6 +30,7 @@ class InputSegment; class InputFunction; class InputGlobal; +class InputSection; #define INVALID_INDEX UINT32_MAX @@ -40,17 +41,21 @@ DefinedFunctionKind, DefinedDataKind, DefinedGlobalKind, + DefinedSectionKind, UndefinedFunctionKind, UndefinedDataKind, UndefinedGlobalKind, LazyKind, + CombinedSectionKind, }; Kind kind() const { return SymbolKind; } bool isDefined() const { return SymbolKind == DefinedFunctionKind || SymbolKind == DefinedDataKind || - SymbolKind == DefinedGlobalKind; + SymbolKind == DefinedGlobalKind || + SymbolKind == DefinedSectionKind || + SymbolKind == CombinedSectionKind; } bool isUndefined() const { @@ -155,6 +160,48 @@ } }; +class SectionSymbol : public Symbol { +public: + static bool classof(const Symbol *S) { + return S->kind() == DefinedSectionKind 
|| S->kind() == CombinedSectionKind; + } + + SectionSymbol(StringRef Name, Kind K, uint32_t Flags, InputFile *F = nullptr) + : Symbol(Name, K, Flags, F) {} +}; + +class DefinedSection : public SectionSymbol { +public: + DefinedSection(StringRef Name, uint32_t Flags, const InputSection *Section, + InputFile *F) + : SectionSymbol(Name, DefinedSectionKind, Flags, F), Section(Section) {} + + static bool classof(const Symbol *S) { + return S->kind() == DefinedSectionKind; + } + + const InputSection *Section; +}; + +class CombinedSection : public SectionSymbol { +public: + CombinedSection(StringRef Name, uint32_t Flags, StringRef SectionName) + : SectionSymbol(Name, CombinedSectionKind, Flags), + SectionName(SectionName) {} + + static bool classof(const Symbol *S) { + return S->kind() == CombinedSectionKind; + } + + StringRef SectionName; + + uint32_t getOutputSectionIndex() const; + void setOutputSectionIndex(uint32_t Index); + +protected: + uint32_t OutputSectionIndex = INVALID_INDEX; +}; + class DataSymbol : public Symbol { public: static bool classof(const Symbol *S) { @@ -297,6 +344,8 @@ alignas(DefinedFunction) char A[sizeof(DefinedFunction)]; alignas(DefinedData) char B[sizeof(DefinedData)]; alignas(DefinedGlobal) char C[sizeof(DefinedGlobal)]; + alignas(DefinedSection) char I[sizeof(DefinedSection)]; + alignas(CombinedSection) char J[sizeof(CombinedSection)]; alignas(LazySymbol) char D[sizeof(LazySymbol)]; alignas(UndefinedFunction) char E[sizeof(UndefinedFunction)]; alignas(UndefinedData) char F[sizeof(UndefinedData)]; Index: wasm/Symbols.cpp =================================================================== --- wasm/Symbols.cpp +++ wasm/Symbols.cpp @@ -36,6 +36,8 @@ return llvm::wasm::WASM_SYMBOL_TYPE_DATA; if (isa(this)) return llvm::wasm::WASM_SYMBOL_TYPE_GLOBAL; + if (isa(this)) + return llvm::wasm::WASM_SYMBOL_TYPE_SECTION; llvm_unreachable("invalid symbol kind"); } @@ -194,6 +196,19 @@ Global ? 
&Global->getType() : nullptr), Global(Global) {} +uint32_t CombinedSection::getOutputSectionIndex() const { + DEBUG(dbgs() << "getOutputSectionIndex: " << getName() << "\n"); + assert(OutputSectionIndex != INVALID_INDEX); + return OutputSectionIndex; +} + +void CombinedSection::setOutputSectionIndex(uint32_t Index) { + DEBUG(dbgs() << "setOutputSectionIndex: " << getName() << " -> " << Index + << "\n"); + assert(Index != INVALID_INDEX); + OutputSectionIndex = Index; +} + void LazySymbol::fetch() { cast(File)->addMember(&ArchiveSymbol); } std::string lld::toString(const wasm::Symbol &Sym) { @@ -211,6 +226,8 @@ return "DefinedData"; case wasm::Symbol::DefinedGlobalKind: return "DefinedGlobal"; + case wasm::Symbol::DefinedSectionKind: + return "DefinedSection"; case wasm::Symbol::UndefinedFunctionKind: return "UndefinedFunction"; case wasm::Symbol::UndefinedDataKind: @@ -219,6 +236,8 @@ return "UndefinedGlobal"; case wasm::Symbol::LazyKind: return "LazyKind"; + case wasm::Symbol::CombinedSectionKind: + return "CombinedSection"; } llvm_unreachable("invalid symbol kind"); } Index: wasm/Writer.cpp =================================================================== --- wasm/Writer.cpp +++ wasm/Writer.cpp @@ -67,6 +67,7 @@ void assignIndexes(); void calculateImports(); void calculateExports(); + void calculateCustomSections(); void assignSymtab(); void calculateTypes(); void createOutputSegments(); @@ -123,6 +124,8 @@ std::vector Segments; llvm::SmallDenseMap SegmentMap; + llvm::StringSet<> InternedNames; + llvm::StringMap CombinedSectionSymbols; }; } // anonymous namespace @@ -299,18 +302,35 @@ } } +void Writer::calculateCustomSections() { + log("calculateCustomSections"); + bool StripDebug = Config->StripDebug || Config->StripAll; + for (ObjFile *File : Symtab->ObjectFiles) { + for (InputSection *Section : File->CustomSections) { + StringRef Name = Section->getName(); + // These custom sections are known to the linker and synthesized rather + // than blindly copied + if 
(Name == "linking" || Name == "name" || Name.startswith("reloc.")) + continue; + // .. or it is a debug section + if (StripDebug && Name.startswith(".debug_")) + continue; + CustomSectionMapping[Name].push_back(Section); + } + } +} + void Writer::createCustomSections() { log("createCustomSections"); - for (ObjFile *File : Symtab->ObjectFiles) - for (InputSection *Section : File->CustomSections) - CustomSectionMapping[Section->getName()].push_back(Section); - for (auto &Pair : CustomSectionMapping) { StringRef Name = Pair.first(); - // These custom sections are known the linker and synthesized rather than - // blindly copied - if (Name == "linking" || Name == "name" || Name.startswith("reloc.")) - continue; + + auto P = CombinedSectionSymbols.find(Name); + if (P != CombinedSectionSymbols.end()) { + uint32_t SectionIndex = OutputSections.size(); + P->second.setOutputSectionIndex(SectionIndex); + } + DEBUG(dbgs() << "createCustomSection: " << Name << "\n"); OutputSections.push_back(make(Name, Pair.second)); } @@ -375,6 +395,9 @@ Name = "reloc.DATA"; else if (OSec->Type == WASM_SEC_CODE) Name = "reloc.CODE"; + else if (OSec->Type == WASM_SEC_CUSTOM && + StringRef(OSec->Name).startswith(".debug")) + Name = InternedNames.insert("reloc." 
+ OSec->Name).first->getKey(); else llvm_unreachable("relocations only supported for code and data"); @@ -452,8 +475,7 @@ writeUleb128(Sub.OS, G->getGlobalIndex(), "index"); if (Sym->isDefined()) writeStr(Sub.OS, Sym->getName(), "sym name"); - } else { - assert(isa(Sym)); + } else if (auto *D = dyn_cast(Sym)) { writeStr(Sub.OS, Sym->getName(), "sym name"); if (auto *DataSym = dyn_cast(Sym)) { writeUleb128(Sub.OS, DataSym->getOutputSegmentIndex(), "index"); @@ -461,7 +483,10 @@ "data offset"); writeUleb128(Sub.OS, DataSym->getSize(), "data size"); } - } + } else if (auto *S = dyn_cast(Sym)) { + writeUleb128(Sub.OS, S->getOutputSectionIndex(), "sym section index"); + } else + assert(false); } Sub.writeTo(OS); @@ -719,6 +744,8 @@ continue; if (!Sym->isLive()) continue; + if (isa(Sym)) + continue; DEBUG(dbgs() << "exporting sym: " << Sym->getName() << "\n"); @@ -732,12 +759,38 @@ if (!Config->Relocatable) return; + StringMap DebugSymbolIndicies; + unsigned SymbolIndex = SymtabEntries.size(); for (ObjFile *File : Symtab->ObjectFiles) { DEBUG(dbgs() << "Symtab entries: " << File->getName() << "\n"); for (Symbol *Sym : File->getSymbols()) { if (Sym->getFile() != File) continue; + + if (auto *S = dyn_cast(Sym)) { + StringRef Name = S->getName(); + if (!Name.startswith(".debug_") || + CustomSectionMapping.find(Name) == CustomSectionMapping.end()) + continue; + + auto DSI = DebugSymbolIndicies.find(Name); + if (DSI != DebugSymbolIndicies.end()) { + Sym->setOutputSymbolIndex(DSI->second); + continue; + } + + Sym->setOutputSymbolIndex(SymbolIndex); + DebugSymbolIndicies[Name] = SymbolIndex; + + DEBUG(dbgs() << "Symtab entries (debug): " << Name << "\n"); + auto P = CombinedSectionSymbols.try_emplace( + Name, Name, llvm::wasm::WASM_SYMBOL_BINDING_LOCAL, Name); + assert(P.second); + Sym = &P.first->second; + Sym->markLive(); + } + // (Since this is relocatable output, GC is not performed so symbols must // be live.) 
assert(Sym->isLive()); @@ -836,6 +889,8 @@ HandleRelocs(Chunk); for (InputChunk *Chunk : File->Segments) HandleRelocs(Chunk); + for (auto &P : File->CustomSections) + HandleRelocs(P); } uint32_t GlobalIndex = NumImportedGlobals + InputGlobals.size(); @@ -957,6 +1012,8 @@ layoutMemory(); log("-- calculateExports"); calculateExports(); + log("-- calculateCustomSections"); + calculateCustomSections(); log("-- assignSymtab"); assignSymtab();