Index: wasm/InputFiles.h =================================================================== --- wasm/InputFiles.h +++ wasm/InputFiles.h @@ -98,13 +98,14 @@ // Returns true if the given function index is an imported function, // as opposed to the locally defined function. bool isImportedFunction(uint32_t Index) const; + // Returns true if the given global index is an imported global. + bool isImportedGlobal(uint32_t Index) const; size_t NumFunctionImports() const { return FunctionImports; } size_t NumGlobalImports() const { return GlobalImports; } int32_t FunctionIndexOffset = 0; int32_t GlobalIndexOffset = 0; - int32_t TableIndexOffset = 0; const WasmSection *CodeSection = nullptr; std::vector CodeRelocations; int32_t CodeOffset = 0; @@ -114,6 +115,7 @@ std::vector Segments; const std::vector &getSymbols() { return Symbols; } + const std::vector &getFunctionSymbols() { return FunctionSymbols; } private: Symbol *createDefined(const WasmSymbol &Sym, @@ -123,12 +125,13 @@ InputSegment *getSegment(const WasmSymbol &WasmSym); const Symbol *getFunctionSymbol(uint32_t Index) const; const Symbol *getGlobalSymbol(uint32_t Index) const; + const Symbol *getTableSymbol(uint32_t Index) const; // List of all symbols referenced or defined by this file. 
std::vector Symbols; // List of all function symbols indexed by the function index space - std::vector FunctionSymbols; + std::vector FunctionSymbols; // List of all global symbols indexed by the global index space std::vector GlobalSymbols; Index: wasm/InputFiles.cpp =================================================================== --- wasm/InputFiles.cpp +++ wasm/InputFiles.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include #include "InputFiles.h" #include "Config.h" @@ -47,7 +48,6 @@ log("reloc info for: " + getName() + "\n" + " FunctionIndexOffset : " + Twine(FunctionIndexOffset) + "\n" + " NumFunctionImports : " + Twine(NumFunctionImports()) + "\n" + - " TableIndexOffset : " + Twine(TableIndexOffset) + "\n" + " GlobalIndexOffset : " + Twine(GlobalIndexOffset) + "\n" + " NumGlobalImports : " + Twine(NumGlobalImports()) + "\n"); } @@ -56,6 +56,10 @@ return Index < NumFunctionImports(); } +bool ObjFile::isImportedGlobal(uint32_t Index) const { + return Index < NumGlobalImports(); +} + const Symbol *ObjFile::getFunctionSymbol(uint32_t Index) const { return FunctionSymbols[Index]; } @@ -64,6 +68,20 @@ return GlobalSymbols[Index]; } +const Symbol *ObjFile::getTableSymbol(uint32_t Index) const { + for (const WasmElemSegment &ElemSegment : WasmObj->elements()) { + assert(ElemSegment.TableIndex == 0); + if (ElemSegment.Offset.Opcode != WASM_OPCODE_I32_CONST) + fatal("unsupported table init opcode: " + + Twine(ElemSegment.Offset.Opcode)); + uint32_t Offset = (uint32_t)ElemSegment.Offset.Value.Int32; + if (Index < Offset || Index >= Offset + ElemSegment.Functions.size()) + continue; + return getFunctionSymbol(ElemSegment.Functions[Index - Offset]); + } + fatal("Table index not defined"); +} + uint32_t ObjFile::getRelocatedAddress(uint32_t Index) const { return getGlobalSymbol(Index)->getVirtualAddress(); } @@ -86,7 +104,7 @@ } uint32_t ObjFile::relocateTableIndex(uint32_t Original) const { - return Original 
+ TableIndexOffset; + return getTableSymbol(Original)->getTableIndex(); } uint32_t ObjFile::relocateGlobalIndex(uint32_t Original) const { @@ -158,6 +176,9 @@ } } + for (const auto &C : WasmObj->comdats()) + Symtab->addComdat(C.first(), this); + FunctionSymbols.resize(FunctionImports + WasmObj->functions().size()); GlobalSymbols.resize(GlobalImports + WasmObj->globals().size()); Index: wasm/InputSegment.h =================================================================== --- wasm/InputSegment.h +++ wasm/InputSegment.h @@ -48,6 +48,14 @@ uint32_t getInputSectionOffset() const { return Segment->SectionOffset; } + // If two translation units define the same data segment, then + // getProvidingSegment() will return the previous definition. If this + // InputSegment is the definitive source of the segment, then nullptr is + // returned. For example, for a templated class with a vtable, multiple + // translation units may provide the vtable, and the first one will be + // returned here. + InputSegment *getProvidingSegment() const; + void setOutputSegment(const OutputSegment *Segment, uint32_t Offset) { OutputSeg = Segment; OutputSegmentOffset = Offset; Index: wasm/InputSegment.cpp =================================================================== --- wasm/InputSegment.cpp +++ wasm/InputSegment.cpp @@ -9,6 +9,7 @@ #include "InputSegment.h" #include "OutputSegment.h" +#include "SymbolTable.h" #include "lld/Common/LLVM.h" #define DEBUG_TYPE "lld" @@ -16,8 +17,38 @@ using namespace llvm; using namespace lld::wasm; +static InputSegment *getNamedSegment(const std::vector& Segs, + StringRef Name) { + for (InputSegment *S : Segs) { + if (S->getName() == Name) + return S; + } + return nullptr; +} + +InputSegment *InputSegment::getProvidingSegment() const { + StringRef Comdat = Segment->Data.Comdat; + if (Comdat.empty()) + return nullptr; + + ObjFile *ProvidingFile = Symtab->findComdat(Comdat); + if (!ProvidingFile || ProvidingFile == File) + return nullptr; + + // When compiling inline 
functions, the *code* is allowed to differ in each + // file where the functions are provided - but it really doesn't make sense + // for the data to differ, so we check and enforce that here. + InputSegment *OtherSegment = getNamedSegment(ProvidingFile->Segments, getName()); + if (!OtherSegment) + fatal("Segment " + Twine(getName()) + " not found in file providing COMDAT"); + if (OtherSegment->Segment->Data.Content != Segment->Data.Content) + fatal("Segments have different contents in providing files"); + return OtherSegment; +} + uint32_t InputSegment::translateVA(uint32_t Address) const { assert(Address >= startVA() && Address < endVA()); + assert(OutputSeg); int32_t Delta = OutputSeg->StartVA + OutputSegmentOffset - startVA(); DEBUG(dbgs() << "translateVA: " << getName() << " Delta=" << Delta << " Address=" << Address << "\n"); Index: wasm/OutputSegment.h =================================================================== --- wasm/OutputSegment.h +++ wasm/OutputSegment.h @@ -27,7 +27,6 @@ Alignment = std::max(Alignment, Segment->getAlignment()); InputSegments.push_back(Segment); Size = llvm::alignTo(Size, Segment->getAlignment()); - ; Segment->setOutputSegment(this, Size); Size += Segment->getSize(); } Index: wasm/SymbolTable.h =================================================================== --- wasm/SymbolTable.h +++ wasm/SymbolTable.h @@ -56,10 +56,14 @@ Symbol *addDefinedGlobal(StringRef Name); void addLazy(ArchiveFile *F, const Archive::Symbol *Sym); + bool addComdat(StringRef Name, ObjFile*); + ObjFile* findComdat(StringRef Name) const; + private: std::pair insert(StringRef Name); llvm::DenseMap SymMap; + llvm::DenseMap ComdatMap; }; extern SymbolTable *Symtab; Index: wasm/SymbolTable.cpp =================================================================== --- wasm/SymbolTable.cpp +++ wasm/SymbolTable.cpp @@ -95,6 +95,25 @@ return &WasmObj->types()[FunctionType]; } +// Get the COMDAT for a given function symbol +static StringRef getFunctionComdat(const 
ObjFile &Obj, + const WasmSymbol &Sym) { + if (!Obj.isImportedFunction(Sym.ElementIndex)) { + uint32_t FunctionIndex = Sym.ElementIndex - Obj.NumFunctionImports(); + return Obj.getWasmObj()->functions()[FunctionIndex].Comdat; + } + return StringRef(); +} + +// Get the COMDAT for a given global symbol +static StringRef getGlobalComdat(const ObjFile &Obj, const WasmSymbol &Sym) { + if (!Obj.isImportedGlobal(Sym.ElementIndex)) { + uint32_t GlobalIndex = Sym.ElementIndex - Obj.NumGlobalImports(); + return Obj.getWasmObj()->globals()[GlobalIndex].Comdat; + } + return StringRef(); +} + // Check the type of new symbol matches that of the symbol is replacing. // For functions this can also involve verifying that the signatures match. static void checkSymbolTypes(const Symbol &Existing, const InputFile &F, @@ -154,10 +173,30 @@ bool WasInserted; Symbol::Kind Kind = Symbol::DefinedFunctionKind; const WasmSignature *NewSig = nullptr; - if (Sym->Type == WasmSymbol::SymbolType::GLOBAL_EXPORT) + StringRef Comdat; + if (Sym->Type == WasmSymbol::SymbolType::GLOBAL_EXPORT) { Kind = Symbol::DefinedGlobalKind; - else - NewSig = getFunctionSig(*cast(F), *Sym); + Comdat = F ? getGlobalComdat(*cast(F), *Sym) : StringRef(); + } else { + ObjFile &OF = *cast(F); + NewSig = getFunctionSig(OF, *Sym); + Comdat = getFunctionComdat(OF, *Sym); + } + + if (!Comdat.empty() && findComdat(Comdat) != F) { + DEBUG(dbgs() << "symbol " << Sym->Name << " of kind " << Kind + << " from COMDAT " << Comdat << " already defined\n"); + S = find(Sym->Name); + // Each object file that defines a COMDAT should define the same symbols in + // the COMDAT? TODO ..or maybe not, if compiling the same header file with + // different levels of optimisation in different translation units. 
+ if (!S || !S->isDefined()) { + error("previous definition of COMDAT did not define symbol"); + return S; + } + checkSymbolTypes(*S, *F, *Sym, NewSig); + return S; + } std::tie(S, WasInserted) = insert(Sym->Name); if (WasInserted) { @@ -243,3 +282,19 @@ F->addMember(Sym); } } + +bool SymbolTable::addComdat(StringRef Name, ObjFile* F) { + DEBUG(dbgs() << "addComdat: " << Name << "\n"); + ObjFile *&File = ComdatMap[CachedHashStringRef(Name)]; + if (File) { + DEBUG(dbgs() << "COMDAT already defined\n"); + return false; + } + File = F; + return true; +} + +ObjFile* SymbolTable::findComdat(StringRef Name) const { + auto It = ComdatMap.find(CachedHashStringRef(Name)); + return It == ComdatMap.end() ? nullptr : It->second; +} Index: wasm/Symbols.h =================================================================== --- wasm/Symbols.h +++ wasm/Symbols.h @@ -72,18 +72,25 @@ bool hasFunctionType() const { return FunctionType; } const WasmSignature &getFunctionType() const; uint32_t getOutputIndex() const; + uint32_t getTableIndex() const; // Returns the virtual address of a defined global. // Only works for globals, not functions. uint32_t getVirtualAddress() const; // Returns true if an output index has been set for this symbol - bool hasOutputIndex() { return OutputIndex.hasValue(); } + bool hasOutputIndex() const { return OutputIndex.hasValue(); } // Set the output index of the symbol (in the function or global index // space of the output object. void setOutputIndex(uint32_t Index); + // Returns true if a table index has been set for this symbol + bool hasTableIndex() const { return TableIndex.hasValue(); } + + // Set the table index of the symbol + void setTableIndex(uint32_t Index); + // Set the virtual address for a "synthetic" global variable - not used // normally for symbols defined in the usual way. 
void setVirtualAddress(uint32_t Addr); @@ -111,6 +118,7 @@ const InputSegment *Segment = nullptr; const WasmSignature *FunctionType; llvm::Optional OutputIndex; + llvm::Optional TableIndex; llvm::Optional VirtualAddress; }; Index: wasm/Symbols.cpp =================================================================== --- wasm/Symbols.cpp +++ wasm/Symbols.cpp @@ -59,12 +59,23 @@ return OutputIndex.getValue(); } +uint32_t Symbol::getTableIndex() const { + assert(hasTableIndex()); + return TableIndex.getValue(); +} + void Symbol::setOutputIndex(uint32_t Index) { DEBUG(dbgs() << "setOutputIndex " << Name << " -> " << Index << "\n"); assert(!hasOutputIndex()); OutputIndex = Index; } +void Symbol::setTableIndex(uint32_t Index) { + DEBUG(dbgs() << "setTableIndex " << Name << " -> " << Index << "\n"); + assert(!hasTableIndex()); + TableIndex = Index; +} + void Symbol::setVirtualAddress(uint32_t Addr) { DEBUG(dbgs() << "setVirtualAddress " << Name << " -> " << Addr << "\n"); assert(!VirtualAddress.hasValue()); Index: wasm/Writer.cpp =================================================================== --- wasm/Writer.cpp +++ wasm/Writer.cpp @@ -69,6 +69,7 @@ uint32_t getTypeIndex(const WasmSignature &Sig); void assignSymbolIndexes(); + void assignTableIndexes(); void calculateImports(); void calculateOffsets(); void calculateTypes(); @@ -107,7 +108,6 @@ uint32_t NumGlobals = 0; uint32_t NumMemoryPages = 0; uint32_t NumTableElems = 0; - uint32_t NumElements = 0; uint32_t InitialTableOffset = 0; std::vector Types; @@ -197,6 +197,9 @@ SyntheticSection *Section = createSyntheticSection(WASM_SEC_FUNCTION); raw_ostream &OS = Section->getStream(); + // XXX TODO - needs to skip functions excluded by COMDAT or by virtue of + // being weak and overridden. That is, for each function, need to get its + // Symbol and check that Symbol->File == CurrentObjectFile. 
writeUleb128(OS, NumFunctions, "function count"); for (ObjFile *File : Symtab->ObjectFiles) { for (uint32_t Sig : File->getWasmObj()->functionTypes()) { @@ -279,6 +282,9 @@ if (ExportOther) { for (ObjFile *File : Symtab->ObjectFiles) { for (Symbol *Sym : File->getSymbols()) { + // We don't need to consider COMDATs or weak symbols here - if two + // ObjFiles define the same Symbol, the WrittenToSymtab check will + // ensure that the symbol isn't written out more than once. if (!Sym->isFunction() || Sym->isLocal() || Sym->isUndefined() || (Sym->isHidden() && !ExportHidden) || Sym->WrittenToSymtab) continue; @@ -350,7 +356,7 @@ } void Writer::createElemSection() { - if (!NumElements) + if (NumTableElems == InitialTableOffset) return; SyntheticSection *Section = createSyntheticSection(WASM_SEC_ELEM); @@ -362,13 +368,26 @@ InitExpr.Opcode = WASM_OPCODE_I32_CONST; InitExpr.Value.Int32 = InitialTableOffset; writeInitExpr(OS, InitExpr); - writeUleb128(OS, NumElements, "elem count"); + writeUleb128(OS, NumTableElems - InitialTableOffset, "elem count"); - for (ObjFile *File : Symtab->ObjectFiles) - for (const WasmElemSegment &Segment : File->getWasmObj()->elements()) - for (uint64_t FunctionIndex : Segment.Functions) - writeUleb128(OS, File->relocateFunctionIndex(FunctionIndex), - "function index"); + uint32_t ElemIndex = InitialTableOffset; + for (ObjFile *File : Symtab->ObjectFiles) { + const WasmObjectFile *WasmFile = File->getWasmObj(); + + for (const WasmElemSegment &Segment : WasmFile->elements()) { + for (uint64_t FunctionIndex : Segment.Functions) { + Symbol *S = File->getFunctionSymbols()[FunctionIndex]; + uint32_t CurrIndex = S->getTableIndex(); + assert(CurrIndex <= ElemIndex); + if (CurrIndex == ElemIndex) { + writeUleb128(OS, File->relocateFunctionIndex(FunctionIndex), + "function index"); + ++ElemIndex; + } + } + } + } + assert(ElemIndex == NumTableElems); } void Writer::createCodeSection() { @@ -377,6 +396,9 @@ log("createCodeSection"); + // XXX TODO - needs to skip functions 
excluded by COMDAT or by virtue of + // being weak and overridden. That is, for each function, need to get its + // Symbol and check that Symbol->File == CurrentObjectFile. auto Section = make(NumFunctions, Symtab->ObjectFiles); OutputSections.push_back(Section); } @@ -598,7 +620,6 @@ void Writer::calculateOffsets() { NumGlobals = Config->SyntheticGlobals.size(); - NumTableElems = InitialTableOffset; for (ObjFile *File : Symtab->ObjectFiles) { const WasmObjectFile *WasmFile = File->getWasmObj(); @@ -621,29 +642,6 @@ fatal(File->getName() + ": contains more than one memory"); } } - - // Table - uint32_t TableCount = WasmFile->tables().size(); - if (TableCount) { - if (TableCount > 1) - fatal(File->getName() + ": contains more than one table"); - File->TableIndexOffset = NumTableElems; - NumTableElems += WasmFile->tables()[0].Limits.Initial; - } - - // Elem - uint32_t SegmentCount = WasmFile->elements().size(); - if (SegmentCount) { - if (SegmentCount > 1) - fatal(File->getName() + ": contains more than element segment"); - - const WasmElemSegment &Segment = WasmFile->elements()[0]; - if (Segment.TableIndex != 0) - fatal(File->getName() + ": unsupported table index"); - if (Segment.Offset.Value.Int32 != 0) - fatal(File->getName() + ": unsupported segment offset"); - NumElements += Segment.Functions.size(); - } } } @@ -698,6 +696,32 @@ } } +void Writer::assignTableIndexes() { + NumTableElems = InitialTableOffset; + + for (ObjFile *File : Symtab->ObjectFiles) { + DEBUG(dbgs() << "assignTableIndexes: " << File->getName() << "\n"); + const WasmObjectFile *WasmFile = File->getWasmObj(); + + // Taking the address of an inline function in two different translation + // units *must* return the same value, so that function pointers passed + // between translation units are equality-comparable. 
+ size_t TableCount = WasmFile->tables().size(); + if (TableCount > 1) + fatal(File->getName() + ": contains more than one table"); + for (const WasmElemSegment &Segment : WasmFile->elements()) { + assert(Segment.TableIndex == 0); // Already checked by WasmObjectFile + for (uint64_t FunctionIndex : Segment.Functions) { + Symbol *S = File->getFunctionSymbols()[FunctionIndex]; + if (!S->hasTableIndex()) { + S->setTableIndex(NumTableElems); + ++NumTableElems; + } + } + } + } +} + static StringRef getOutputDataSegmentName(StringRef Name) { if (Config->Relocatable) return Name; @@ -724,8 +748,13 @@ S = make(Name); Segments.push_back(S); } - S->addInputSegment(Segment); - DEBUG(dbgs() << "added data: " << Name << ": " << S->Size << "\n"); + if (InputSegment *ProvidingSegment = Segment->getProvidingSegment()) { + Segment->setOutputSegment(S, ProvidingSegment->getOutputSegmentOffset()); + DEBUG(dbgs() << "pointing to existing data: " << Name << "\n"); + } else { + S->addInputSegment(Segment); + DEBUG(dbgs() << "added data: " << Name << ": " << S->Size << "\n"); + } for (const WasmRelocation &R : File->DataSection->Relocations) { if (R.Offset >= Segment->getInputSectionOffset() && R.Offset < Segment->getInputSectionOffset() + Segment->getSize()) { @@ -760,6 +789,8 @@ log("-- assignSymbolIndexes"); assignSymbolIndexes(); + log("-- assignTableIndexes"); + assignTableIndexes(); log("-- layoutMemory"); layoutMemory();