Index: test/wasm/Inputs/comdat1.ll =================================================================== --- /dev/null +++ test/wasm/Inputs/comdat1.ll @@ -0,0 +1,11 @@ +$inlineFn = comdat any +@constantData = weak_odr constant [3 x i8] c"abc", comdat($inlineFn) +define linkonce_odr i32 @inlineFn() comdat { +entry: + ret i32 ptrtoint ([3 x i8]* @constantData to i32) +} + +define i32 @callInline1() { +entry: + ret i32 ptrtoint (i32 ()* @inlineFn to i32) +} Index: test/wasm/Inputs/comdat2.ll =================================================================== --- /dev/null +++ test/wasm/Inputs/comdat2.ll @@ -0,0 +1,11 @@ +$inlineFn = comdat any +@constantData = weak_odr constant [3 x i8] c"abc", comdat($inlineFn) +define linkonce_odr i32 @inlineFn() comdat { +entry: + ret i32 ptrtoint ([3 x i8]* @constantData to i32) +} + +define i32 @callInline2() { +entry: + ret i32 ptrtoint (i32 ()* @inlineFn to i32) +} Index: test/wasm/comdats.ll =================================================================== --- /dev/null +++ test/wasm/comdats.ll @@ -0,0 +1,65 @@ +; RUN: llc -filetype=obj -mtriple=wasm32-unknown-uknown-wasm %p/Inputs/comdat1.ll -o %t1.o +; RUN: llc -filetype=obj -mtriple=wasm32-unknown-uknown-wasm %p/Inputs/comdat2.ll -o %t2.o +; RUN: llc -filetype=obj -mtriple=wasm32-unknown-uknown-wasm %s -o %t.o +; RUN: lld -flavor wasm -o %t.wasm %t.o %t1.o %t2.o +; RUN: obj2yaml %t.wasm | FileCheck %s + +declare i32 @inlineFn() + +define void @_start() local_unnamed_addr { +entry: + %call = call i32 @inlineFn() + ret void +} + +; CHECK: - Type: GLOBAL +; CHECK-NEXT: Globals: +; CHECK-NEXT: - Type: I32 +; CHECK-NEXT: Mutable: true +; CHECK-NEXT: InitExpr: +; CHECK-NEXT: Opcode: I32_CONST +; CHECK-NEXT: Value: 66576 +; CHECK-NEXT: - Type: EXPORT +; CHECK-NEXT: Exports: +; CHECK-NEXT: - Name: memory +; CHECK-NEXT: Kind: MEMORY +; CHECK-NEXT: Index: 0 +; CHECK-NEXT: - Name: _start +; CHECK-NEXT: Kind: FUNCTION +; CHECK-NEXT: Index: 0 +; CHECK-NEXT: - Name: inlineFn +; 
CHECK-NEXT: Kind: FUNCTION +; CHECK-NEXT: Index: 1 +; CHECK-NEXT: - Name: callInline1 +; CHECK-NEXT: Kind: FUNCTION +; CHECK-NEXT: Index: 2 +; CHECK-NEXT: - Name: callInline2 +; CHECK-NEXT: Kind: FUNCTION +; (Should be "3" when function pruning implemented:) +; CHECK-NEXT: Index: 4 +; CHECK-NEXT: - Type: ELEM +; CHECK-NEXT: Segments: +; CHECK-NEXT: - Offset: +; CHECK-NEXT: Opcode: I32_CONST +; CHECK-NEXT: Value: 1 +; CHECK-NEXT: Functions: [ 1 ] +; CHECK-NEXT: - Type: CODE +; CHECK-NEXT: Functions: +; CHECK-NEXT: - Locals: +; CHECK-NEXT: Body: 1081808080001A0B +; CHECK-NEXT: - Locals: +; CHECK-NEXT: Body: 4180888080000B +; CHECK-NEXT: - Locals: +; CHECK-NEXT: Body: 4181808080000B +; CHECK-NEXT: - Locals: +; CHECK-NEXT: Body: 4180888080000B +; CHECK-NEXT: - Locals: +; CHECK-NEXT: Body: 4181808080000B +; CHECK-NEXT: - Type: DATA +; CHECK-NEXT: Segments: +; CHECK-NEXT: - SectionOffset: 7 +; CHECK-NEXT: MemoryIndex: 0 +; CHECK-NEXT: Offset: +; CHECK-NEXT: Opcode: I32_CONST +; CHECK-NEXT: Value: 1024 +; CHECK-NEXT: Content: '616263' Index: wasm/InputFiles.h =================================================================== --- wasm/InputFiles.h +++ wasm/InputFiles.h @@ -11,6 +11,7 @@ #define LLD_WASM_INPUT_FILES_H #include "lld/Common/LLVM.h" +#include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/Object/Archive.h" @@ -24,6 +25,7 @@ using llvm::object::Archive; using llvm::object::WasmObjectFile; using llvm::object::WasmSection; +using llvm::object::WasmSegment; using llvm::object::WasmSymbol; using llvm::wasm::WasmImport; @@ -109,12 +111,15 @@ const WasmSection *DataSection = nullptr; std::vector TypeMap; + llvm::DenseMap SegmentMap; std::vector Segments; ArrayRef getSymbols() { return Symbols; } ArrayRef getTableSymbols() { return TableSymbols; } + ArrayRef getFunctionSymbols() { return FunctionSymbols; } private: + InputSegment* createSegment(const WasmSegment &Seg); Symbol *createDefined(const WasmSymbol 
&Sym, const InputSegment *Segment = nullptr); Symbol *createUndefined(const WasmSymbol &Sym); Index: wasm/InputFiles.cpp =================================================================== --- wasm/InputFiles.cpp +++ wasm/InputFiles.cpp @@ -153,11 +153,14 @@ } } + for (const auto &C : WasmObj->comdats()) + Symtab->addComdat(C.first(), this); + FunctionSymbols.resize(FunctionImports + WasmObj->functions().size()); GlobalSymbols.resize(GlobalImports + WasmObj->globals().size()); - for (const WasmSegment &Seg : WasmObj->dataSegments()) - Segments.emplace_back(make(&Seg, this)); + for (const WasmSegment &WasmSeg : WasmObj->dataSegments()) + createSegment(WasmSeg); // Populate `FunctionSymbols` and `GlobalSymbols` based on the WasmSymbols // in the object @@ -214,6 +217,51 @@ DEBUG(dbgs() << "Globals : " << GlobalSymbols.size() << "\n"); } +static InputSegment* getCanonicalSegment(const WasmSegment &Seg, + ObjFile *CurrFile) { + StringRef Comdat = Seg.Data.Comdat; + StringRef Name = Seg.Data.Name; + if (Comdat.empty()) + return nullptr; + + ObjFile *ComdatFile = Symtab->findComdat(Comdat); + if (ComdatFile == CurrFile) + return nullptr; + + // When compiling inline functions, the *code* is allowed to differ in each + // file where the functions are provided - but it really doesn't make sense + // for the data to differ, so we check and enforce that here. 
+ auto SegI = ComdatFile->SegmentMap.find(CachedHashStringRef(Name)); + if (SegI == ComdatFile->SegmentMap.end()) { + error("Segment " + Twine(Name) + " not found in file '" + + ComdatFile->getName() + "' providing COMDAT"); + return nullptr; + } + InputSegment *CanonicalSegment = SegI->second; + if (CanonicalSegment->Segment->Data.Content != Seg.Data.Content) { + error("Segments " + Twine(Name) + " have different contents in providing " + "files '" + ComdatFile->getName() + "' and '" + CurrFile->getName() + + "'"); + return nullptr; + } + return CanonicalSegment; +} + +InputSegment* ObjFile::createSegment(const WasmSegment &Seg) { + InputSegment *IS = make(&Seg, this); + // If the segment is defined in another object file, we can't simply put + // that previous definition in Segments; we link globals to their segment + // from the base address, and that will depend on the surrounding code in + // each file. We stash a pointer though so that the canonical definition of + // the segment is available later for relocations. + IS->CanonicalSegment = getCanonicalSegment(Seg, this); + + Segments.emplace_back(IS); + if (!Seg.Data.Name.empty()) + SegmentMap[CachedHashStringRef(Seg.Data.Name)] = IS; + return IS; +} + Symbol *ObjFile::createUndefined(const WasmSymbol &Sym) { return Symtab->addUndefined(this, &Sym); } Index: wasm/InputSegment.h =================================================================== --- wasm/InputSegment.h +++ wasm/InputSegment.h @@ -63,6 +63,13 @@ const ObjFile *File; std::vector Relocations; + // If two translation units define the same data segment, then + // CanonicalSegment will contain the previous definition. If this + // InputSegment is the definitive source of the segment, then this field is + // nullptr. This is used for relocating references to this segment to the + // chosen segment after duplicates are discarded. 
+ const InputSegment *CanonicalSegment = nullptr; + protected: const OutputSegment *OutputSeg = nullptr; uint32_t OutputSegmentOffset = 0; Index: wasm/InputSegment.cpp =================================================================== --- wasm/InputSegment.cpp +++ wasm/InputSegment.cpp @@ -9,6 +9,7 @@ #include "InputSegment.h" #include "OutputSegment.h" +#include "SymbolTable.h" #include "lld/Common/LLVM.h" #define DEBUG_TYPE "lld" @@ -18,6 +19,7 @@ uint32_t InputSegment::translateVA(uint32_t Address) const { assert(Address >= startVA() && Address < endVA()); + assert(OutputSeg); int32_t Delta = OutputSeg->StartVA + OutputSegmentOffset - startVA(); DEBUG(dbgs() << "translateVA: " << getName() << " Delta=" << Delta << " Address=" << Address << "\n"); Index: wasm/OutputSegment.h =================================================================== --- wasm/OutputSegment.h +++ wasm/OutputSegment.h @@ -27,7 +27,6 @@ Alignment = std::max(Alignment, Segment->getAlignment()); InputSegments.push_back(Segment); Size = llvm::alignTo(Size, Segment->getAlignment()); - ; Segment->setOutputSegment(this, Size); Size += Segment->getSize(); } Index: wasm/SymbolTable.h =================================================================== --- wasm/SymbolTable.h +++ wasm/SymbolTable.h @@ -57,11 +57,15 @@ Symbol *addDefinedGlobal(StringRef Name); void addLazy(ArchiveFile *F, const Archive::Symbol *Sym); + bool addComdat(StringRef Name, ObjFile*); + ObjFile* findComdat(StringRef Name) const; + private: std::pair insert(StringRef Name); llvm::DenseMap SymMap; std::vector SymVector; + llvm::DenseMap ComdatMap; }; extern SymbolTable *Symtab; Index: wasm/SymbolTable.cpp =================================================================== --- wasm/SymbolTable.cpp +++ wasm/SymbolTable.cpp @@ -10,6 +10,7 @@ #include "SymbolTable.h" #include "Config.h" +#include "InputSegment.h" #include "WriterUtils.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" @@ -94,6 +95,16 @@ return 
&WasmObj->types()[FunctionType]; } +// Get the COMDAT for a given function symbol +static StringRef getFunctionComdat(const ObjFile &Obj, + const WasmSymbol &Sym) { + if (!Obj.isImportedFunction(Sym.ElementIndex)) { + uint32_t FunctionIndex = Sym.ElementIndex - Obj.NumFunctionImports(); + return Obj.getWasmObj()->functions()[FunctionIndex].Comdat; + } + return StringRef(); +} + // Check the type of new symbol matches that of the symbol is replacing. // For functions this can also involve verifying that the signatures match. static void checkSymbolTypes(const Symbol &Existing, const InputFile &F, @@ -154,10 +165,29 @@ bool WasInserted; Symbol::Kind Kind = Symbol::DefinedFunctionKind; const WasmSignature *NewSig = nullptr; - if (Sym->Type == WasmSymbol::SymbolType::GLOBAL_EXPORT) + StringRef Comdat; + if (Sym->Type == WasmSymbol::SymbolType::GLOBAL_EXPORT) { Kind = Symbol::DefinedGlobalKind; - else - NewSig = getFunctionSig(*cast(F), *Sym); + Comdat = Segment ? Segment->Segment->Data.Comdat : StringRef(); + } else { + ObjFile &OF = *cast(F); + NewSig = getFunctionSig(OF, *Sym); + Comdat = getFunctionComdat(OF, *Sym); + } + + if (!Comdat.empty() && ComdatMap[CachedHashStringRef(Comdat)] != F) { + DEBUG(dbgs() << "symbol " << Sym->Name << " of kind " << Kind + << " from COMDAT " << Comdat << " already defined\n"); + S = find(Sym->Name); + // Each object file that defines a COMDAT should define the same symbols in + // the COMDAT, since discarding COMDATs assumes that they are equivalent. 
+ if (!S || !S->isDefined()) { + error("previous definition of COMDAT did not define symbol"); + return S; + } + checkSymbolTypes(*S, *F, *Sym, NewSig); + return S; + } std::tie(S, WasInserted) = insert(Sym->Name); if (WasInserted) { @@ -243,3 +273,19 @@ F->addMember(Sym); } } + +bool SymbolTable::addComdat(StringRef Name, ObjFile* F) { + DEBUG(dbgs() << "addComdat: " << Name << "\n"); + ObjFile *&File = ComdatMap[CachedHashStringRef(Name)]; + if (File) { + DEBUG(dbgs() << "COMDAT already defined\n"); + return false; + } + File = F; + return true; +} + +ObjFile* SymbolTable::findComdat(StringRef Name) const { + auto It = ComdatMap.find(CachedHashStringRef(Name)); + return It == ComdatMap.end() ? nullptr : It->second; +} Index: wasm/Writer.cpp =================================================================== --- wasm/Writer.cpp +++ wasm/Writer.cpp @@ -646,8 +646,14 @@ S = make(Name); Segments.push_back(S); } - S->addInputSegment(Segment); - DEBUG(dbgs() << "added data: " << Name << ": " << S->Size << "\n"); + if (Segment->CanonicalSegment) { + Segment->setOutputSegment( + S, Segment->CanonicalSegment->getOutputSegmentOffset()); + DEBUG(dbgs() << "pointing to existing data: " << Name << "\n"); + } else { + S->addInputSegment(Segment); + DEBUG(dbgs() << "added data: " << Name << ": " << S->Size << "\n"); + } for (const WasmRelocation &R : File->DataSection->Relocations) { if (R.Offset >= Segment->getInputSectionOffset() && R.Offset < Segment->getInputSectionOffset() + Segment->getSize()) {