Index: lld/trunk/docs/WebAssembly.rst =================================================================== --- lld/trunk/docs/WebAssembly.rst +++ lld/trunk/docs/WebAssembly.rst @@ -95,6 +95,26 @@ For more specific details on how this is achieved see the tool conventions on linking_. +Function Signatrues +~~~~~~~~~~~~~~~~~~~ + +One way in which the WebAssembly linker differs from traditional native linkers +is that function signature checking is strict in WebAssembly. It is a +validation error for a module to contain to call site that doesn't agree with +the target signature. Even though this is undefined behavior in C/C++ its not +uncommon to find this in real world C/C++ programs. For example, a call site in +one complication unit which calls a function defined in another complication +unit but with too many arguments. + +In order not to generate such invalid modules lld has two modes of handling such +mismatches: it can simply error out or it can create stub functions that will +trap at runtime (functions that contain only an ``unreachable`` instruction) +and use these stub functions at the otherwise invalid call sites. + +The the default befviour is to generate these stub function and to produce +a warning. The ``--falal-warnings`` flag can be used to disable this behaviour +and error out if mismatched are found. + Imports and Exports ~~~~~~~~~~~~~~~~~~~ Index: lld/trunk/test/wasm/Inputs/call-ret32.ll =================================================================== --- lld/trunk/test/wasm/Inputs/call-ret32.ll +++ lld/trunk/test/wasm/Inputs/call-ret32.ll @@ -0,0 +1,11 @@ +target triple = "wasm32-unknown-unknown" + +@ret32_address = global i32 (float)* @ret32, align 4 + +define hidden i32* @call_ret32() { +entry: + %call1 = call i32 @ret32(float 0.000000e+00) + ret i32* bitcast (i32 (float)** @ret32_address to i32*) +} + +declare i32 @ret32(float) Index: lld/trunk/test/wasm/lto/signature-mismatch.ll =================================================================== --- lld/trunk/test/wasm/lto/signature-mismatch.ll +++ lld/trunk/test/wasm/lto/signature-mismatch.ll @@ -15,5 +15,6 @@ ret void } +; CHECK: error: function signature mismatch: f ; CHECK: >>> defined as (i32) -> void in {{.*}}signature-mismatch.ll.tmp1.o ; CHECK: >>> defined as () -> void in lto.tmp Index: lld/trunk/test/wasm/signature-mismatch-export.ll =================================================================== --- lld/trunk/test/wasm/signature-mismatch-export.ll +++ lld/trunk/test/wasm/signature-mismatch-export.ll @@ -0,0 +1,32 @@ +; RUN: llc -filetype=obj %p/Inputs/ret32.ll -o %t.ret32.o +; RUN: llc -filetype=obj %s -o %t.main.o +; RUN: wasm-ld --export=ret32 -o %t.wasm %t.main.o %t.ret32.o +; RUN: obj2yaml %t.wasm | FileCheck %s + +target triple = "wasm32-unknown-unknown" + +declare i32 @ret32(i32) + +define void @_start() { +entry: + %call1 = call i32 @ret32(i32 0) + ret void +} + +; CHECK: - Type: EXPORT +; CHECK: - Name: ret32 +; CHECK-NEXT: Kind: FUNCTION +; CHECK-NEXT: Index: 3 + +; CHECK: - Type: CUSTOM +; CHECK-NEXT: Name: name +; CHECK-NEXT: FunctionNames: +; CHECK-NEXT: - Index: 0 +; CHECK-NEXT: Name: __wasm_call_ctors +; CHECK-NEXT: - Index: 1 +; CHECK-NEXT: Name: 'unreachable:ret32' +; CHECK-NEXT: - Index: 2 +; CHECK-NEXT: Name: _start +; CHECK-NEXT: - Index: 3 +; CHECK-NEXT: Name: ret32 +; CHECK-NEXT: ... Index: lld/trunk/test/wasm/signature-mismatch.ll =================================================================== --- lld/trunk/test/wasm/signature-mismatch.ll +++ lld/trunk/test/wasm/signature-mismatch.ll @@ -1,26 +1,53 @@ ; RUN: llc -filetype=obj %p/Inputs/ret32.ll -o %t.ret32.o +; RUN: llc -filetype=obj %p/Inputs/call-ret32.ll -o %t.call.o ; RUN: llc -filetype=obj %s -o %t.main.o -; RUN: not wasm-ld --fatal-warnings -o %t.wasm %t.main.o %t.ret32.o 2>&1 | FileCheck %s -; Run the test again by with the object files in the other order to verify -; the check works when the undefined symbol is resolved by an existing defined -; one. -; RUN: not wasm-ld --fatal-warnings -o %t.wasm %t.ret32.o %t.main.o 2>&1 | FileCheck %s -check-prefix=REVERSE +; RUN: wasm-ld --export=call_ret32 --export=ret32 -o %t.wasm %t.main.o %t.ret32.o %t.call.o 2>&1 | FileCheck %s -check-prefix=WARN +; RUN: obj2yaml %t.wasm | FileCheck %s -check-prefix=YAML +; RUN: not wasm-ld --fatal-warnings -o %t.wasm %t.main.o %t.ret32.o %t.call.o 2>&1 | FileCheck %s -check-prefix=ERROR target triple = "wasm32-unknown-unknown" +@ret32_address_main = global i32 (i32, i64, i32)* @ret32, align 4 + ; Function Attrs: nounwind -define hidden void @_start() local_unnamed_addr #0 { +define hidden void @_start() local_unnamed_addr { entry: - %call = tail call i32 @ret32(i32 1, i64 2, i32 3) #2 + %call1 = call i32 @ret32(i32 1, i64 2, i32 3) + %addr = load i32 (i32, i64, i32)*, i32 (i32, i64, i32)** @ret32_address_main, align 4 + %call2 = call i32 %addr(i32 1, i64 2, i32 3) ret void } -declare i32 @ret32(i32, i64, i32) local_unnamed_addr #1 +declare i32 @ret32(i32, i64, i32) local_unnamed_addr + +; WARN: warning: function signature mismatch: ret32 +; WARN-NEXT: >>> defined as (i32, i64, i32) -> i32 in {{.*}}.main.o +; WARN-NEXT: >>> defined as (f32) -> i32 in {{.*}}.ret32.o + +; ERROR: error: function signature mismatch: ret32 +; ERROR-NEXT: >>> defined as (i32, i64, i32) -> i32 in {{.*}}.main.o +; ERROR-NEXT: >>> defined as (f32) -> i32 in {{.*}}.ret32.o + +; YAML: - Type: EXPORT +; YAML: - Name: ret32 +; YAML-NEXT: Kind: FUNCTION +; YAML-NEXT: Index: 3 +; YAML-NEXT: - Name: call_ret32 +; YAML-NEXT: Kind: FUNCTION +; YAML-NEXT: Index: 4 + +; YAML: - Type: CUSTOM +; YAML-NEXT: Name: name +; YAML-NEXT: FunctionNames: +; YAML-NEXT: - Index: 0 +; YAML-NEXT: Name: __wasm_call_ctors +; YAML-NEXT: - Index: 1 +; YAML-NEXT: Name: 'unreachable:ret32' +; YAML-NEXT: - Index: 2 +; YAML-NEXT: Name: _start +; YAML-NEXT: - Index: 3 +; YAML-NEXT: Name: ret32 +; YAML-NEXT: - Index: 4 +; YAML-NEXT: Name: call_ret32 +; YAML-NEXT: ... -; CHECK: error: function signature mismatch: ret32 -; CHECK-NEXT: >>> defined as (i32, i64, i32) -> i32 in {{.*}}.main.o -; CHECK-NEXT: >>> defined as (f32) -> i32 in {{.*}}.ret32.o - -; REVERSE: error: function signature mismatch: ret32 -; REVERSE-NEXT: >>> defined as (f32) -> i32 in {{.*}}.ret32.o -; REVERSE-NEXT: >>> defined as (i32, i64, i32) -> i32 in {{.*}}.main.o Index: lld/trunk/wasm/Driver.cpp =================================================================== --- lld/trunk/wasm/Driver.cpp +++ lld/trunk/wasm/Driver.cpp @@ -534,17 +534,6 @@ for (auto *Arg : Args.filtered(OPT_undefined)) handleUndefined(Arg->getValue()); - // Handle the `--export ` options - // This works like --undefined but also exports the symbol if its found - for (auto *Arg : Args.filtered(OPT_export)) { - Symbol *Sym = handleUndefined(Arg->getValue()); - if (Sym && Sym->isDefined()) - Sym->ForceExport = true; - else if (!Config->AllowUndefined) - error(Twine("symbol exported via --export not found: ") + - Arg->getValue()); - } - Symbol *EntrySym = nullptr; if (!Config->Relocatable) { if (!Config->Shared && !Config->Entry.empty()) { @@ -555,26 +544,47 @@ error("entry symbol not defined (pass --no-entry to supress): " + Config->Entry); } - - // Make sure we have resolved all symbols. - if (!Config->AllowUndefined) - Symtab->reportRemainingUndefines(); } if (errorCount()) return; + // Handle the `--export ` options + // This works like --undefined but also exports the symbol if its found + for (auto *Arg : Args.filtered(OPT_export)) + handleUndefined(Arg->getValue()); + // Do link-time optimization if given files are LLVM bitcode files. // This compiles bitcode files into real object files. Symtab->addCombinedLTOObject(); if (errorCount()) return; - // Add synthetic dummies for weak undefined functions. Must happen - // after LTO otherwise functions may not yet have signatures. - if (!Config->Relocatable) + // Resolve any variant symbols that were created due to signature + // mismatchs. + Symtab->handleSymbolVariants(); + if (errorCount()) + return; + + for (auto *Arg : Args.filtered(OPT_export)) { + Symbol *Sym = Symtab->find(Arg->getValue()); + if (Sym && Sym->isDefined()) + Sym->ForceExport = true; + else if (!Config->AllowUndefined) + error(Twine("symbol exported via --export not found: ") + + Arg->getValue()); + } + + if (!Config->Relocatable) { + // Add synthetic dummies for weak undefined functions. Must happen + // after LTO otherwise functions may not yet have signatures. Symtab->handleWeakUndefines(); + // Make sure we have resolved all symbols. + if (!Config->AllowUndefined) + Symtab->reportRemainingUndefines(); + } + if (EntrySym) EntrySym->setHidden(false); Index: lld/trunk/wasm/SymbolTable.h =================================================================== --- lld/trunk/wasm/SymbolTable.h +++ lld/trunk/wasm/SymbolTable.h @@ -49,6 +49,8 @@ Symbol *find(StringRef Name); + void replace(StringRef Name, Symbol* Sym); + void trace(StringRef Name); Symbol *addDefinedFunction(StringRef Name, uint32_t Flags, InputFile *File, @@ -79,12 +81,15 @@ DefinedFunction *addSyntheticFunction(StringRef Name, uint32_t Flags, InputFunction *Function); + void handleSymbolVariants(); void handleWeakUndefines(); private: std::pair insert(StringRef Name, const InputFile *File); std::pair insertName(StringRef Name); + bool getFunctionVariant(Symbol* Sym, const WasmSignature *Sig, + const InputFile *File, Symbol **Out); InputFunction *replaceWithUnreachable(Symbol *Sym, const WasmSignature &Sig, StringRef DebugName); @@ -94,6 +99,10 @@ llvm::DenseMap SymMap; std::vector SymVector; + // For certain symbols types, e.g. function symbols, we allow for muliple + // variants of the same symbol with different signatures. + llvm::DenseMap> SymVariants; + llvm::DenseSet Comdats; // For LTO. Index: lld/trunk/wasm/SymbolTable.cpp =================================================================== --- lld/trunk/wasm/SymbolTable.cpp +++ lld/trunk/wasm/SymbolTable.cpp @@ -26,6 +26,38 @@ SymbolTable *lld::wasm::Symtab; +static char encodeValType(ValType Type) { + switch (Type) { + case ValType::I32: + return 'i'; + case ValType::I64: + return 'j'; + case ValType::F32: + return 'f'; + case ValType::F64: + return 'd'; + case ValType::V128: + return 'V'; + case ValType::EXCEPT_REF: + return 'e'; + } + llvm_unreachable("invalid wasm type"); +} + +static std::string encodeSignature(const WasmSignature &Sig) { + std::string S = ":"; + for (ValType Type : Sig.Returns) + S += encodeValType(Type); + S += ':'; + for (ValType Type : Sig.Params) + S += encodeValType(Type); + return S; +} + +static StringRef getVariantName(StringRef BaseName, const WasmSignature &Sig) { + return Saver.save(BaseName + encodeSignature(Sig)); +} + void SymbolTable::addFile(InputFile *File) { log("Processing: " + toString(File)); if (Config->Trace) @@ -81,6 +113,11 @@ return SymVector[It->second]; } +void SymbolTable::replace(StringRef Name, Symbol* Sym) { + auto It = SymMap.find(CachedHashStringRef(Name)); + SymVector[It->second] = Sym; +} + std::pair SymbolTable::insertName(StringRef Name) { bool Trace = false; auto P = SymMap.insert({CachedHashStringRef(Name), (int)SymVector.size()}); @@ -123,29 +160,19 @@ } // Check the type of new symbol matches that of the symbol is replacing. -// For functions this can also involve verifying that the signatures match. -static void checkFunctionType(Symbol *Existing, const InputFile *File, - const WasmSignature *NewSig) { - auto ExistingFunction = dyn_cast(Existing); - if (!ExistingFunction) { - reportTypeError(Existing, File, WASM_SYMBOL_TYPE_FUNCTION); - return; - } - +// Returns true if the function types match, false is there is a singature +// mismatch. +bool signatureMatches(FunctionSymbol *Existing, const WasmSignature *NewSig) { if (!NewSig) - return; + return true; - const WasmSignature *OldSig = ExistingFunction->Signature; + const WasmSignature *OldSig = Existing->Signature; if (!OldSig) { - ExistingFunction->Signature = NewSig; - return; + Existing->Signature = NewSig; + return true; } - if (*NewSig != *OldSig) - warn("function signature mismatch: " + Existing->getName() + - "\n>>> defined as " + toString(*OldSig) + " in " + - toString(Existing->getFile()) + "\n>>> defined as " + - toString(*NewSig) + " in " + toString(File)); + return *NewSig == *OldSig; } static void checkGlobalType(const Symbol *Existing, const InputFile *File, @@ -255,26 +282,45 @@ bool WasInserted; std::tie(S, WasInserted) = insert(Name, File); + auto Replace = [&](Symbol* Sym) { + // If the new defined function doesn't have signture (i.e. bitcode + // functions) but the old symbol does, then preserve the old signature + const WasmSignature *OldSig = S->getSignature(); + auto* NewSym = replaceSymbol(Sym, Name, Flags, File, Function); + if (!NewSym->Signature) + NewSym->Signature = OldSig; + }; + if (WasInserted || S->isLazy()) { - replaceSymbol(S, Name, Flags, File, Function); + Replace(S); return S; } - if (Function) - checkFunctionType(S, File, &Function->Signature); + auto ExistingFunction = dyn_cast(S); + if (!ExistingFunction) { + reportTypeError(S, File, WASM_SYMBOL_TYPE_FUNCTION); + return S; + } - if (shouldReplace(S, File, Flags)) { - // If the new defined function doesn't have signture (i.e. bitcode - // functions) but the old symbol does then preserve the old signature - const WasmSignature *OldSig = nullptr; - if (auto* F = dyn_cast(S)) - OldSig = F->Signature; - if (auto *L = dyn_cast(S)) - OldSig = L->Signature; - auto NewSym = replaceSymbol(S, Name, Flags, File, Function); - if (!NewSym->Signature) - NewSym->Signature = OldSig; + if (Function && !signatureMatches(ExistingFunction, &Function->Signature)) { + Symbol* Variant; + if (getFunctionVariant(S, &Function->Signature, File, &Variant)) + // New variant, always replace + Replace(Variant); + else if (shouldReplace(S, File, Flags)) + // Variant already exists, replace it after checking shouldReplace + Replace(Variant); + + // This variant we found take the place in the symbol table as the primary + // variant. + replace(Name, Variant); + return Variant; } + + // Existing function with matching signature. + if (shouldReplace(S, File, Flags)) + Replace(S); + return S; } @@ -287,15 +333,19 @@ bool WasInserted; std::tie(S, WasInserted) = insert(Name, File); - if (WasInserted || S->isLazy()) { + auto Replace = [&]() { replaceSymbol(S, Name, Flags, File, Segment, Address, Size); + }; + + if (WasInserted || S->isLazy()) { + Replace(); return S; } checkDataType(S, File); if (shouldReplace(S, File, Flags)) - replaceSymbol(S, Name, Flags, File, Segment, Address, Size); + Replace(); return S; } @@ -307,15 +357,19 @@ bool WasInserted; std::tie(S, WasInserted) = insert(Name, File); - if (WasInserted || S->isLazy()) { + auto Replace = [&]() { replaceSymbol(S, Name, Flags, File, Global); + }; + + if (WasInserted || S->isLazy()) { + Replace(); return S; } checkGlobalType(S, File, &Global->getType()); if (shouldReplace(S, File, Flags)) - replaceSymbol(S, Name, Flags, File, Global); + Replace(); return S; } @@ -327,15 +381,19 @@ bool WasInserted; std::tie(S, WasInserted) = insert(Name, File); - if (WasInserted || S->isLazy()) { + auto Replace = [&]() { replaceSymbol(S, Name, Flags, File, Event); + }; + + if (WasInserted || S->isLazy()) { + Replace(); return S; } checkEventType(S, File, &Event->getType(), &Event->Signature); if (shouldReplace(S, File, Flags)) - replaceSymbol(S, Name, Flags, File, Event); + Replace(); return S; } @@ -350,13 +408,25 @@ bool WasInserted; std::tie(S, WasInserted) = insert(Name, File); - if (WasInserted) + auto Replace = [&]() { replaceSymbol(S, Name, ImportName, ImportModule, Flags, File, Sig); + }; + + if (WasInserted) + Replace(); else if (auto *Lazy = dyn_cast(S)) Lazy->fetch(); - else - checkFunctionType(S, File, Sig); + else { + auto ExistingFunction = dyn_cast(S); + if (!ExistingFunction) { + reportTypeError(S, File, WASM_SYMBOL_TYPE_FUNCTION); + return S; + } + if (!signatureMatches(ExistingFunction, Sig)) + if (getFunctionVariant(S, Sig, File, &S)) + Replace(); + } return S; } @@ -438,6 +508,44 @@ return Comdats.insert(CachedHashStringRef(Name)).second; } +// The new signature doesn't match. Create a variant to the symbol with the +// signature encoded in the name and return that instead. These symbols are +// then unified later in handleSymbolVariants. +bool SymbolTable::getFunctionVariant(Symbol* Sym, const WasmSignature *Sig, + const InputFile *File, Symbol **Out) { + StringRef NewName = getVariantName(Sym->getName(), *Sig); + LLVM_DEBUG(dbgs() << "getFunctionVariant: " << Sym->getName() << " -> " << NewName + << " " << toString(*Sig) << "\n"); + Symbol *Variant = nullptr; + + // Linear search through symbol variants. Should never be more than two + // or three entries here. + auto &Variants = SymVariants[CachedHashStringRef(Sym->getName())]; + if (Variants.size() == 0) + Variants.push_back(Sym); + + for (Symbol* V : Variants) { + if (*V->getSignature() == *Sig) { + Variant = V; + break; + } + } + + bool WasAdded = !Variant; + if (WasAdded) { + // Create a new variant; + LLVM_DEBUG(dbgs() << "added new variant\n"); + Variant = reinterpret_cast(make()); + Variants.push_back(Variant); + } else { + LLVM_DEBUG(dbgs() << "variant already exists: " << toString(*Variant) << "\n"); + assert(*Variant->getSignature() == *Sig); + } + + *Out = Variant; + return WasAdded; +} + // Set a flag for --trace-symbol so that we can print out a log message // if a new symbol with the same name is inserted into the symbol table. void SymbolTable::trace(StringRef Name) { @@ -451,14 +559,16 @@ // Replace the given symbol body with an unreachable function. // This is used by handleWeakUndefines in order to generate a callable -// equivalent of an undefined function. +// equivalent of an undefined function and also handleSymbolVariants for +// undefined functions that don't match the signature of the definition. InputFunction *SymbolTable::replaceWithUnreachable(Symbol *Sym, const WasmSignature &Sig, StringRef DebugName) { auto *Func = make(Sig, Sym->getName(), DebugName); Func->setBody(UnreachableFn); SyntheticFunctions.emplace_back(Func); - replaceSymbol(Sym, Sym->getName(), Sym->getFlags(), nullptr, Func); + replaceSymbol(Sym, Sym->getName(), Sym->getFlags(), nullptr, + Func); return Func; } @@ -471,21 +581,15 @@ if (!Sym->isUndefWeak()) continue; - const WasmSignature *Sig = nullptr; - - if (auto *FuncSym = dyn_cast(Sym)) { + const WasmSignature *Sig = Sym->getSignature(); + if (!Sig) { // It is possible for undefined functions not to have a signature (eg. if // added via "--undefined"), but weak undefined ones do have a signature. - assert(FuncSym->Signature); - Sig = FuncSym->Signature; - } else if (auto *LazySym = dyn_cast(Sym)) { - // Lazy symbols may not be functions and therefore can have a null - // signature. - Sig = LazySym->Signature; - } - - if (!Sig) + // Lazy symbols may not be functions and therefore Sig can still be null + // in some circumstantce. + assert(!isa(Sym)); continue; + } // Add a synthetic dummy for weak undefined functions. These dummies will // be GC'd if not used as the target of any "call" instructions. @@ -498,3 +602,64 @@ Sym->setHidden(true); } } + +static void reportFunctionSignatureMismatch(StringRef SymName, + FunctionSymbol *A, + FunctionSymbol *B, bool Error) { + std::string msg = ("function signature mismatch: " + SymName + + "\n>>> defined as " + toString(*A->Signature) + " in " + + toString(A->getFile()) + "\n>>> defined as " + + toString(*B->Signature) + " in " + toString(B->getFile())) + .str(); + if (Error) + error(msg); + else + warn(msg); +} + +// Remove any variant symbols that were created due to function signature +// mismatches. +void SymbolTable::handleSymbolVariants() { + for (auto Pair : SymVariants) { + // Push the initial symbol onto the list of variants. + StringRef SymName = Pair.first.val(); + std::vector &Variants = Pair.second; + +#ifndef NDEBUG + dbgs() << "symbol with (" << Variants.size() + << ") variants: " << SymName << "\n"; + for (auto *S: Variants) { + auto *F = cast(S); + dbgs() << " variant: " + F->getName() << " " << toString(*F->Signature) << "\n"; + } +#endif + + // Find the one definition. + DefinedFunction *Defined = nullptr; + for (auto *Symbol : Variants) { + if (auto F = dyn_cast(Symbol)) { + Defined = F; + break; + } + } + + // If there are no definitions, and the undefined symbols disagree on + // the signature, there is not we can do since we don't know which one + // to use as the signature on the import. + if (!Defined) { + reportFunctionSignatureMismatch(SymName, + cast(Variants[0]), + cast(Variants[1]), true); + return; + } + + for (auto *Symbol : Variants) { + if (Symbol != Defined) { + auto *F = cast(Symbol); + reportFunctionSignatureMismatch(SymName, F, Defined, false); + StringRef DebugName = Saver.save("unreachable:" + toString(*F)); + replaceWithUnreachable(F, *F->Signature, DebugName); + } + } + } +} Index: lld/trunk/wasm/Symbols.h =================================================================== --- lld/trunk/wasm/Symbols.h +++ lld/trunk/wasm/Symbols.h @@ -105,6 +105,8 @@ // True if this symbol is specified by --trace-symbol option. unsigned Traced : 1; + const WasmSignature* getSignature() const; + protected: Symbol(StringRef Name, Kind K, uint32_t Flags, InputFile *F) : IsUsedInRegularObj(false), ForceExport(false), Traced(false), Index: lld/trunk/wasm/Symbols.cpp =================================================================== --- lld/trunk/wasm/Symbols.cpp +++ lld/trunk/wasm/Symbols.cpp @@ -45,6 +45,14 @@ llvm_unreachable("invalid symbol kind"); } +const WasmSignature *Symbol::getSignature() const { + if (auto* F = dyn_cast(this)) + return F->Signature; + if (auto *L = dyn_cast(this)) + return L->Signature; + return nullptr; +} + InputChunk *Symbol::getChunk() const { if (auto *F = dyn_cast(this)) return F->Function;