Index: docs/WebAssembly.rst =================================================================== --- docs/WebAssembly.rst +++ docs/WebAssembly.rst @@ -95,6 +95,21 @@ For more specific details on how this is achieved see the tool conventions on linking_. +Function Signatrues +~~~~~~~~~~~~~~~~~~~ + +One way in which the WebAssembly linker differs from tradiation native linkers +is that function signature checking is strict in WebAssembly. i.e. its not +possible to call a function with the wrong signature. Even though this is +undefined behaviour in C/C++ its not uncommon to find this in real world C/C++ +programs. For example, a call site in one complication unit which calls a +function defined in another complication unit but with too many arguments. + +To work around this when the WebAssembly linker finds an undefined function +symbol with the wrong signature it replaces it with an auto-generated function +stub which contains only the ``unreachable`` instruction. This effectively +turns a validation error into a runtime error. + Imports and Exports ~~~~~~~~~~~~~~~~~~~ Index: test/wasm/archive-weak-undefined.ll =================================================================== --- test/wasm/archive-weak-undefined.ll +++ test/wasm/archive-weak-undefined.ll @@ -15,5 +15,5 @@ ret void } -; CHECK: Name: undefined function ret32 +; CHECK: Name: 'undefined:ret32' ; CHECK-NOT: Name: ret32 Index: test/wasm/cxx-mangling.ll =================================================================== --- test/wasm/cxx-mangling.ll +++ test/wasm/cxx-mangling.ll @@ -58,8 +58,8 @@ ; CHECK-NEXT: - Index: 0 ; CHECK-NEXT: Name: __wasm_call_ctors ; CHECK-NEXT: - Index: 1 -; DEMANGLE-NEXT: Name: 'undefined function bar(int)' -; MANGLE-NEXT: Name: undefined function _Z3bari +; DEMANGLE-NEXT: Name: 'undefined:bar(int)' +; MANGLE-NEXT: Name: 'undefined:_Z3bari' ; CHECK-NEXT: - Index: 2 ; DEMANGLE-NEXT: Name: 'foo(int)' ; MANGLE-NEXT: Name: _Z3fooi Index: test/wasm/lto/signature-mismatch.ll =================================================================== --- test/wasm/lto/signature-mismatch.ll +++ test/wasm/lto/signature-mismatch.ll @@ -15,5 +15,6 @@ ret void } +; CHECK: error: function signature mismatch: f ; CHECK: >>> defined as (i32) -> void in {{.*}}signature-mismatch.ll.tmp1.o ; CHECK: >>> defined as () -> void in lto.tmp Index: test/wasm/lto/weak-undefined.ll =================================================================== --- test/wasm/lto/weak-undefined.ll +++ test/wasm/lto/weak-undefined.ll @@ -17,4 +17,4 @@ ret void } -; CHECK: Name: undefined function foo +; CHECK: Name: 'undefined:foo' Index: test/wasm/signature-mismatch-weak.ll =================================================================== --- test/wasm/signature-mismatch-weak.ll +++ test/wasm/signature-mismatch-weak.ll @@ -14,5 +14,5 @@ } ; CHECK: warning: function signature mismatch: weakFn -; CHECK-NEXT: >>> defined as () -> i32 in {{.*}}signature-mismatch-weak.ll.tmp.o +; CHECK-NEXT: >>> defined as () -> i32 in {{.*}}signature-mismatch-weak.ll.tmp.weak.o ; CHECK-NEXT: >>> defined as () -> i64 in {{.*}}signature-mismatch-weak.ll.tmp.strong.o Index: test/wasm/signature-mismatch.ll =================================================================== --- test/wasm/signature-mismatch.ll +++ test/wasm/signature-mismatch.ll @@ -1,10 +1,15 @@ ; RUN: llc -filetype=obj %p/Inputs/ret32.ll -o %t.ret32.o ; RUN: llc -filetype=obj %s -o %t.main.o +; RUN: wasm-ld -o %t.wasm %t.main.o %t.ret32.o +; RUN: obj2yaml %t.wasm | FileCheck %s -check-prefix=YAML + ; RUN: not wasm-ld --fatal-warnings -o %t.wasm %t.main.o %t.ret32.o 2>&1 | FileCheck %s ; Run the test again by with the object files in the other order to verify ; the check works when the undefined symbol is resolved by an existing defined ; one. -; RUN: not wasm-ld --fatal-warnings -o %t.wasm %t.ret32.o %t.main.o 2>&1 | FileCheck %s -check-prefix=REVERSE +; RUN: not wasm-ld --fatal-warnings -o %t.wasm %t.ret32.o %t.main.o 2>&1 | FileCheck %s +; We also have a specific flag to enable strict signature checking +; RUN: not wasm-ld --signature-check-strict -o %t.wasm %t.main.o %t.ret32.o 2>&1 | FileCheck %s target triple = "wasm32-unknown-unknown" @@ -21,6 +26,12 @@ ; CHECK-NEXT: >>> defined as (i32, i64, i32) -> i32 in {{.*}}.main.o ; CHECK-NEXT: >>> defined as (f32) -> i32 in {{.*}}.ret32.o -; REVERSE: error: function signature mismatch: ret32 -; REVERSE-NEXT: >>> defined as (f32) -> i32 in {{.*}}.ret32.o -; REVERSE-NEXT: >>> defined as (i32, i64, i32) -> i32 in {{.*}}.main.o +; YAML: - Type: CUSTOM +; YAML-NEXT: Name: name +; YAML-NEXT: FunctionNames: +; YAML-NEXT: - Index: 0 +; YAML-NEXT: Name: __wasm_call_ctors +; YAML-NEXT: - Index: 1 +; YAML-NEXT: Name: 'unreachable:ret32' +; YAML-NEXT: - Index: 2 +; YAML-NEXT: Name: _start Index: test/wasm/undefined-weak-call.ll =================================================================== --- test/wasm/undefined-weak-call.ll +++ test/wasm/undefined-weak-call.ll @@ -110,11 +110,11 @@ ; CHECK-NEXT: - Index: 0 ; CHECK-NEXT: Name: __wasm_call_ctors ; CHECK-NEXT: - Index: 1 -; CHECK-NEXT: Name: undefined function weakFunc1 +; CHECK-NEXT: Name: 'undefined:weakFunc1' ; CHECK-NEXT: - Index: 2 -; CHECK-NEXT: Name: undefined function weakFunc2 +; CHECK-NEXT: Name: 'undefined:weakFunc2' ; CHECK-NEXT: - Index: 3 -; CHECK-NEXT: Name: undefined function weakFunc3 +; CHECK-NEXT: Name: 'undefined:weakFunc3' ; CHECK-NEXT: - Index: 4 ; CHECK-NEXT: Name: callWeakFuncs ; CHECK-NEXT: ... Index: wasm/Config.h =================================================================== --- wasm/Config.h +++ wasm/Config.h @@ -35,6 +35,7 @@ bool Relocatable; bool SaveTemps; bool Shared; + bool SignatureCheckStrict; bool StripAll; bool StripDebug; bool StackFirst; Index: wasm/Driver.cpp =================================================================== --- wasm/Driver.cpp +++ wasm/Driver.cpp @@ -286,53 +286,6 @@ return Arg->getValue(); } -static const uint8_t UnreachableFn[] = { - 0x03 /* ULEB length */, 0x00 /* ULEB num locals */, - 0x00 /* opcode unreachable */, 0x0b /* opcode end */ -}; - -// For weak undefined functions, there may be "call" instructions that reference -// the symbol. In this case, we need to synthesise a dummy/stub function that -// will abort at runtime, so that relocations can still provided an operand to -// the call instruction that passes Wasm validation. -static void handleWeakUndefines() { - for (Symbol *Sym : Symtab->getSymbols()) { - if (!Sym->isUndefWeak()) - continue; - - const WasmSignature *Sig = nullptr; - - if (auto *FuncSym = dyn_cast(Sym)) { - // It is possible for undefined functions not to have a signature (eg. if - // added via "--undefined"), but weak undefined ones do have a signature. - assert(FuncSym->Signature); - Sig = FuncSym->Signature; - } else if (auto *LazySym = dyn_cast(Sym)) { - // Lazy symbols may not be functions and therefore can have a null - // signature. - Sig = LazySym->Signature; - } - - if (!Sig) - continue; - - - // Add a synthetic dummy for weak undefined functions. These dummies will - // be GC'd if not used as the target of any "call" instructions. - std::string SymName = toString(*Sym); - StringRef DebugName = Saver.save("undefined function " + SymName); - auto *Func = make(*Sig, Sym->getName(), DebugName); - Func->setBody(UnreachableFn); - // Ensure it compares equal to the null pointer, and so that table relocs - // don't pull in the stub body (only call-operand relocs should do that). - Func->setTableIndex(0); - Symtab->SyntheticFunctions.emplace_back(Func); - // Hide our dummy to prevent export. - uint32_t Flags = WASM_SYMBOL_VISIBILITY_HIDDEN; - replaceSymbol(Sym, Sym->getName(), Flags, nullptr, Func); - } -} - // Some Config members do not directly correspond to any particular // command line options, but computed based on other Config values. // This function initialize such members. See Config.h for the details @@ -368,6 +321,7 @@ Config->SaveTemps = Args.hasArg(OPT_save_temps); Config->SearchPaths = args::getStrings(Args, OPT_L); Config->Shared = Args.hasArg(OPT_shared); + Config->SignatureCheckStrict = Args.hasArg(OPT_signature_check_strict); Config->StripAll = Args.hasArg(OPT_strip_all); Config->StripDebug = Args.hasArg(OPT_strip_debug); Config->StackFirst = Args.hasArg(OPT_stack_first); @@ -601,9 +555,6 @@ Config->Entry); } - // Make sure we have resolved all symbols. - if (!Config->AllowUndefined) - Symtab->reportRemainingUndefines(); } if (errorCount()) @@ -615,10 +566,21 @@ if (errorCount()) return; - // Add synthetic dummies for weak undefined functions. Must happen - // after LTO otherwise functions may not yet have signatures. - if (!Config->Relocatable) - handleWeakUndefines(); + // Resolve any variant symbols that were created due to signature + // mismatchs. + Symtab->handleSymbolVariants(); + if (errorCount()) + return; + + if (!Config->Relocatable) { + // Add synthetic dummies for weak undefined functions. Must happen + // after LTO otherwise functions may not yet have signatures. + Symtab->handleWeakUndefines(); + + // Make sure we have resolved all symbols. + if (!Config->AllowUndefined) + Symtab->reportRemainingUndefines(); + } if (EntrySym) EntrySym->setHidden(false); Index: wasm/Options.td =================================================================== --- wasm/Options.td +++ wasm/Options.td @@ -91,6 +91,9 @@ def strip_debug: F<"strip-debug">, HelpText<"Strip debugging information">; +def signature_check_strict : F<"signature-check-strict">, + HelpText<"Error on function signature mismatch">; + def threads: F<"threads">, HelpText<"Run the linker multi-threaded">; def trace: F<"trace">, HelpText<"Print the names of the input files">; Index: wasm/SymbolTable.h =================================================================== --- wasm/SymbolTable.h +++ wasm/SymbolTable.h @@ -77,16 +77,28 @@ DefinedFunction *addSyntheticFunction(StringRef Name, uint32_t Flags, InputFunction *Function); + void handleSymbolVariants(); + void handleWeakUndefines(); + private: - std::pair insert(StringRef Name, InputFile *File); + std::pair insert(StringRef Name, const InputFile *File); std::pair insertName(StringRef Name); + bool createFunctionVariant(StringRef Name, const WasmSignature *Sig, + const InputFile *File, Symbol **Out); + InputFunction *replaceWithUnreachable(Symbol *Sym, const WasmSignature &Sig, + StringRef DebugName); + // Maps symbol names to index into the SymVector. -1 means that symbols // is to not yet in the vector but it should have tracing enabled if it is // ever added. llvm::DenseMap SymMap; std::vector SymVector; + // For certain symbols types, e.g. function symbols, we allow for muliple + // variants of the same symbol with different signatures. + llvm::DenseMap> SymVariants; + llvm::DenseSet Comdats; // For LTO. Index: wasm/SymbolTable.cpp =================================================================== --- wasm/SymbolTable.cpp +++ wasm/SymbolTable.cpp @@ -102,7 +102,8 @@ return {Sym, true}; } -std::pair SymbolTable::insert(StringRef Name, InputFile *File) { +std::pair SymbolTable::insert(StringRef Name, + const InputFile *File) { Symbol *S; bool WasInserted; std::tie(S, WasInserted) = insertName(Name); @@ -121,30 +122,55 @@ " in " + toString(File)); } +static char encodeValType(ValType Type) { + switch (Type) { + case ValType::I32: + return 'i'; + case ValType::I64: + return 'j'; + case ValType::F32: + return 'f'; + case ValType::F64: + return 'd'; + case ValType::V128: + return 'V'; + case ValType::EXCEPT_REF: + return 'e'; + } + llvm_unreachable("invalid wasm type"); +} + +static std::string encodeSignature(const WasmSignature& Sig) { + std::string S = ":"; + for (ValType Type : Sig.Returns) + S += encodeValType(Type); + S += ':'; + for (ValType Type : Sig.Params) + S += encodeValType(Type); + return S; +} + // Check the type of new symbol matches that of the symbol is replacing. -// For functions this can also involve verifying that the signatures match. -static void checkFunctionType(Symbol *Existing, const InputFile *File, - const WasmSignature *NewSig) { +// Returns true if the function types match, false is there is a singature +// mismatch. +bool checkFunctionType(Symbol *Existing, const InputFile *File, + const WasmSignature *NewSig) { auto ExistingFunction = dyn_cast(Existing); if (!ExistingFunction) { reportTypeError(Existing, File, WASM_SYMBOL_TYPE_FUNCTION); - return; + return true; } if (!NewSig) - return; + return true; const WasmSignature *OldSig = ExistingFunction->Signature; if (!OldSig) { ExistingFunction->Signature = NewSig; - return; + return true; } - if (*NewSig != *OldSig) - warn("function signature mismatch: " + Existing->getName() + - "\n>>> defined as " + toString(*OldSig) + " in " + - toString(Existing->getFile()) + "\n>>> defined as " + - toString(*NewSig) + " in " + toString(File)); + return *NewSig == *OldSig; } static void checkGlobalType(const Symbol *Existing, const InputFile *File, @@ -244,6 +270,29 @@ return true; } +// The new signature doesn't match. Create a variant to the symbol with +// the signature encoded in the name and return that instead. +// These symbols are then unified later in handleSymbolVariants. +bool SymbolTable::createFunctionVariant(StringRef Name, + const WasmSignature *Sig, + const InputFile *File, + Symbol **Out) { + StringRef NewName = Saver.save(Name + encodeSignature(*Sig)); + bool WasInserted; + Symbol* NewSym; + std::tie(NewSym, WasInserted) = insert(NewName, File); + if (WasInserted) { + LLVM_DEBUG(dbgs() << "add new variant: " << NewSym << " name: " << NewName + << toString(*NewSym) << "\n"); + SymVariants[CachedHashStringRef(Name)].push_back(NewSym); + } else { + assert(*cast(*Out)->Signature == *Sig); + } + NewSym->setName(Name); + *Out = NewSym; + return WasInserted; +} + Symbol *SymbolTable::addDefinedFunction(StringRef Name, uint32_t Flags, InputFile *File, InputFunction *Function) { @@ -259,21 +308,28 @@ return S; } - if (Function) - checkFunctionType(S, File, &Function->Signature); + bool createdNewVariant = false; + if (Function && !checkFunctionType(S, File, &Function->Signature)) + createdNewVariant = + createFunctionVariant(Name, &Function->Signature, File, &S); - if (shouldReplace(S, File, Flags)) { + if (createdNewVariant || shouldReplace(S, File, Flags)) { // If the new defined function doesn't have signture (i.e. bitcode - // functions) but the old symbol does then preserve the old signature + // functions) but the old symbol does, then preserve the old signature const WasmSignature *OldSig = nullptr; if (auto* F = dyn_cast(S)) OldSig = F->Signature; if (auto *L = dyn_cast(S)) OldSig = L->Signature; - auto NewSym = replaceSymbol(S, Name, Flags, File, Function); + + auto NewSym = replaceSymbol(S, S->getName(), Flags, File, Function); if (!NewSym->Signature) NewSym->Signature = OldSig; + LLVM_DEBUG(dbgs() << "done addDefinedFunction: " << Name << " [" + << toString(*NewSym->Signature) + << "]\n"); } + return S; } @@ -353,7 +409,9 @@ else if (auto *Lazy = dyn_cast(S)) Lazy->fetch(); else - checkFunctionType(S, File, Sig); + if (!checkFunctionType(S, File, Sig)) + if (createFunctionVariant(Name, Sig, File, &S)) + replaceSymbol(S, S->getName(), Module, Flags, File, Sig); return S; } @@ -438,3 +496,115 @@ void SymbolTable::trace(StringRef Name) { SymMap.insert({CachedHashStringRef(Name), -1}); } + +static const uint8_t UnreachableFn[] = { + 0x03 /* ULEB length */, 0x00 /* ULEB num locals */, + 0x00 /* opcode unreachable */, 0x0b /* opcode end */ +}; + +// Replace the given symbol body with an unreachable function. +// This is used by handleWeakUndefines in order to generate a callable +// equivalent of an undefined function and also handleSymbolVariants for +// undefined functions that don't match the signature of the definition. +InputFunction *SymbolTable::replaceWithUnreachable(Symbol *Sym, + const WasmSignature &Sig, + StringRef DebugName) { + auto *Func = make(Sig, Sym->getName(), DebugName); + Func->setBody(UnreachableFn); + SyntheticFunctions.emplace_back(Func); + replaceSymbol(Sym, Sym->getName(), Sym->getFlags(), nullptr, Func); + return Func; +} + +// For weak undefined functions, there may be "call" instructions that reference +// the symbol. In this case, we need to synthesise a dummy/stub function that +// will abort at runtime, so that relocations can still provided an operand to +// the call instruction that passes Wasm validation. +void SymbolTable::handleWeakUndefines() { + for (Symbol *Sym : getSymbols()) { + if (!Sym->isUndefWeak()) + continue; + + const WasmSignature *Sig = nullptr; + + if (auto *FuncSym = dyn_cast(Sym)) { + // It is possible for undefined functions not to have a signature (eg. if + // added via "--undefined"), but weak undefined ones do have a signature. + assert(FuncSym->Signature); + Sig = FuncSym->Signature; + } else if (auto *LazySym = dyn_cast(Sym)) { + // Lazy symbols may not be functions and therefore can have a null + // signature. + Sig = LazySym->Signature; + } + + if (!Sig) + continue; + + // Add a synthetic dummy for weak undefined functions. These dummies will + // be GC'd if not used as the target of any "call" instructions. + StringRef DebugName = Saver.save("undefined:" + toString(*Sym)); + InputFunction* Func = replaceWithUnreachable(Sym, *Sig, DebugName); + // Ensure it compares equal to the null pointer, and so that table relocs + // don't pull in the stub body (only call-operand relocs should do that). + Func->setTableIndex(0); + // Hide our dummy to prevent export. + Sym->setHidden(true); + } +} + +static void reportFunctionSignatureMismatch(FunctionSymbol *A, + FunctionSymbol *B, bool Error) { + std::string msg = ("function signature mismatch: " + A->getName() + + "\n>>> defined as " + toString(*A->Signature) + " in " + + toString(A->getFile()) + "\n>>> defined as " + + toString(*B->Signature) + " in " + toString(B->getFile())) + .str(); + if (Error) + error(msg); + else + warn(msg); +} + +// Remove any variant symbols that were created due to function signature +// mismatches. +void SymbolTable::handleSymbolVariants() { + for (auto Pair : SymVariants) { + LLVM_DEBUG(dbgs() << "symbol with variants: " << Pair.first.val() << "\n"); + + // Push the initial symbol onto the list of variants. + std::vector& Variants = Pair.second; + Variants.push_back(find(Pair.first.val())); + LLVM_DEBUG(dbgs() << Variants.size() << "\n"); + + // Find the one definition. + DefinedFunction* Defined = nullptr; + for (auto* Symbol : Variants) { + if (auto F = dyn_cast(Symbol)) { + Defined = F; + break; + } + } + + // If there are no definitions, and the undefined symbols disagree on + // the signature, there is not we can do since we don't know which one + // to use as the signature on the import. + if (!Defined) { + reportFunctionSignatureMismatch(cast(Variants[0]), + cast(Variants[1]), + true); + return; + } + + for (auto* Symbol : Variants) { + if (Symbol != Defined) { + auto* F = cast(Symbol); + reportFunctionSignatureMismatch(F, Defined, + Config->SignatureCheckStrict); + StringRef DebugName = Saver.save("unreachable:" + toString(*F)); + replaceWithUnreachable(F, *F->Signature, DebugName); + } + } + + } +} Index: wasm/Symbols.h =================================================================== --- wasm/Symbols.h +++ wasm/Symbols.h @@ -73,9 +73,13 @@ // Returns the symbol name. StringRef getName() const { return Name; } + void setName(StringRef S) { Name = S; } + // Returns the file from which this symbol was created. InputFile *getFile() const { return File; } + uint32_t getFlags() const { return Flags; } + InputChunk *getChunk() const; // Indicates that the section or import for this symbol will be included in