Index: docs/WebAssembly.rst =================================================================== --- docs/WebAssembly.rst +++ docs/WebAssembly.rst @@ -95,6 +95,21 @@ For more specific details on how this is achieved see the tool conventions on linking_. +Function Signatures +~~~~~~~~~~~~~~~~~~~ + +One way in which the WebAssembly linker differs from traditional native linkers +is that function signature checking is strict in WebAssembly, i.e. it's not +possible to call a function with the wrong signature. Even though this is +undefined behaviour in C/C++ it's not uncommon to find this in real-world C/C++ +programs. For example, a call site in one compilation unit which calls a +function defined in another compilation unit but with too many arguments. + +To work around this, when the WebAssembly linker finds an undefined function +symbol with the wrong signature it replaces it with an auto-generated function +stub which contains only the ``unreachable`` instruction. This effectively +turns a validation error into a runtime error. 
+ Imports and Exports ~~~~~~~~~~~~~~~~~~~ Index: test/wasm/lto/signature-mismatch.ll =================================================================== --- test/wasm/lto/signature-mismatch.ll +++ test/wasm/lto/signature-mismatch.ll @@ -15,5 +15,6 @@ ret void } +; CHECK: error: function signature mismatch: f ; CHECK: >>> defined as (i32) -> void in {{.*}}signature-mismatch.ll.tmp1.o ; CHECK: >>> defined as () -> void in lto.tmp Index: test/wasm/signature-mismatch-weak.ll =================================================================== --- test/wasm/signature-mismatch-weak.ll +++ test/wasm/signature-mismatch-weak.ll @@ -14,5 +14,5 @@ } ; CHECK: warning: function signature mismatch: weakFn -; CHECK-NEXT: >>> defined as () -> i32 in {{.*}}signature-mismatch-weak.ll.tmp.o +; CHECK-NEXT: >>> defined as () -> i32 in {{.*}}signature-mismatch-weak.ll.tmp.weak.o ; CHECK-NEXT: >>> defined as () -> i64 in {{.*}}signature-mismatch-weak.ll.tmp.strong.o Index: test/wasm/signature-mismatch.ll =================================================================== --- test/wasm/signature-mismatch.ll +++ test/wasm/signature-mismatch.ll @@ -1,10 +1,15 @@ ; RUN: llc -filetype=obj %p/Inputs/ret32.ll -o %t.ret32.o ; RUN: llc -filetype=obj %s -o %t.main.o +; RUN: wasm-ld -o %t.wasm %t.main.o %t.ret32.o +; RUN: obj2yaml %t.wasm | FileCheck %s -check-prefix=YAML + ; RUN: not wasm-ld --fatal-warnings -o %t.wasm %t.main.o %t.ret32.o 2>&1 | FileCheck %s ; Run the test again by with the object files in the other order to verify ; the check works when the undefined symbol is resolved by an existing defined ; one. 
-; RUN: not wasm-ld --fatal-warnings -o %t.wasm %t.ret32.o %t.main.o 2>&1 | FileCheck %s -check-prefix=REVERSE +; RUN: not wasm-ld --fatal-warnings -o %t.wasm %t.ret32.o %t.main.o 2>&1 | FileCheck %s +; We also have a specific flag to enable strict signature checking +; RUN: not wasm-ld --signature-check-strict -o %t.wasm %t.main.o %t.ret32.o 2>&1 | FileCheck %s target triple = "wasm32-unknown-unknown" @@ -21,6 +26,12 @@ ; CHECK-NEXT: >>> defined as (i32, i64, i32) -> i32 in {{.*}}.main.o ; CHECK-NEXT: >>> defined as (f32) -> i32 in {{.*}}.ret32.o -; REVERSE: error: function signature mismatch: ret32 -; REVERSE-NEXT: >>> defined as (f32) -> i32 in {{.*}}.ret32.o -; REVERSE-NEXT: >>> defined as (i32, i64, i32) -> i32 in {{.*}}.main.o +; YAML: - Type: CUSTOM +; YAML-NEXT: Name: name +; YAML-NEXT: FunctionNames: +; YAML-NEXT: - Index: 0 +; YAML-NEXT: Name: __wasm_call_ctors +; YAML-NEXT: - Index: 1 +; YAML-NEXT: Name: 'unreachable:ret32' +; YAML-NEXT: - Index: 2 +; YAML-NEXT: Name: _start Index: wasm/Config.h =================================================================== --- wasm/Config.h +++ wasm/Config.h @@ -35,6 +35,7 @@ bool Relocatable; bool SaveTemps; bool Shared; + bool SignatureCheckStrict; bool StripAll; bool StripDebug; bool StackFirst; Index: wasm/Driver.cpp =================================================================== --- wasm/Driver.cpp +++ wasm/Driver.cpp @@ -321,6 +321,7 @@ Config->SaveTemps = Args.hasArg(OPT_save_temps); Config->SearchPaths = args::getStrings(Args, OPT_L); Config->Shared = Args.hasArg(OPT_shared); + Config->SignatureCheckStrict = Args.hasArg(OPT_signature_check_strict); Config->StripAll = Args.hasArg(OPT_strip_all); Config->StripDebug = Args.hasArg(OPT_strip_debug); Config->StackFirst = Args.hasArg(OPT_stack_first); @@ -554,9 +555,6 @@ Config->Entry); } - // Make sure we have resolved all symbols. 
- if (!Config->AllowUndefined) - Symtab->reportRemainingUndefines(); } if (errorCount()) @@ -568,11 +566,22 @@ if (errorCount()) return; - // Add synthetic dummies for weak undefined functions. Must happen - // after LTO otherwise functions may not yet have signatures. - if (!Config->Relocatable) + // Resolve any variant symbols that were created due to signature + // mismatches. + Symtab->handleSymbolVariants(); + if (errorCount()) + return; + + if (!Config->Relocatable) { + // Add synthetic dummies for weak undefined functions. Must happen + // after LTO otherwise functions may not yet have signatures. Symtab->handleWeakUndefines(); + // Make sure we have resolved all symbols. + if (!Config->AllowUndefined) + Symtab->reportRemainingUndefines(); + } + if (EntrySym) EntrySym->setHidden(false); Index: wasm/Options.td =================================================================== --- wasm/Options.td +++ wasm/Options.td @@ -91,6 +91,9 @@ def strip_debug: F<"strip-debug">, HelpText<"Strip debugging information">; +def signature_check_strict : F<"signature-check-strict">, + HelpText<"Error on function signature mismatch">; + def threads: F<"threads">, HelpText<"Run the linker multi-threaded">; def trace: F<"trace">, HelpText<"Print the names of the input files">; Index: wasm/SymbolTable.h =================================================================== --- wasm/SymbolTable.h +++ wasm/SymbolTable.h @@ -77,12 +77,15 @@ DefinedFunction *addSyntheticFunction(StringRef Name, uint32_t Flags, InputFunction *Function); + void handleSymbolVariants(); void handleWeakUndefines(); private: std::pair insert(StringRef Name, const InputFile *File); std::pair insertName(StringRef Name); + bool createFunctionVariant(StringRef Name, const WasmSignature *Sig, + const InputFile *File, Symbol **Out); InputFunction *replaceWithUnreachable(Symbol *Sym, const WasmSignature &Sig, StringRef DebugName); @@ -92,6 +95,10 @@ llvm::DenseMap SymMap; std::vector SymVector; + // For certain 
symbol types, e.g. function symbols, we allow for multiple + // variants of the same symbol with different signatures. + llvm::DenseMap> SymVariants; + llvm::DenseSet Comdats; // For LTO. Index: wasm/SymbolTable.cpp =================================================================== --- wasm/SymbolTable.cpp +++ wasm/SymbolTable.cpp @@ -122,30 +122,55 @@ " in " + toString(File)); } +static char encodeValType(ValType Type) { + switch (Type) { + case ValType::I32: + return 'i'; + case ValType::I64: + return 'j'; + case ValType::F32: + return 'f'; + case ValType::F64: + return 'd'; + case ValType::V128: + return 'V'; + case ValType::EXCEPT_REF: + return 'e'; + } + llvm_unreachable("invalid wasm type"); +} + +static std::string encodeSignature(const WasmSignature& Sig) { + std::string S = ":"; + for (ValType Type : Sig.Returns) + S += encodeValType(Type); + S += ':'; + for (ValType Type : Sig.Params) + S += encodeValType(Type); + return S; +} + // Check the type of new symbol matches that of the symbol is replacing. -// For functions this can also involve verifying that the signatures match. -static void checkFunctionType(Symbol *Existing, const InputFile *File, - const WasmSignature *NewSig) { +// Returns true if the function types match, false if there is a signature +// mismatch. 
+static bool checkFunctionType(Symbol *Existing, const InputFile *File, + const WasmSignature *NewSig) { auto ExistingFunction = dyn_cast<FunctionSymbol>(Existing); if (!ExistingFunction) { reportTypeError(Existing, File, WASM_SYMBOL_TYPE_FUNCTION); - return; + return true; } if (!NewSig) - return; + return true; const WasmSignature *OldSig = ExistingFunction->Signature; if (!OldSig) { ExistingFunction->Signature = NewSig; - return; + return true; } - if (*NewSig != *OldSig) - warn("function signature mismatch: " + Existing->getName() + - "\n>>> defined as " + toString(*OldSig) + " in " + - toString(Existing->getFile()) + "\n>>> defined as " + - toString(*NewSig) + " in " + toString(File)); + return *NewSig == *OldSig; } static void checkGlobalType(const Symbol *Existing, const InputFile *File, @@ -245,6 +270,29 @@ return true; } +// The new signature doesn't match. Create a variant of the symbol with +// the signature encoded in the name and return that instead. +// These symbols are then unified later in handleSymbolVariants. 
+bool SymbolTable::createFunctionVariant(StringRef Name, + const WasmSignature *Sig, + const InputFile *File, + Symbol **Out) { + StringRef NewName = Saver.save(Name + encodeSignature(*Sig)); + bool WasInserted; + Symbol* NewSym; + std::tie(NewSym, WasInserted) = insert(NewName, File); + if (WasInserted) { + LLVM_DEBUG(dbgs() << "add new variant: " << NewSym << " name: " << NewName + << toString(*NewSym) << "\n"); + SymVariants[CachedHashStringRef(Name)].push_back(NewSym); + } else { + assert(*cast<FunctionSymbol>(NewSym)->Signature == *Sig); + } + NewSym->setName(Name); + *Out = NewSym; + return WasInserted; +} + Symbol *SymbolTable::addDefinedFunction(StringRef Name, uint32_t Flags, InputFile *File, InputFunction *Function) { @@ -260,21 +308,28 @@ return S; } - if (Function) - checkFunctionType(S, File, &Function->Signature); + bool createdNewVariant = false; + if (Function && !checkFunctionType(S, File, &Function->Signature)) + createdNewVariant = + createFunctionVariant(Name, &Function->Signature, File, &S); - if (shouldReplace(S, File, Flags)) { + if (createdNewVariant || shouldReplace(S, File, Flags)) { // If the new defined function doesn't have signture (i.e. 
bitcode - // functions) but the old symbol does then preserve the old signature + // functions) but the old symbol does, then preserve the old signature const WasmSignature *OldSig = nullptr; if (auto* F = dyn_cast<FunctionSymbol>(S)) OldSig = F->Signature; if (auto *L = dyn_cast<LazySymbol>(S)) OldSig = L->Signature; - auto NewSym = replaceSymbol<DefinedFunction>(S, Name, Flags, File, Function); + + auto NewSym = replaceSymbol<DefinedFunction>(S, S->getName(), Flags, File, Function); if (!NewSym->Signature) NewSym->Signature = OldSig; + LLVM_DEBUG(dbgs() << "done addDefinedFunction: " << Name << " [" + << (NewSym->Signature ? toString(*NewSym->Signature) : std::string("(none)")) + << "]\n"); } + return S; } @@ -354,7 +409,9 @@ else if (auto *Lazy = dyn_cast<LazySymbol>(S)) Lazy->fetch(); else - checkFunctionType(S, File, Sig); + if (!checkFunctionType(S, File, Sig)) + if (createFunctionVariant(Name, Sig, File, &S)) + replaceSymbol<UndefinedFunction>(S, S->getName(), Module, Flags, File, Sig); return S; } @@ -447,7 +504,8 @@ // Replace the given symbol body with an unreachable function. // This is used by handleWeakUndefines in order to generate a callable -// equivalent of an undefined function. +// equivalent of an undefined function and also handleSymbolVariants for +// undefined functions that don't match the signature of the definition. InputFunction *SymbolTable::replaceWithUnreachable(Symbol *Sym, const WasmSignature &Sig, StringRef DebugName) { @@ -494,3 +552,59 @@ Sym->setHidden(true); } } + +static void reportFunctionSignatureMismatch(FunctionSymbol *A, + FunctionSymbol *B, bool Error) { + std::string msg = ("function signature mismatch: " + A->getName() + + "\n>>> defined as " + toString(*A->Signature) + " in " + + toString(A->getFile()) + "\n>>> defined as " + + toString(*B->Signature) + " in " + toString(B->getFile())) + .str(); + if (Error) + error(msg); + else + warn(msg); +} + +// Remove any variant symbols that were created due to function signature +// mismatches. 
+void SymbolTable::handleSymbolVariants() { + for (auto &Pair : SymVariants) { + LLVM_DEBUG(dbgs() << "symbol with variants: " << Pair.first.val() << "\n"); + + // Push the initial symbol onto the list of variants. + std::vector<Symbol *> &Variants = Pair.second; + Variants.push_back(find(Pair.first.val())); + LLVM_DEBUG(dbgs() << Variants.size() << "\n"); + + // Find the one definition. + DefinedFunction *Defined = nullptr; + for (auto *Sym : Variants) { + if (auto *F = dyn_cast<DefinedFunction>(Sym)) { + Defined = F; + break; + } + } + + // If there are no definitions, and the undefined symbols disagree on + // the signature, there is nothing we can do since we don't know which one + // to use as the signature on the import. + if (!Defined) { + reportFunctionSignatureMismatch(cast<FunctionSymbol>(Variants[0]), + cast<FunctionSymbol>(Variants[1]), + true); + return; + } + + for (auto *Sym : Variants) { + if (Sym != Defined) { + auto *F = cast<FunctionSymbol>(Sym); + reportFunctionSignatureMismatch(F, Defined, + Config->SignatureCheckStrict); + StringRef DebugName = Saver.save("unreachable:" + toString(*F)); + replaceWithUnreachable(F, *F->Signature, DebugName); + } + } + + } +} Index: wasm/Symbols.h =================================================================== --- wasm/Symbols.h +++ wasm/Symbols.h @@ -73,6 +73,8 @@ // Returns the symbol name. StringRef getName() const { return Name; } + void setName(StringRef S) { Name = S; } + // Returns the file from which this symbol was created. InputFile *getFile() const { return File; }