diff --git a/lld/docs/WebAssembly.rst b/lld/docs/WebAssembly.rst --- a/lld/docs/WebAssembly.rst +++ b/lld/docs/WebAssembly.rst @@ -95,6 +95,25 @@ For more specific details on how this is achieved see the tool conventions on linking_. +Function Signatures +~~~~~~~~~~~~~~~~~~~ + +One way in which the WebAssembly linker differs from traditional native linkers +is that function signature checking is strict in WebAssembly. It is a +validation error for a module to contain a call site that doesn't agree with +the target signature. Even though this is undefined behavior in C/C++ it's not +uncommon to find this in real world C/C++ programs. For example, a call site in +one compilation unit which calls a function defined in another compilation +unit but with too many arguments. + +In order not to generate such invalid modules lld has two modes of handling such +mismatches: it can simply error out or it can create stub functions that will +trap at runtime (functions that contain only an ``unreachable`` instruction) +and use these stub functions at the otherwise invalid call sites. + +The latter is the default behavior and the ``--signature-check-strict`` option +can be used to disable this and instead error out. 
+ Imports and Exports ~~~~~~~~~~~~~~~~~~~ diff --git a/lld/test/wasm/lto/signature-mismatch.ll b/lld/test/wasm/lto/signature-mismatch.ll --- a/lld/test/wasm/lto/signature-mismatch.ll +++ b/lld/test/wasm/lto/signature-mismatch.ll @@ -15,5 +15,6 @@ ret void } +; CHECK: error: function signature mismatch: f ; CHECK: >>> defined as (i32) -> void in {{.*}}signature-mismatch.ll.tmp1.o ; CHECK: >>> defined as () -> void in lto.tmp diff --git a/lld/test/wasm/signature-mismatch-weak.ll b/lld/test/wasm/signature-mismatch-weak.ll --- a/lld/test/wasm/signature-mismatch-weak.ll +++ b/lld/test/wasm/signature-mismatch-weak.ll @@ -14,5 +14,5 @@ } ; CHECK: warning: function signature mismatch: weakFn -; CHECK-NEXT: >>> defined as () -> i32 in {{.*}}signature-mismatch-weak.ll.tmp.o +; CHECK-NEXT: >>> defined as () -> i32 in {{.*}}signature-mismatch-weak.ll.tmp.weak.o ; CHECK-NEXT: >>> defined as () -> i64 in {{.*}}signature-mismatch-weak.ll.tmp.strong.o diff --git a/lld/test/wasm/signature-mismatch.ll b/lld/test/wasm/signature-mismatch.ll --- a/lld/test/wasm/signature-mismatch.ll +++ b/lld/test/wasm/signature-mismatch.ll @@ -1,10 +1,15 @@ ; RUN: llc -filetype=obj %p/Inputs/ret32.ll -o %t.ret32.o ; RUN: llc -filetype=obj %s -o %t.main.o +; RUN: wasm-ld -o %t.wasm %t.main.o %t.ret32.o +; RUN: obj2yaml %t.wasm | FileCheck %s -check-prefix=YAML + ; RUN: not wasm-ld --fatal-warnings -o %t.wasm %t.main.o %t.ret32.o 2>&1 | FileCheck %s ; Run the test again by with the object files in the other order to verify ; the check works when the undefined symbol is resolved by an existing defined ; one. 
-; RUN: not wasm-ld --fatal-warnings -o %t.wasm %t.ret32.o %t.main.o 2>&1 | FileCheck %s -check-prefix=REVERSE +; RUN: not wasm-ld --fatal-warnings -o %t.wasm %t.ret32.o %t.main.o 2>&1 | FileCheck %s +; We also have a specific flag to enable strict signature checking +; RUN: not wasm-ld --signature-check-strict -o %t.wasm %t.main.o %t.ret32.o 2>&1 | FileCheck %s target triple = "wasm32-unknown-unknown" @@ -21,6 +26,12 @@ ; CHECK-NEXT: >>> defined as (i32, i64, i32) -> i32 in {{.*}}.main.o ; CHECK-NEXT: >>> defined as (f32) -> i32 in {{.*}}.ret32.o -; REVERSE: error: function signature mismatch: ret32 -; REVERSE-NEXT: >>> defined as (f32) -> i32 in {{.*}}.ret32.o -; REVERSE-NEXT: >>> defined as (i32, i64, i32) -> i32 in {{.*}}.main.o +; YAML: - Type: CUSTOM +; YAML-NEXT: Name: name +; YAML-NEXT: FunctionNames: +; YAML-NEXT: - Index: 0 +; YAML-NEXT: Name: __wasm_call_ctors +; YAML-NEXT: - Index: 1 +; YAML-NEXT: Name: 'unreachable:ret32' +; YAML-NEXT: - Index: 2 +; YAML-NEXT: Name: _start diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h --- a/lld/wasm/Config.h +++ b/lld/wasm/Config.h @@ -35,6 +35,7 @@ bool Relocatable; bool SaveTemps; bool Shared; + bool SignatureCheckStrict; bool StripAll; bool StripDebug; bool StackFirst; diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -321,6 +321,7 @@ Config->SaveTemps = Args.hasArg(OPT_save_temps); Config->SearchPaths = args::getStrings(Args, OPT_L); Config->Shared = Args.hasArg(OPT_shared); + Config->SignatureCheckStrict = Args.hasArg(OPT_signature_check_strict); Config->StripAll = Args.hasArg(OPT_strip_all); Config->StripDebug = Args.hasArg(OPT_strip_debug); Config->StackFirst = Args.hasArg(OPT_stack_first); @@ -556,9 +557,6 @@ Config->Entry); } - // Make sure we have resolved all symbols. 
- if (!Config->AllowUndefined) - Symtab->reportRemainingUndefines(); } if (errorCount()) @@ -570,11 +568,22 @@ if (errorCount()) return; - // Add synthetic dummies for weak undefined functions. Must happen - // after LTO otherwise functions may not yet have signatures. - if (!Config->Relocatable) + // Resolve any variant symbols that were created due to signature + // mismatches. + Symtab->handleSymbolVariants(); + if (errorCount()) + return; + + if (!Config->Relocatable) { + // Add synthetic dummies for weak undefined functions. Must happen + // after LTO otherwise functions may not yet have signatures. Symtab->handleWeakUndefines(); + // Make sure we have resolved all symbols. + if (!Config->AllowUndefined) + Symtab->reportRemainingUndefines(); + } + if (EntrySym) EntrySym->setHidden(false); diff --git a/lld/wasm/Options.td b/lld/wasm/Options.td --- a/lld/wasm/Options.td +++ b/lld/wasm/Options.td @@ -91,6 +91,9 @@ def strip_debug: F<"strip-debug">, HelpText<"Strip debugging information">; +def signature_check_strict: F<"signature-check-strict">, + HelpText<"Error on function signature mismatch">; + def threads: F<"threads">, HelpText<"Run the linker multi-threaded">; def trace: F<"trace">, HelpText<"Print the names of the input files">; diff --git a/lld/wasm/SymbolTable.h b/lld/wasm/SymbolTable.h --- a/lld/wasm/SymbolTable.h +++ b/lld/wasm/SymbolTable.h @@ -79,12 +79,15 @@ DefinedFunction *addSyntheticFunction(StringRef Name, uint32_t Flags, InputFunction *Function); + void handleSymbolVariants(); void handleWeakUndefines(); private: std::pair insert(StringRef Name, const InputFile *File); std::pair insertName(StringRef Name); + bool createFunctionVariant(StringRef Name, const WasmSignature *Sig, + const InputFile *File, Symbol **Out); InputFunction *replaceWithUnreachable(Symbol *Sym, const WasmSignature &Sig, StringRef DebugName); @@ -94,6 +97,10 @@ llvm::DenseMap SymMap; std::vector SymVector; + // For certain symbol types, e.g. 
function symbols, we allow for multiple + // variants of the same symbol with different signatures. + llvm::DenseMap> SymVariants; + llvm::DenseSet Comdats; // For LTO. diff --git a/lld/wasm/SymbolTable.cpp b/lld/wasm/SymbolTable.cpp --- a/lld/wasm/SymbolTable.cpp +++ b/lld/wasm/SymbolTable.cpp @@ -123,29 +123,26 @@ } // Check the type of new symbol matches that of the symbol is replacing. -// For functions this can also involve verifying that the signatures match. -static void checkFunctionType(Symbol *Existing, const InputFile *File, - const WasmSignature *NewSig) { +// Returns true if the function types match, false if there is a signature +// mismatch. +bool checkFunctionType(Symbol *Existing, const InputFile *File, + const WasmSignature *NewSig) { auto ExistingFunction = dyn_cast(Existing); if (!ExistingFunction) { reportTypeError(Existing, File, WASM_SYMBOL_TYPE_FUNCTION); - return; + return true; } if (!NewSig) - return; + return true; const WasmSignature *OldSig = ExistingFunction->Signature; if (!OldSig) { ExistingFunction->Signature = NewSig; - return; + return true; } - if (*NewSig != *OldSig) - warn("function signature mismatch: " + Existing->getName() + - "\n>>> defined as " + toString(*OldSig) + " in " + - toString(Existing->getFile()) + "\n>>> defined as " + - toString(*NewSig) + " in " + toString(File)); + return *NewSig == *OldSig; } static void checkGlobalType(const Symbol *Existing, const InputFile *File, @@ -260,21 +257,26 @@ return S; } - if (Function) - checkFunctionType(S, File, &Function->Signature); + bool CreatedNewVariant = false; + if (Function && !checkFunctionType(S, File, &Function->Signature)) + CreatedNewVariant = + createFunctionVariant(Name, &Function->Signature, File, &S); - if (shouldReplace(S, File, Flags)) { + if (CreatedNewVariant || shouldReplace(S, File, Flags)) { // If the new defined function doesn't have signture (i.e. 
bitcode - // functions) but the old symbol does then preserve the old signature + // functions) but the old symbol does, then preserve the old signature const WasmSignature *OldSig = nullptr; if (auto* F = dyn_cast(S)) OldSig = F->Signature; if (auto *L = dyn_cast(S)) OldSig = L->Signature; - auto NewSym = replaceSymbol(S, Name, Flags, File, Function); + + auto NewSym = + replaceSymbol(S, S->getName(), Flags, File, Function); if (!NewSym->Signature) NewSym->Signature = OldSig; } + return S; } @@ -355,8 +357,10 @@ File, Sig); else if (auto *Lazy = dyn_cast(S)) Lazy->fetch(); - else - checkFunctionType(S, File, Sig); + else if (!checkFunctionType(S, File, Sig)) + if (createFunctionVariant(Name, Sig, File, &S)) + replaceSymbol(S, S->getName(), ImportName, + ImportModule, Flags, File, Sig); return S; } @@ -438,6 +442,56 @@ return Comdats.insert(CachedHashStringRef(Name)).second; } +static char encodeValType(ValType Type) { + switch (Type) { + case ValType::I32: + return 'i'; + case ValType::I64: + return 'j'; + case ValType::F32: + return 'f'; + case ValType::F64: + return 'd'; + case ValType::V128: + return 'V'; + case ValType::EXCEPT_REF: + return 'e'; + } + llvm_unreachable("invalid wasm type"); +} + +static std::string encodeSignature(const WasmSignature &Sig) { + std::string S = ":"; + for (ValType Type : Sig.Returns) + S += encodeValType(Type); + S += ':'; + for (ValType Type : Sig.Params) + S += encodeValType(Type); + return S; +} + +// The new signature doesn't match. Create a variant to the symbol with +// the signature encoded in the name and return that instead. +// These symbols are then unified later in handleSymbolVariants. 
+bool SymbolTable::createFunctionVariant(StringRef Name, + const WasmSignature *Sig, + const InputFile *File, Symbol **Out) { + StringRef NewName = Saver.save(Name + encodeSignature(*Sig)); + bool WasInserted; + Symbol *NewSym; + std::tie(NewSym, WasInserted) = insert(NewName, File); + if (WasInserted) { + LLVM_DEBUG(dbgs() << "add new variant: " << NewSym << " name: " << NewName + << toString(*NewSym) << "\n"); + SymVariants[CachedHashStringRef(Name)].push_back(NewSym); + } else { + assert(*cast(*Out)->Signature == *Sig); + } + NewSym->setName(Name); + *Out = NewSym; + return WasInserted; +} + // Set a flag for --trace-symbol so that we can print out a log message // if a new symbol with the same name is inserted into the symbol table. void SymbolTable::trace(StringRef Name) { @@ -451,7 +505,8 @@ // Replace the given symbol body with an unreachable function. // This is used by handleWeakUndefines in order to generate a callable -// equivalent of an undefined function. +// equivalent of an undefined function and also handleSymbolVariants for +// undefined functions that don't match the signature of the definition. InputFunction *SymbolTable::replaceWithUnreachable(Symbol *Sym, const WasmSignature &Sig, StringRef DebugName) { @@ -498,3 +553,57 @@ Sym->setHidden(true); } } + +static void reportFunctionSignatureMismatch(FunctionSymbol *A, + FunctionSymbol *B, bool Error) { + std::string msg = ("function signature mismatch: " + A->getName() + + "\n>>> defined as " + toString(*A->Signature) + " in " + + toString(A->getFile()) + "\n>>> defined as " + + toString(*B->Signature) + " in " + toString(B->getFile())) + .str(); + if (Error) + error(msg); + else + warn(msg); +} + +// Remove any variant symbols that were created due to function signature +// mismatches. +void SymbolTable::handleSymbolVariants() { + for (auto Pair : SymVariants) { + LLVM_DEBUG(dbgs() << "symbol with variants: " << Pair.first.val() << "\n"); + + // Push the initial symbol onto the list of variants. 
+ std::vector &Variants = Pair.second; + Variants.push_back(find(Pair.first.val())); + LLVM_DEBUG(dbgs() << Variants.size() << "\n"); + + // Find the one definition. + DefinedFunction *Defined = nullptr; + for (auto *Symbol : Variants) { + if (auto F = dyn_cast(Symbol)) { + Defined = F; + break; + } + } + + // If there are no definitions, and the undefined symbols disagree on + // the signature, there is nothing we can do since we don't know which one + // to use as the signature on the import. + if (!Defined) { + reportFunctionSignatureMismatch(cast(Variants[0]), + cast(Variants[1]), true); + return; + } + + for (auto *Symbol : Variants) { + if (Symbol != Defined) { + auto *F = cast(Symbol); + reportFunctionSignatureMismatch(F, Defined, + Config->SignatureCheckStrict); + StringRef DebugName = Saver.save("unreachable:" + toString(*F)); + replaceWithUnreachable(F, *F->Signature, DebugName); + } + } + } +} diff --git a/lld/wasm/Symbols.h b/lld/wasm/Symbols.h --- a/lld/wasm/Symbols.h +++ b/lld/wasm/Symbols.h @@ -73,6 +73,8 @@ // Returns the symbol name. StringRef getName() const { return Name; } + void setName(StringRef S) { Name = S; } + // Returns the file from which this symbol was created. InputFile *getFile() const { return File; }