diff --git a/lld/test/wasm/invalid-mvp-table-use.s b/lld/test/wasm/invalid-mvp-table-use.s new file mode 100644 --- /dev/null +++ b/lld/test/wasm/invalid-mvp-table-use.s @@ -0,0 +1,17 @@ +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s +# +# Can't define tables in MVP files. +# RUN: not wasm-ld --no-entry %t.o -o %t.wasm 2>&1 | FileCheck -check-prefix=CHECK-ERR %s + +.global call_indirect +call_indirect: + .functype call_indirect () -> () + i32.const 1 + call_indirect () -> () + end_function + +.globl table +table: + .tabletype table, externref + +# CHECK-ERR: expected one symbol table entry for each of the 2 table(s) present, but got 1 symbol(s) instead. Recompile this input with -mattr=+reference-types. diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -816,14 +816,14 @@ } static TableSymbol *resolveIndirectFunctionTable() { - Symbol *existingTable = symtab->find(functionTableName); - if (existingTable) { - if (!isa(existingTable)) { + Symbol *existing = symtab->find(functionTableName); + if (existing) { + if (!isa(existing)) { error(Twine("reserved symbol must be of type table: `") + functionTableName + "`"); return nullptr; } - if (existingTable->isDefined()) { + if (existing->isDefined()) { error(Twine("reserved symbol must not be defined in input files: `") + functionTableName + "`"); return nullptr; @@ -831,12 +831,11 @@ } if (config->importTable) { - if (existingTable) - return cast(existingTable); + if (existing) + return cast(existing); else return createUndefinedIndirectFunctionTable(functionTableName); - } else if ((existingTable && existingTable->isLive()) || - config->exportTable) { + } else if ((existing && existing->isLive()) || config->exportTable) { // A defined table is required. Either because the user request an exported // table or because the table symbol is already live. The existing table is // guaranteed to be undefined due to the check above. diff --git a/lld/wasm/InputFiles.h b/lld/wasm/InputFiles.h --- a/lld/wasm/InputFiles.h +++ b/lld/wasm/InputFiles.h @@ -157,7 +157,8 @@ Symbol *createUndefined(const WasmSymbol &sym, bool isCalledDirectly); bool isExcludedByComdat(InputChunk *chunk) const; - void synthesizeTableSymbols(); + void + synthesizeMVPIndirectFunctionTableSymbolIfNeeded(uint32_t tableSymbolCount); std::unique_ptr wasmObj; }; diff --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp --- a/lld/wasm/InputFiles.cpp +++ b/lld/wasm/InputFiles.cpp @@ -310,69 +310,113 @@ } } -// Since LLVM 12, we expect that if an input file defines or uses a table, it -// declares the tables using symbols and records each use with a relocation. -// This way when the linker combines inputs, it can collate the tables used by -// the inputs, assigning them distinct table numbers, and renumber all the uses -// as appropriate. At the same time, the linker has special logic to build the +// An object file can have two approaches to tables. With the reference-types +// feature enabled, input files that define or use tables declare the tables +// using symbols, and record each use with a relocation. This way when the +// linker combines inputs, it can collate the tables used by the inputs, +// assigning them distinct table numbers, and renumber all the uses as +// appropriate. At the same time, the linker has special logic to build the // indirect function table if it is needed. // -// However, object files produced by LLVM 11 and earlier neither write table -// symbols nor record relocations, and yet still use tables via call_indirect, -// and via function pointer bitcasts. We can detect these object files, as they -// declare tables as imports or define them locally, but don't have table -// symbols. synthesizeTableSymbols serves as a shim when loading these older -// input files, defining the missing symbols to allow the indirect function -// table to be built. +// However, MVP object files (those that target WebAssembly 1.0, the "minimum +// viable product" version of WebAssembly) neither write table symbols nor +// record relocations. These files can have at most one table, the indirect +// function table used by call_indirect and which is the address space for +// function points. If this table is present, it is always an import. If we +// have a file with a table import but no table symbols, it is an MVP object +// file. synthesizeMVPIndirectFunctionTableSymbolIfNeeded serves as a shim when +// loading these input files, defining the missing symbol to allow the indirect +// function table to be built. // -// Table uses in these older files won't be relocated, as they have no -// relocations. In practice this isn't a problem, as these object files -// typically just declare a single table named __indirect_function_table and -// having table number 0, so relocation would be idempotent anyway. -void ObjFile::synthesizeTableSymbols() { - uint32_t tableNumber = 0; - const WasmGlobalType *globalType = nullptr; - const WasmEventType *eventType = nullptr; - const WasmSignature *signature = nullptr; - if (wasmObj->getNumImportedTables()) { - for (const auto &import : wasmObj->imports()) { - if (import.Kind == WASM_EXTERNAL_TABLE) { - auto *info = make(); - info->Name = import.Field; - info->Kind = WASM_SYMBOL_TYPE_TABLE; - info->ImportModule = import.Module; - info->ImportName = import.Field; - info->Flags = WASM_SYMBOL_UNDEFINED; - info->Flags |= WASM_SYMBOL_NO_STRIP; - info->ElementIndex = tableNumber++; - LLVM_DEBUG(dbgs() << "Synthesizing symbol for table import: " - << info->Name << "\n"); - auto *wasmSym = make(*info, globalType, &import.Table, - eventType, signature); - symbols.push_back(createUndefined(*wasmSym, false)); - // Because there are no TABLE_NUMBER relocs in this case, we can't - // compute accurate liveness info; instead, just mark the symbol as - // always live. - symbols.back()->markLive(); - } +// As indirect function table table usage in MVP objects cannot be relocated, +// the linker must ensure that this table gets assigned index zero. +void ObjFile::synthesizeMVPIndirectFunctionTableSymbolIfNeeded( + uint32_t tableSymbolCount) { + uint32_t tableCount = wasmObj->getNumImportedTables() + tables.size(); + + // If there are symbols for all tables, then all is good. + if (tableCount == tableSymbolCount) + return; + + // It's possible for an input to define tables and also use the indirect + // function table, but forget to compile with -mattr=+reference-types. + // For these newer files, we require symbols for all tables, and + // relocations for all of their uses. + if (tableSymbolCount != 0) { + error(toString(this) + + ": expected one symbol table entry for each of the " + + Twine(tableCount) + " table(s) present, but got " + + Twine(tableSymbolCount) + + " symbol(s) instead. Recompile this input with " + "-mattr=+reference-types."); + return; + } + + // An MVP object file can have up to one table import, for the indirect + // function table, but will have no table definitions. + if (tables.size()) { + error(toString(this) + + ": unexpected table definition(s) without corresponding " + "symbol-table entries. Recompile this input with " + "-mattr=+reference-types."); + return; + } + + // An MVP object file can have only one table import. + if (tableCount != 1) { + error(toString(this) + + ": multiple table imports, but no corresponding symbol-table " + "entries. Recompile this input with -mattr=+reference-types."); + return; + } + + const WasmImport *tableImport = nullptr; + for (const auto &import : wasmObj->imports()) { + if (import.Kind == WASM_EXTERNAL_TABLE) { + assert(!tableImport); + tableImport = &import; } } - for (const auto &table : tables) { - auto *info = make(); - // Empty name. - info->Kind = WASM_SYMBOL_TYPE_TABLE; - info->Flags = WASM_SYMBOL_BINDING_LOCAL; - info->Flags |= WASM_SYMBOL_VISIBILITY_HIDDEN; - info->Flags |= WASM_SYMBOL_NO_STRIP; - info->ElementIndex = tableNumber++; - LLVM_DEBUG(dbgs() << "Synthesizing symbol for table definition: " - << info->Name << "\n"); - auto *wasmSym = make(*info, globalType, &table->getType(), - eventType, signature); - symbols.push_back(createDefined(*wasmSym)); - // Mark live, for the same reasons as for imported tables. - symbols.back()->markLive(); + assert(tableImport); + + // We can only synthesize a symtab entry for the indirect function table; if + // it has an unexpected name or type, assume that it's not actually the + // indirect function table. + if (tableImport->Field != functionTableName || + tableImport->Table.ElemType != uint8_t(ValType::FUNCREF)) { + error(toString(this) + ": table import " + Twine(tableImport->Field) + + " is missing a " + "symbol table entry; recompile this input with " + "-mattr=+reference-types."); + return; } + + auto *info = make(); + info->Name = tableImport->Field; + info->Kind = WASM_SYMBOL_TYPE_TABLE; + info->ImportModule = tableImport->Module; + info->ImportName = tableImport->Field; + info->Flags = WASM_SYMBOL_UNDEFINED; + info->Flags |= WASM_SYMBOL_NO_STRIP; + info->ElementIndex = 0; + LLVM_DEBUG(dbgs() << "Synthesizing symbol for table import: " << info->Name + << "\n"); + const WasmGlobalType *globalType = nullptr; + const WasmEventType *eventType = nullptr; + const WasmSignature *signature = nullptr; + auto *wasmSym = make(*info, globalType, &tableImport->Table, + eventType, signature); + Symbol *sym = createUndefined(*wasmSym, false); + // We're only sure it's a TableSymbol if the createUndefined succeeded. + if (errorCount()) + return; + symbols.push_back(sym); + // Because there are no TABLE_NUMBER relocs, we can't compute accurate + // liveness info; instead, just mark the symbol as always live. + sym->markLive(); + // We assume that this compilation unit has unrelocatable references to + // this table, so preassign this table's table number. + cast(sym)->preassignTableNumberZero(); } void ObjFile::parse(bool ignoreComdats) { @@ -489,11 +533,11 @@ // Populate `Symbols` based on the symbols in the object. symbols.reserve(wasmObj->getNumberOfSymbols()); - bool haveTableSymbol = false; + uint32_t tableSymbolCount = 0; for (const SymbolRef &sym : wasmObj->symbols()) { const WasmSymbol &wasmSym = wasmObj->getWasmSymbol(sym.getRawDataRefImpl()); if (wasmSym.isTypeTable()) - haveTableSymbol = true; + tableSymbolCount++; if (wasmSym.isDefined()) { // createDefined may fail if the symbol is comdat excluded in which case // we fall back to creating an undefined symbol @@ -506,12 +550,7 @@ symbols.push_back(createUndefined(wasmSym, isCalledDirectly[idx])); } - // As a stopgap measure while implementing table support, if the object file - // has table definitions or imports but no table symbols, synthesize symbols - // for those tables. Mark as NO_STRIP to ensure they reach the output file, - // even if there are no TABLE_NUMBER relocs against them. - if (!haveTableSymbol) - synthesizeTableSymbols(); + synthesizeMVPIndirectFunctionTableSymbolIfNeeded(tableSymbolCount); } bool ObjFile::isExcludedByComdat(InputChunk *chunk) const { diff --git a/lld/wasm/SymbolTable.cpp b/lld/wasm/SymbolTable.cpp --- a/lld/wasm/SymbolTable.cpp +++ b/lld/wasm/SymbolTable.cpp @@ -275,11 +275,20 @@ LLVM_DEBUG(dbgs() << "addSyntheticTable: " << name << " -> " << table << "\n"); Symbol *s = find(name); + bool hasPreassignedTableNumberZero = false; + if (auto *ts = dyn_cast_or_null(s)) + hasPreassignedTableNumberZero = ts->hasPreassignedTableNumberZero(); assert(!s || s->isUndefined()); if (!s) s = insertName(name).first; syntheticTables.emplace_back(table); - return replaceSymbol(s, name, flags, nullptr, table); + DefinedTable *ret = + replaceSymbol(s, name, flags, nullptr, table); + // For MVP inputs, the synthesized indirect function table must be allocated + // to table 0. + if (hasPreassignedTableNumberZero) + ret->preassignTableNumberZero(); + return ret; } static bool shouldReplace(const Symbol *existing, InputFile *newFile, diff --git a/lld/wasm/Symbols.h b/lld/wasm/Symbols.h --- a/lld/wasm/Symbols.h +++ b/lld/wasm/Symbols.h @@ -378,6 +378,9 @@ void setTableNumber(uint32_t number); bool hasTableNumber() const; + void preassignTableNumberZero(); + bool hasPreassignedTableNumberZero() const; + protected: TableSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f, const WasmTableType *type) diff --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp --- a/lld/wasm/Symbols.cpp +++ b/lld/wasm/Symbols.cpp @@ -368,6 +368,8 @@ } void TableSymbol::setTableNumber(uint32_t number) { + if (const auto *t = dyn_cast(this)) + return t->table->setTableNumber(number); LLVM_DEBUG(dbgs() << "setTableNumber " << name << " -> " << number << "\n"); assert(tableNumber == INVALID_INDEX); tableNumber = number; @@ -379,6 +381,19 @@ return tableNumber != INVALID_INDEX; } +void TableSymbol::preassignTableNumberZero() { + if (!hasPreassignedTableNumberZero()) + setTableNumber(0); +} + +bool TableSymbol::hasPreassignedTableNumberZero() const { + if (hasTableNumber()) { + assert(getTableNumber() == 0); + return true; + } + return false; +} + DefinedTable::DefinedTable(StringRef name, uint32_t flags, InputFile *file, InputTable *table) : TableSymbol(name, DefinedTableKind, flags, file, diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp --- a/lld/wasm/SyntheticSections.cpp +++ b/lld/wasm/SyntheticSections.cpp @@ -118,8 +118,16 @@ g->setGlobalIndex(numImportedGlobals++); else if (auto *e = dyn_cast(sym)) e->setEventIndex(numImportedEvents++); - else - cast(sym)->setTableNumber(numImportedTables++); + else { + // It is always possible to preserve preassigned table number 0 for imported + // tables, as imported tables are always numbered from 0. + TableSymbol *t = cast(sym); + if (t->hasPreassignedTableNumberZero()) + assert(numImportedTables == 0); + else + t->setTableNumber(numImportedTables); + numImportedTables++; + } } void ImportSection::writeBody() { @@ -223,7 +231,27 @@ uint32_t tableNumber = out.importSec->getNumImportedTables() + inputTables.size(); inputTables.push_back(table); - table->setTableNumber(tableNumber); + if (table->hasTableNumber()) { + // If a reftypes input has imported tables, any indirect function table + // referenced by an MVP input and preassigned to table number 0 will no + // longer work as the imported tables get the lowest table numbers (unless + // for some reason the indirect function table is itself imported). + assert(table->getTableNumber() == 0); + if (tableNumber != 0) { + // Of the declarations of __indirect_function_table in the various inputs, + // we don't know which one is responsible for preassigning the table + // number. Probably here we should grovel the imports to determine which + // input file imported a table. + error(toString(table->file) + ": " + table->getName() + + ": Cannot " + "preassign table number 0 to indirect function table, because " + "object files compiled with -mattr=+reference-types declared " + "table imports. Suggest recompiling all inputs with " + "-mattr=+reference-types."); + } + } else { + table->setTableNumber(tableNumber); + } } void MemorySection::writeBody() { diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -582,6 +582,7 @@ } void Writer::calculateImports() { + std::vector deferredTables; for (Symbol *sym : symtab->getSymbols()) { if (!sym->isUndefined()) continue; @@ -591,10 +592,22 @@ continue; if (!sym->isUsedInRegularObj) continue; - if (shouldImport(sym)) { - LLVM_DEBUG(dbgs() << "import: " << sym->getName() << "\n"); - out.importSec->addImport(sym); + if (!shouldImport(sym)) + continue; + if (auto *ts = dyn_cast(sym)) { + // Ensure that an indirect function table with a preassigned table number + // of 0 gets written as the first table import. + if (!ts->hasPreassignedTableNumberZero()) { + deferredTables.push_back(sym); + continue; + } } + LLVM_DEBUG(dbgs() << "import: " << sym->getName() << "\n"); + out.importSec->addImport(sym); + } + for (Symbol *sym : deferredTables) { + LLVM_DEBUG(dbgs() << "import: " << sym->getName() << "\n"); + out.importSec->addImport(sym); } }