diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp --- a/clang/lib/Driver/ToolChains/WebAssembly.cpp +++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp @@ -62,6 +62,12 @@ const char *Linker = Args.MakeArgString(getLinkerPath(Args)); ArgStringList CmdArgs; + CmdArgs.push_back("-m"); + if (getToolChain().getTriple().isArch64Bit()) + CmdArgs.push_back("wasm64"); + else + CmdArgs.push_back("wasm32"); + if (Args.hasArg(options::OPT_s)) CmdArgs.push_back("--strip-all"); diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h --- a/lld/wasm/Config.h +++ b/lld/wasm/Config.h @@ -36,6 +36,7 @@ bool importMemory; bool sharedMemory; bool importTable; + bool is64; bool mergeDataSegments; bool pie; bool printGcSections; diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -378,6 +378,18 @@ config->exportDynamic = args.hasFlag(OPT_export_dynamic, OPT_no_export_dynamic, config->shared); + // Parse wasm32/64. + config->is64 = false; + if (auto *arg = args.getLastArg(OPT_m)) { + StringRef s = arg->getValue(); + if (s == "wasm32") + config->is64 = false; + else if (s == "wasm64") + config->is64 = true; + else + error("invalid target architecture: " + s); + } + // --threads= takes a positive integer and provides the default value for // --thinlto-jobs=. if (auto *arg = args.getLastArg(OPT_threads)) { @@ -498,9 +510,15 @@ static GlobalSymbol *createGlobalVariable(StringRef name, bool isMutable, int value) { llvm::wasm::WasmGlobal wasmGlobal; - wasmGlobal.Type = {WASM_TYPE_I32, isMutable}; - wasmGlobal.InitExpr.Value.Int32 = value; - wasmGlobal.InitExpr.Opcode = WASM_OPCODE_I32_CONST; + if (config->is64) { + wasmGlobal.Type = {WASM_TYPE_I64, isMutable}; + wasmGlobal.InitExpr.Value.Int64 = value; + wasmGlobal.InitExpr.Opcode = WASM_OPCODE_I64_CONST; + } else { + wasmGlobal.Type = {WASM_TYPE_I32, isMutable}; + wasmGlobal.InitExpr.Value.Int32 = value; + wasmGlobal.InitExpr.Opcode = WASM_OPCODE_I32_CONST; + } wasmGlobal.SymbolName = name; return symtab->addSyntheticGlobal(name, WASM_SYMBOL_VISIBILITY_HIDDEN, make(wasmGlobal, nullptr)); @@ -513,9 +531,13 @@ static WasmSignature nullSignature = {{}, {}}; static WasmSignature i32ArgSignature = {{}, {ValType::I32}}; + static WasmSignature i64ArgSignature = {{}, {ValType::I64}}; static llvm::wasm::WasmGlobalType globalTypeI32 = {WASM_TYPE_I32, false}; + static llvm::wasm::WasmGlobalType globalTypeI64 = {WASM_TYPE_I64, false}; static llvm::wasm::WasmGlobalType mutableGlobalTypeI32 = {WASM_TYPE_I32, true}; + static llvm::wasm::WasmGlobalType mutableGlobalTypeI64 = {WASM_TYPE_I64, + true}; WasmSym::callCtors = symtab->addSyntheticFunction( "__wasm_call_ctors", WASM_SYMBOL_VISIBILITY_HIDDEN, make(nullSignature, "__wasm_call_ctors")); @@ -530,15 +552,16 @@ if (config->isPic) { - WasmSym::stackPointer = - createUndefinedGlobal("__stack_pointer", &mutableGlobalTypeI32); + WasmSym::stackPointer = createUndefinedGlobal( + "__stack_pointer", + config->is64 ? &mutableGlobalTypeI64 : &mutableGlobalTypeI32); // For PIC code, we import two global variables (__memory_base and // __table_base) from the environment and use these as the offset at // which to load our static data and function table. // See: // https://github.com/WebAssembly/tool-conventions/blob/master/DynamicLinking.md - WasmSym::memoryBase = - createUndefinedGlobal("__memory_base", &globalTypeI32); + WasmSym::memoryBase = createUndefinedGlobal( + "__memory_base", config->is64 ? &globalTypeI64 : &globalTypeI32); WasmSym::tableBase = createUndefinedGlobal("__table_base", &globalTypeI32); WasmSym::memoryBase->markLive(); WasmSym::tableBase->markLive(); @@ -563,7 +586,9 @@ WasmSym::tlsAlign = createGlobalVariable("__tls_align", false, 1); WasmSym::initTLS = symtab->addSyntheticFunction( "__wasm_init_tls", WASM_SYMBOL_VISIBILITY_HIDDEN, - make(i32ArgSignature, "__wasm_init_tls")); + make(config->is64 ? i64ArgSignature + : i32ArgSignature, + "__wasm_init_tls")); } } diff --git a/lld/wasm/InputChunks.cpp b/lld/wasm/InputChunks.cpp --- a/lld/wasm/InputChunks.cpp +++ b/lld/wasm/InputChunks.cpp @@ -32,6 +32,18 @@ llvm_unreachable("unknown reloc type"); } +bool relocIs64(uint8_t relocType) { + switch (relocType) { + case R_WASM_MEMORY_ADDR_LEB64: + case R_WASM_MEMORY_ADDR_SLEB64: + case R_WASM_MEMORY_ADDR_REL_SLEB64: + case R_WASM_MEMORY_ADDR_I64: + return true; + default: + return false; + } +} + std::string toString(const wasm::InputChunk *c) { return (toString(c->file) + ":(" + c->getName() + ")").str(); } @@ -323,12 +335,17 @@ LLVM_DEBUG(dbgs() << "generating runtime relocations: " << getName() << " count=" << relocations.size() << "\n"); + unsigned opcode_ptr_const = + config->is64 ? WASM_OPCODE_I64_CONST : WASM_OPCODE_I32_CONST; + unsigned opcode_ptr_add = + config->is64 ? WASM_OPCODE_I64_ADD : WASM_OPCODE_I32_ADD; + // TODO(sbc): Encode the relocations in the data section and write a loop // here to apply them. uint32_t segmentVA = outputSeg->startVA + outputSegmentOffset; for (const WasmRelocation &rel : relocations) { - uint32_t offset = rel.Offset - getInputSectionOffset(); - uint32_t outputOffset = segmentVA + offset; + uint64_t offset = rel.Offset - getInputSectionOffset(); + uint64_t outputOffset = segmentVA + offset; LLVM_DEBUG(dbgs() << "gen reloc: type=" << relocTypeToString(rel.Type) << " addend=" << rel.Addend << " index=" << rel.Index @@ -339,9 +356,17 @@ writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(), "memory_base"); // Add the offset of the relocation - writeU8(os, WASM_OPCODE_I32_CONST, "I32_CONST"); + writeU8(os, opcode_ptr_const, "CONST"); writeSleb128(os, outputOffset, "offset"); - writeU8(os, WASM_OPCODE_I32_ADD, "ADD"); + writeU8(os, opcode_ptr_add, "ADD"); + + bool is64 = relocIs64(rel.Type); + unsigned opcode_reloc_const = + is64 ? WASM_OPCODE_I64_CONST : WASM_OPCODE_I32_CONST; + unsigned opcode_reloc_add = + is64 ? WASM_OPCODE_I64_ADD : WASM_OPCODE_I32_ADD; + unsigned opcode_reloc_store = + is64 ? WASM_OPCODE_I64_STORE : WASM_OPCODE_I32_STORE; Symbol *sym = file->getSymbol(rel); // Now figure out what we want to store @@ -349,9 +374,9 @@ writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); writeUleb128(os, sym->getGOTIndex(), "global index"); if (rel.Addend) { - writeU8(os, WASM_OPCODE_I32_CONST, "CONST"); + writeU8(os, opcode_reloc_const, "CONST"); writeSleb128(os, rel.Addend, "addend"); - writeU8(os, WASM_OPCODE_I32_ADD, "ADD"); + writeU8(os, opcode_reloc_add, "ADD"); } } else { const GlobalSymbol* baseSymbol = WasmSym::memoryBase; @@ -359,13 +384,13 @@ baseSymbol = WasmSym::tableBase; writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); writeUleb128(os, baseSymbol->getGlobalIndex(), "base"); - writeU8(os, WASM_OPCODE_I32_CONST, "CONST"); + writeU8(os, opcode_reloc_const, "CONST"); writeSleb128(os, file->calcNewValue(rel), "offset"); - writeU8(os, WASM_OPCODE_I32_ADD, "ADD"); + writeU8(os, opcode_reloc_add, "ADD"); } // Store that value at the virtual address - writeU8(os, WASM_OPCODE_I32_STORE, "I32_STORE"); + writeU8(os, opcode_reloc_store, "I32_STORE"); writeUleb128(os, 2, "align"); writeUleb128(os, 0, "offset"); } diff --git a/lld/wasm/Options.td b/lld/wasm/Options.td --- a/lld/wasm/Options.td +++ b/lld/wasm/Options.td @@ -62,6 +62,8 @@ def L: JoinedOrSeparate<["-"], "L">, MetaVarName<"">, HelpText<"Add a directory to the library search path">; +def m: JoinedOrSeparate<["-"], "m">, HelpText<"Set target emulation">; + def mllvm: S<"mllvm">, HelpText<"Options to pass to LLVM">; def no_color_diagnostics: F<"no-color-diagnostics">, @@ -179,7 +181,6 @@ def: J<"entry=">, Alias; def: Flag<["-"], "E">, Alias, HelpText<"Alias for --export-dynamic">; def: Flag<["-"], "i">, Alias; -def: Flag<["-"], "m">, Alias; def: Flag<["-"], "r">, Alias; def: Flag<["-"], "s">, Alias, HelpText<"Alias for --strip-all">; def: Flag<["-"], "S">, Alias, HelpText<"Alias for --strip-debug">; diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h --- a/llvm/include/llvm/BinaryFormat/Wasm.h +++ b/llvm/include/llvm/BinaryFormat/Wasm.h @@ -254,11 +254,13 @@ WASM_OPCODE_GLOBAL_GET = 0x23, WASM_OPCODE_GLOBAL_SET = 0x24, WASM_OPCODE_I32_STORE = 0x36, + WASM_OPCODE_I64_STORE = 0x37, WASM_OPCODE_I32_CONST = 0x41, WASM_OPCODE_I64_CONST = 0x42, WASM_OPCODE_F32_CONST = 0x43, WASM_OPCODE_F64_CONST = 0x44, WASM_OPCODE_I32_ADD = 0x6a, + WASM_OPCODE_I64_ADD = 0x7c, WASM_OPCODE_REF_NULL = 0xd0, }; diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -434,9 +434,12 @@ // GetExternalSymbolSymbol does, since if there's no code that // refers to this symbol, we have to set it here. SPSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL); - // FIXME: need to check subtarget to see if its wasm64, but we - // can't cast to WebAssemblySubtarget here. - SPSym->setGlobalType(wasm::WasmGlobalType{wasm::WASM_TYPE_I32, true}); + SPSym->setGlobalType(wasm::WasmGlobalType{ + uint8_t(Asm->getSubtargetInfo().getTargetTriple().getArch() == + Triple::wasm64 + ? wasm::WASM_TYPE_I64 + : wasm::WASM_TYPE_I32), + true}); DIELoc *Loc = new (DIEValueAllocator) DIELoc; addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_WASM_location); addSInt(*Loc, dwarf::DW_FORM_sdata, FrameBase.Location.WasmLoc.Kind); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h --- a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h @@ -53,6 +53,15 @@ MachineBasicBlock::iterator &InsertStore, const DebugLoc &DL) const; + static unsigned getSPReg(const MachineFunction &MF); + static unsigned getFPReg(const MachineFunction &MF); + static unsigned getOpcConst(const MachineFunction &MF); + static unsigned getOpcAdd(const MachineFunction &MF); + static unsigned getOpcSub(const MachineFunction &MF); + static unsigned getOpcAnd(const MachineFunction &MF); + static unsigned getOpcGlobGet(const MachineFunction &MF); + static unsigned getOpcGlobSet(const MachineFunction &MF); + private: bool hasBP(const MachineFunction &MF) const; bool needsSPForLocalFrame(const MachineFunction &MF) const; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp @@ -87,8 +87,8 @@ } // In function with EH pads, we need to make a copy of the value of -// __stack_pointer global in SP32 register, in order to use it when restoring -// __stack_pointer after an exception is caught. +// __stack_pointer global in SP32/64 register, in order to use it when +// restoring __stack_pointer after an exception is caught. bool WebAssemblyFrameLowering::needsPrologForEH( const MachineFunction &MF) const { auto EHType = MF.getTarget().getMCAsmInfo()->getExceptionHandlingType(); @@ -123,6 +123,57 @@ return needsSPForLocalFrame(MF) && !CanUseRedZone; } +unsigned WebAssemblyFrameLowering::getSPReg(const MachineFunction &MF) { + return MF.getSubtarget().hasAddr64() + ? WebAssembly::SP64 + : WebAssembly::SP32; +} + +unsigned WebAssemblyFrameLowering::getFPReg(const MachineFunction &MF) { + return MF.getSubtarget().hasAddr64() + ? WebAssembly::FP64 + : WebAssembly::FP32; +} + +unsigned +WebAssemblyFrameLowering::getOpcConst(const MachineFunction &MF) { + return MF.getSubtarget().hasAddr64() + ? WebAssembly::CONST_I64 + : WebAssembly::CONST_I32; +} + +unsigned WebAssemblyFrameLowering::getOpcAdd(const MachineFunction &MF) { + return MF.getSubtarget().hasAddr64() + ? WebAssembly::ADD_I64 + : WebAssembly::ADD_I32; +} + +unsigned WebAssemblyFrameLowering::getOpcSub(const MachineFunction &MF) { + return MF.getSubtarget().hasAddr64() + ? WebAssembly::SUB_I64 + : WebAssembly::SUB_I32; +} + +unsigned WebAssemblyFrameLowering::getOpcAnd(const MachineFunction &MF) { + return MF.getSubtarget().hasAddr64() + ? WebAssembly::AND_I64 + : WebAssembly::AND_I32; +} + +unsigned +WebAssemblyFrameLowering::getOpcGlobGet(const MachineFunction &MF) { + return MF.getSubtarget().hasAddr64() + ? WebAssembly::GLOBAL_GET_I64 + : WebAssembly::GLOBAL_GET_I32; +} + +unsigned +WebAssemblyFrameLowering::getOpcGlobSet(const MachineFunction &MF) { + return MF.getSubtarget().hasAddr64() + ? WebAssembly::GLOBAL_SET_I64 + : WebAssembly::GLOBAL_SET_I32; +} + void WebAssemblyFrameLowering::writeSPToGlobal( unsigned SrcReg, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator &InsertStore, const DebugLoc &DL) const { @@ -130,7 +181,8 @@ const char *ES = "__stack_pointer"; auto *SPSymbol = MF.createExternalSymbolName(ES); - BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::GLOBAL_SET_I32)) + + BuildMI(MBB, InsertStore, DL, TII->get(getOpcGlobSet(MF))) .addExternalSymbol(SPSymbol) .addReg(SrcReg); } @@ -141,11 +193,12 @@ MachineBasicBlock::iterator I) const { assert(!I->getOperand(0).getImm() && (hasFP(MF) || hasBP(MF)) && "Call frame pseudos should only be used for dynamic stack adjustment"); - const auto *TII = MF.getSubtarget().getInstrInfo(); + auto &ST = MF.getSubtarget(); + const auto *TII = ST.getInstrInfo(); if (I->getOpcode() == TII->getCallFrameDestroyOpcode() && needsSPWriteback(MF)) { DebugLoc DL = I->getDebugLoc(); - writeSPToGlobal(WebAssembly::SP32, MF, MBB, I, DL); + writeSPToGlobal(getSPReg(MF), MF, MBB, I, DL); } return MBB.erase(I); } @@ -161,7 +214,8 @@ return; uint64_t StackSize = MFI.getStackSize(); - const auto *TII = MF.getSubtarget().getInstrInfo(); + auto &ST = MF.getSubtarget(); + const auto *TII = ST.getInstrInfo(); auto &MRI = MF.getRegInfo(); auto InsertPt = MBB.begin(); @@ -172,13 +226,13 @@ const TargetRegisterClass *PtrRC = MRI.getTargetRegisterInfo()->getPointerRegClass(MF); - unsigned SPReg = WebAssembly::SP32; + unsigned SPReg = getSPReg(MF); if (StackSize) SPReg = MRI.createVirtualRegister(PtrRC); const char *ES = "__stack_pointer"; auto *SPSymbol = MF.createExternalSymbolName(ES); - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::GLOBAL_GET_I32), SPReg) + BuildMI(MBB, InsertPt, DL, TII->get(getOpcGlobGet(MF)), SPReg) .addExternalSymbol(SPSymbol); bool HasBP = hasBP(MF); @@ -192,32 +246,30 @@ if (StackSize) { // Subtract the frame size Register OffsetReg = MRI.createVirtualRegister(PtrRC); - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) + BuildMI(MBB, InsertPt, DL, TII->get(getOpcConst(MF)), OffsetReg) .addImm(StackSize); - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::SUB_I32), - WebAssembly::SP32) + BuildMI(MBB, InsertPt, DL, TII->get(getOpcSub(MF)), getSPReg(MF)) .addReg(SPReg) .addReg(OffsetReg); } if (HasBP) { Register BitmaskReg = MRI.createVirtualRegister(PtrRC); Align Alignment = MFI.getMaxAlign(); - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), BitmaskReg) - .addImm((int)~(Alignment.value() - 1)); - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::AND_I32), - WebAssembly::SP32) - .addReg(WebAssembly::SP32) + BuildMI(MBB, InsertPt, DL, TII->get(getOpcConst(MF)), BitmaskReg) + .addImm((int64_t) ~(Alignment.value() - 1)); + BuildMI(MBB, InsertPt, DL, TII->get(getOpcAnd(MF)), getSPReg(MF)) + .addReg(getSPReg(MF)) .addReg(BitmaskReg); } if (hasFP(MF)) { // Unlike most conventional targets (where FP points to the saved FP), // FP points to the bottom of the fixed-size locals, so we can use positive // offsets in load/store instructions. - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY), WebAssembly::FP32) - .addReg(WebAssembly::SP32); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY), getFPReg(MF)) + .addReg(getSPReg(MF)); } if (StackSize && needsSPWriteback(MF)) { - writeSPToGlobal(WebAssembly::SP32, MF, MBB, InsertPt, DL); + writeSPToGlobal(getSPReg(MF), MF, MBB, InsertPt, DL); } } @@ -226,7 +278,8 @@ uint64_t StackSize = MF.getFrameInfo().getStackSize(); if (!needsSP(MF) || !needsSPWriteback(MF)) return; - const auto *TII = MF.getSubtarget().getInstrInfo(); + auto &ST = MF.getSubtarget(); + const auto *TII = ST.getInstrInfo(); auto &MRI = MF.getRegInfo(); auto InsertPt = MBB.getFirstTerminator(); DebugLoc DL; @@ -237,6 +290,7 @@ // Restore the stack pointer. If we had fixed-size locals, add the offset // subtracted in the prolog. unsigned SPReg = 0; + unsigned SPFPReg = hasFP(MF) ? getFPReg(MF) : getSPReg(MF); if (hasBP(MF)) { auto FI = MF.getInfo(); SPReg = FI->getBasePointerVreg(); @@ -244,16 +298,17 @@ const TargetRegisterClass *PtrRC = MRI.getTargetRegisterInfo()->getPointerRegClass(MF); Register OffsetReg = MRI.createVirtualRegister(PtrRC); - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) + BuildMI(MBB, InsertPt, DL, TII->get(getOpcConst(MF)), OffsetReg) .addImm(StackSize); - // In the epilog we don't need to write the result back to the SP32 physreg - // because it won't be used again. We can use a stackified register instead. + // In the epilog we don't need to write the result back to the SP32/64 + // physreg because it won't be used again. We can use a stackified register + // instead. SPReg = MRI.createVirtualRegister(PtrRC); - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::ADD_I32), SPReg) - .addReg(hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32) + BuildMI(MBB, InsertPt, DL, TII->get(getOpcAdd(MF)), SPReg) + .addReg(SPFPReg) .addReg(OffsetReg); } else { - SPReg = hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32; + SPReg = SPFPReg; } writeSPToGlobal(SPReg, MF, MBB, InsertPt, DL); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp @@ -77,6 +77,13 @@ return; } + MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout()); + auto GlobalGetIns = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64 + : WebAssembly::GLOBAL_GET_I32; + auto ConstIns = + PtrVT == MVT::i64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32; + auto AddIns = PtrVT == MVT::i64 ? WebAssembly::ADD_I64 : WebAssembly::ADD_I32; + // Few custom selection stuff. SDLoc DL(Node); MachineFunction &MF = CurDAG->getMachineFunction(); @@ -140,20 +147,16 @@ false); } - MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout()); - assert(PtrVT == MVT::i32 && "only wasm32 is supported for now"); - SDValue TLSBaseSym = CurDAG->getTargetExternalSymbol("__tls_base", PtrVT); SDValue TLSOffsetSym = CurDAG->getTargetGlobalAddress( GA->getGlobal(), DL, PtrVT, GA->getOffset(), 0); - MachineSDNode *TLSBase = CurDAG->getMachineNode(WebAssembly::GLOBAL_GET_I32, - DL, MVT::i32, TLSBaseSym); - MachineSDNode *TLSOffset = CurDAG->getMachineNode( - WebAssembly::CONST_I32, DL, MVT::i32, TLSOffsetSym); - MachineSDNode *TLSAddress = - CurDAG->getMachineNode(WebAssembly::ADD_I32, DL, MVT::i32, - SDValue(TLSBase, 0), SDValue(TLSOffset, 0)); + MachineSDNode *TLSBase = + CurDAG->getMachineNode(GlobalGetIns, DL, PtrVT, TLSBaseSym); + MachineSDNode *TLSOffset = + CurDAG->getMachineNode(ConstIns, DL, PtrVT, TLSOffsetSym); + MachineSDNode *TLSAddress = CurDAG->getMachineNode( + AddIns, DL, PtrVT, SDValue(TLSBase, 0), SDValue(TLSOffset, 0)); ReplaceNode(Node, TLSAddress); return; } @@ -162,22 +165,16 @@ unsigned IntNo = cast(Node->getOperand(0))->getZExtValue(); switch (IntNo) { case Intrinsic::wasm_tls_size: { - MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout()); - assert(PtrVT == MVT::i32 && "only wasm32 is supported for now"); - MachineSDNode *TLSSize = CurDAG->getMachineNode( - WebAssembly::GLOBAL_GET_I32, DL, PtrVT, - CurDAG->getTargetExternalSymbol("__tls_size", MVT::i32)); + GlobalGetIns, DL, PtrVT, + CurDAG->getTargetExternalSymbol("__tls_size", PtrVT)); ReplaceNode(Node, TLSSize); return; } case Intrinsic::wasm_tls_align: { - MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout()); - assert(PtrVT == MVT::i32 && "only wasm32 is supported for now"); - MachineSDNode *TLSAlign = CurDAG->getMachineNode( - WebAssembly::GLOBAL_GET_I32, DL, PtrVT, - CurDAG->getTargetExternalSymbol("__tls_align", MVT::i32)); + GlobalGetIns, DL, PtrVT, + CurDAG->getTargetExternalSymbol("__tls_align", PtrVT)); ReplaceNode(Node, TLSAlign); return; } @@ -188,11 +185,8 @@ unsigned IntNo = cast(Node->getOperand(1))->getZExtValue(); switch (IntNo) { case Intrinsic::wasm_tls_base: { - MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout()); - assert(PtrVT == MVT::i32 && "only wasm32 is supported for now"); - MachineSDNode *TLSBase = CurDAG->getMachineNode( - WebAssembly::GLOBAL_GET_I32, DL, MVT::i32, MVT::Other, + GlobalGetIns, DL, PtrVT, MVT::Other, CurDAG->getTargetExternalSymbol("__tls_base", PtrVT), Node->getOperand(0)); ReplaceNode(Node, TLSBase); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -209,6 +209,7 @@ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand); setOperationAction(ISD::FrameIndex, MVT::i32, Custom); + setOperationAction(ISD::FrameIndex, MVT::i64, Custom); setOperationAction(ISD::CopyToReg, MVT::Other, Custom); // Expand these forms; we pattern-match the forms that we can handle in isel. @@ -613,7 +614,11 @@ if (VT.isVector()) return VT.changeVectorElementTypeToInteger(); - return TargetLowering::getSetCCResultType(DL, C, VT); + // So far, all branch instructions in Wasm take an I32 condition. + // The default TargetLowering::getSetCCResultType returns the pointer size, + // which would be useful to reduce instruction counts when testing + // against 64-bit pointers/values if at some point Wasm supports that. + return EVT::getIntegerVT(C, 32); } bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp @@ -408,8 +408,8 @@ ++InsertPos; if (InsertPos->getOpcode() == WebAssembly::CATCH) ++InsertPos; - FrameLowering->writeSPToGlobal(WebAssembly::SP32, MF, MBB, InsertPos, - MBB.begin()->getDebugLoc()); + FrameLowering->writeSPToGlobal(FrameLowering->getSPReg(MF), MF, MBB, + InsertPos, MBB.begin()->getDebugLoc()); } return Changed; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp @@ -81,8 +81,9 @@ strcmp(Name, "__stack_pointer") == 0 || strcmp(Name, "__tls_base") == 0; WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL); WasmSym->setGlobalType(wasm::WasmGlobalType{ - uint8_t(Subtarget.hasAddr64() ? wasm::WASM_TYPE_I64 - : wasm::WASM_TYPE_I32), + uint8_t(Subtarget.hasAddr64() && strcmp(Name, "__table_base") != 0 + ? wasm::WASM_TYPE_I64 + : wasm::WASM_TYPE_I32), Mutable}); return WasmSym; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -248,7 +248,8 @@ } // Check for writes to __stack_pointer global. - if (MI.getOpcode() == WebAssembly::GLOBAL_SET_I32 && + if ((MI.getOpcode() == WebAssembly::GLOBAL_SET_I32 || + MI.getOpcode() == WebAssembly::GLOBAL_SET_I64) && strcmp(MI.getOperand(0).getSymbolName(), "__stack_pointer") == 0) StackPointer = true; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp @@ -88,16 +88,17 @@ // If this is an address being added to a constant, fold the frame offset // into the constant. - if (MI.getOpcode() == WebAssembly::ADD_I32) { + if (MI.getOpcode() == WebAssemblyFrameLowering::getOpcAdd(MF)) { MachineOperand &OtherMO = MI.getOperand(3 - FIOperandNum); if (OtherMO.isReg()) { Register OtherMOReg = OtherMO.getReg(); if (Register::isVirtualRegister(OtherMOReg)) { MachineInstr *Def = MF.getRegInfo().getUniqueVRegDef(OtherMOReg); // TODO: For now we just opportunistically do this in the case where - // the CONST_I32 happens to have exactly one def and one use. We + // the CONST_I32/64 happens to have exactly one def and one use. We // should generalize this to optimize in more cases. - if (Def && Def->getOpcode() == WebAssembly::CONST_I32 && + if (Def && Def->getOpcode() == + WebAssemblyFrameLowering::getOpcConst(MF) && MRI.hasOneNonDBGUse(Def->getOperand(0).getReg())) { MachineOperand &ImmMO = Def->getOperand(1); ImmMO.setImm(ImmMO.getImm() + uint32_t(FrameOffset)); @@ -109,20 +110,22 @@ } } - // Otherwise create an i32.add SP, offset and make it the operand. + // Otherwise create an i32/64.add SP, offset and make it the operand. const auto *TII = MF.getSubtarget().getInstrInfo(); unsigned FIRegOperand = FrameRegister; if (FrameOffset) { - // Create i32.add SP, offset and make it the operand. + // Create i32/64.add SP, offset and make it the operand. const TargetRegisterClass *PtrRC = MRI.getTargetRegisterInfo()->getPointerRegClass(MF); Register OffsetOp = MRI.createVirtualRegister(PtrRC); - BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::CONST_I32), + BuildMI(MBB, *II, II->getDebugLoc(), + TII->get(WebAssemblyFrameLowering::getOpcConst(MF)), OffsetOp) .addImm(FrameOffset); FIRegOperand = MRI.createVirtualRegister(PtrRC); - BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::ADD_I32), + BuildMI(MBB, *II, II->getDebugLoc(), + TII->get(WebAssemblyFrameLowering::getOpcAdd(MF)), FIRegOperand) .addReg(FrameRegister) .addReg(OffsetOp); diff --git a/llvm/test/CodeGen/WebAssembly/stack-alignment.ll b/llvm/test/CodeGen/WebAssembly/stack-alignment.ll --- a/llvm/test/CodeGen/WebAssembly/stack-alignment.ll +++ b/llvm/test/CodeGen/WebAssembly/stack-alignment.ll @@ -1,22 +1,20 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s - -target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" -target triple = "wasm32-unknown-unknown" +; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck -DPTR=32 %s +; RUN: llc < %s --mtriple=wasm64-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck -DPTR=64 %s declare void @somefunc(i32*) ; CHECK-LABEL: underalign: ; CHECK: global.get $push[[L1:.+]]=, __stack_pointer{{$}} -; CHECK-NEXT: i32.const $push[[L2:.+]]=, 16 -; CHECK-NEXT: i32.sub $push[[L10:.+]]=, $pop[[L1]], $pop[[L2]] +; CHECK-NEXT: i[[PTR]].const $push[[L2:.+]]=, 16 +; CHECK-NEXT: i[[PTR]].sub $push[[L10:.+]]=, $pop[[L1]], $pop[[L2]] ; CHECK-NEXT: local.tee $push{{.+}}=, [[SP:.+]], $pop[[L10]] ; CHECK: local.get $push[[L3:.+]]=, [[SP]]{{$}} -; CHECK: i32.add $push[[underaligned:.+]]=, $pop[[L3]], $pop{{.+}} -; CHECK-NEXT: call somefunc, $pop[[underaligned]] +; CHECK: i[[PTR]].add $push[[underaligned:.+]]=, $pop[[L3]], $pop{{.+}} +; CHECK-NEXT: call somefunc, $pop[[underaligned]] ; CHECK: local.get $push[[M4:.+]]=, [[SP]]{{$}} -; CHECK: i32.add $push[[L5:.+]]=, $pop[[M4]], $pop{{.+}} +; CHECK: i[[PTR]].add $push[[L5:.+]]=, $pop[[M4]], $pop{{.+}} ; CHECK-NEXT: global.set __stack_pointer, $pop[[L5]] define void @underalign() { entry: @@ -27,17 +25,17 @@ ; CHECK-LABEL: overalign: ; CHECK: global.get $push[[L10:.+]]=, __stack_pointer{{$}} -; CHECK-NEXT: local.tee $push[[L9:.+]]=, [[BP:.+]], $pop[[L10]] -; CHECK-NEXT: i32.const $push[[L2:.+]]=, 32 -; CHECK-NEXT: i32.sub $push[[L8:.+]]=, $pop[[L9]], $pop[[L2]] -; CHECK-NEXT: i32.const $push[[L3:.+]]=, -32 -; CHECK-NEXT: i32.and $push[[L7:.+]]=, $pop[[L8]], $pop[[L3]] -; CHECK-NEXT: local.tee $push{{.+}}=, [[SP:.+]], $pop[[L7]] +; CHECK-NEXT: local.tee $push[[L9:.+]]=, [[BP:.+]], $pop[[L10]] +; CHECK-NEXT: i[[PTR]].const $push[[L2:.+]]=, 32 +; CHECK-NEXT: i[[PTR]].sub $push[[L8:.+]]=, $pop[[L9]], $pop[[L2]] +; CHECK-NEXT: i[[PTR]].const $push[[L3:.+]]=, -32 +; CHECK-NEXT: i[[PTR]].and $push[[L7:.+]]=, $pop[[L8]], $pop[[L3]] +; CHECK-NEXT: local.tee $push{{.+}}=, [[SP:.+]], $pop[[L7]] -; CHECK: local.get $push[[M5:.+]]=, [[SP]]{{$}} -; CHECK: call somefunc, $pop[[M5]]{{$}} +; CHECK: local.get $push[[M5:.+]]=, [[SP]]{{$}} +; CHECK: call somefunc, $pop[[M5]]{{$}} -; CHECK: local.get $push[[M6:.+]]=, [[BP]]{{$}} +; CHECK: local.get $push[[M6:.+]]=, [[BP]]{{$}} ; CHECK-NEXT: global.set __stack_pointer, $pop[[M6]] define void @overalign() { entry: @@ -48,19 +46,19 @@ ; CHECK-LABEL: over_and_normal_align: ; CHECK: global.get $push[[L14:.+]]=, __stack_pointer{{$}} -; CHECK-NEXT: local.tee $push[[L13:.+]]=, [[BP:.+]], $pop[[L14]] -; CHECK: i32.sub $push[[L12:.+]]=, $pop[[L13]], $pop{{.+}} -; CHECK: i32.and $push[[L11:.+]]=, $pop[[L12]], $pop{{.+}} -; CHECK-NEXT: local.tee $push{{.+}}=, [[SP:.+]], $pop[[L11]] +; CHECK-NEXT: local.tee $push[[L13:.+]]=, [[BP:.+]], $pop[[L14]] +; CHECK: i[[PTR]].sub $push[[L12:.+]]=, $pop[[L13]], $pop{{.+}} +; CHECK: i[[PTR]].and $push[[L11:.+]]=, $pop[[L12]], $pop{{.+}} +; CHECK-NEXT: local.tee $push{{.+}}=, [[SP:.+]], $pop[[L11]] ; CHECK: local.get $push[[M6:.+]]=, [[SP]]{{$}} -; CHECK: i32.add $push[[L6:.+]]=, $pop[[M6]], $pop{{.+}} -; CHECK-NEXT: call somefunc, $pop[[L6]] -; CHECK: local.get $push[[M7:.+]]=, [[SP]]{{$}} -; CHECK: i32.add $push[[L8:.+]]=, $pop[[M7]], $pop{{.+}} -; CHECK-NEXT: call somefunc, $pop[[L8]] +; CHECK: i[[PTR]].add $push[[L6:.+]]=, $pop[[M6]], $pop{{.+}} +; CHECK-NEXT: call somefunc, $pop[[L6]] +; CHECK: local.get $push[[M7:.+]]=, [[SP]]{{$}} +; CHECK: i[[PTR]].add $push[[L8:.+]]=, $pop[[M7]], $pop{{.+}} +; CHECK-NEXT: call somefunc, $pop[[L8]] -; CHECK: local.get $push[[L6:.+]]=, [[BP]]{{$}} +; CHECK: local.get $push[[L6:.+]]=, [[BP]]{{$}} ; CHECK-NEXT: global.set __stack_pointer, $pop[[L6]] define void @over_and_normal_align() { entry: @@ -73,14 +71,14 @@ ; CHECK-LABEL: dynamic_overalign: ; CHECK: global.get $push[[L18:.+]]=, __stack_pointer{{$}} -; CHECK-NEXT: local.tee $push[[L17:.+]]=, [[SP:.+]], $pop[[L18]] -; CHECK-NEXT: local.set [[BP:.+]], $pop[[L17]] -; CHECK: local.tee $push{{.+}}=, [[SP_2:.+]], $pop{{.+}} +; CHECK-NEXT: local.tee $push[[L17:.+]]=, [[SP:.+]], $pop[[L18]] +; CHECK-NEXT: local.set [[BP:.+]], $pop[[L17]] +; CHECK: local.tee $push{{.+}}=, [[SP_2:.+]], $pop{{.+}} -; CHECK: local.get $push[[M8:.+]]=, [[SP_2]]{{$}} -; CHECK: call somefunc, $pop[[M8]] +; CHECK: local.get $push[[M8:.+]]=, [[SP_2]]{{$}} +; CHECK: call somefunc, $pop[[M8]] -; CHECK: local.get $push[[M9:.+]]=, [[BP]]{{$}} +; CHECK: local.get $push[[M9:.+]]=, [[BP]]{{$}} ; CHECK-NEXT: global.set __stack_pointer, $pop[[M9]] define void @dynamic_overalign(i32 %num) { entry: @@ -91,18 +89,18 @@ ; CHECK-LABEL: overalign_and_dynamic: ; CHECK: global.get $push[[L21:.+]]=, __stack_pointer{{$}} -; CHECK-NEXT: local.tee $push[[L20:.+]]=, [[BP:.+]], $pop[[L21]] -; CHECK: i32.sub $push[[L19:.+]]=, $pop[[L20]], $pop{{.+}} -; CHECK: i32.and $push[[L18:.+]]=, $pop[[L19]], $pop{{.+}} -; CHECK: local.tee $push{{.+}}=, [[FP:.+]], $pop[[L18]] -; CHECK: local.get $push[[M10:.+]]=, [[FP]]{{$}} -; CHECK: i32.sub $push[[L16:.+]]=, $pop[[M10]], $pop{{.+}} -; CHECK-NEXT: local.tee $push{{.+}}=, [[SP:.+]], $pop[[L16]] - -; CHECK: local.get $push[[over:.+]]=, [[FP]] -; CHECK-NEXT: call somefunc, $pop[[over]] -; CHECK: local.get $push[[another:.+]]=, [[SP]] -; CHECK-NEXT: call somefunc, $pop[[another]] +; CHECK-NEXT: local.tee $push[[L20:.+]]=, [[BP:.+]], $pop[[L21]] +; CHECK: i[[PTR]].sub $push[[L19:.+]]=, $pop[[L20]], $pop{{.+}} +; CHECK: i[[PTR]].and $push[[L18:.+]]=, $pop[[L19]], $pop{{.+}} +; CHECK: local.tee $push{{.+}}=, [[FP:.+]], $pop[[L18]] +; CHECK: local.get $push[[M10:.+]]=, [[FP]]{{$}} +; CHECK: i[[PTR]].sub $push[[L16:.+]]=, $pop[[M10]], $pop{{.+}} +; CHECK-NEXT: local.tee $push{{.+}}=, [[SP:.+]], $pop[[L16]] + +; CHECK: local.get $push[[over:.+]]=, [[FP]] +; CHECK-NEXT: call somefunc, $pop[[over]] +; CHECK: local.get $push[[another:.+]]=, [[SP]] +; CHECK-NEXT: call somefunc, $pop[[another]] ; CHECK: local.get $push[[M11:.+]]=, [[BP]]{{$}} ; CHECK-NEXT: global.set __stack_pointer, $pop[[M11]] @@ -117,23 +115,23 @@ ; CHECK-LABEL: overalign_static_and_dynamic: ; CHECK: global.get $push[[L26:.+]]=, __stack_pointer{{$}} -; CHECK-NEXT: local.tee $push[[L25:.+]]=, [[BP:.+]], $pop[[L26]] -; CHECK: i32.sub $push[[L24:.+]]=, $pop[[L25]], $pop{{.+}} -; CHECK: i32.and $push[[L23:.+]]=, $pop[[L24]], $pop{{.+}} -; CHECK: local.tee $push{{.+}}=, [[FP:.+]], $pop[[L23]] -; CHECK: local.get $push[[M12:.+]]=, [[FP]]{{$}} -; CHECK: i32.sub $push[[L21:.+]]=, $pop[[M12]], $pop{{.+}} -; CHECK-NEXT: local.tee $push{{.+}}=, [[SP:.+]], $pop[[L21]] - -; CHECK: local.get $push[[L19:.+]]=, [[FP]] -; CHECK: local.tee $push[[L18:.+]]=, [[FP_2:.+]], $pop[[L19]] -; CHECK: i32.add $push[[over:.+]]=, $pop[[L18]], $pop{{.+}} -; CHECK-NEXT: call somefunc, $pop[[over]] -; CHECK: local.get $push[[M12:.+]]=, [[SP]] -; CHECK: call somefunc, $pop[[M12]] -; CHECK: local.get $push[[M13:.+]]=, [[FP_2]] -; CHECK: i32.add $push[[static:.+]]=, $pop[[M13]], $pop{{.+}} -; CHECK-NEXT: call somefunc, $pop[[static]] +; CHECK-NEXT: local.tee $push[[L25:.+]]=, [[BP:.+]], $pop[[L26]] +; CHECK: i[[PTR]].sub $push[[L24:.+]]=, $pop[[L25]], $pop{{.+}} +; CHECK: i[[PTR]].and $push[[L23:.+]]=, $pop[[L24]], $pop{{.+}} +; CHECK: local.tee $push{{.+}}=, [[FP:.+]], $pop[[L23]] +; CHECK: local.get $push[[M12:.+]]=, [[FP]]{{$}} +; CHECK: i[[PTR]].sub $push[[L21:.+]]=, $pop[[M12]], $pop{{.+}} +; CHECK-NEXT: local.tee $push{{.+}}=, [[SP:.+]], $pop[[L21]] + +; CHECK: local.get $push[[L19:.+]]=, [[FP]] +; CHECK: local.tee $push[[L18:.+]]=, [[FP_2:.+]], $pop[[L19]] +; CHECK: i[[PTR]].add $push[[over:.+]]=, $pop[[L18]], $pop{{.+}} +; CHECK-NEXT: call somefunc, $pop[[over]] +; CHECK: local.get $push[[M12:.+]]=, [[SP]] +; CHECK: call somefunc, $pop[[M12]] +; CHECK: local.get $push[[M13:.+]]=, [[FP_2]] +; CHECK: i[[PTR]].add $push[[static:.+]]=, $pop[[M13]], $pop{{.+}} +; CHECK-NEXT: call somefunc, $pop[[static]] ; CHECK: local.get $push[[M14:.+]]=, [[BP]]{{$}} ; CHECK-NEXT: global.set __stack_pointer, $pop[[M14]] diff --git a/llvm/test/CodeGen/WebAssembly/userstack.ll b/llvm/test/CodeGen/WebAssembly/userstack.ll --- a/llvm/test/CodeGen/WebAssembly/userstack.ll +++ b/llvm/test/CodeGen/WebAssembly/userstack.ll @@ -1,18 +1,16 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s - -target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" -target triple = "wasm32-unknown-unknown" +; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck -DPTR=32 %s +; RUN: llc < %s --mtriple=wasm64-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck -DPTR=64 %s declare void @ext_func(i64* %ptr) declare void @ext_func_i32(i32* %ptr) ; CHECK-LABEL: alloca32: ; Check that there is an extra local for the stack pointer. -; CHECK: .local i32{{$}} +; CHECK: .local i[[PTR]]{{$}} define void @alloca32() noredzone { ; CHECK-NEXT: global.get $push[[L2:.+]]=, __stack_pointer{{$}} - ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 16 - ; CHECK-NEXT: i32.sub $push[[L9:.+]]=, $pop[[L2]], $pop[[L3]] + ; CHECK-NEXT: i[[PTR]].const $push[[L3:.+]]=, 16 + ; CHECK-NEXT: i[[PTR]].sub $push[[L9:.+]]=, $pop[[L2]], $pop[[L3]] ; CHECK-NEXT: local.tee $push[[L8:.+]]=, [[SP:.+]], $pop[[L9]]{{$}} ; CHECK-NEXT: global.set __stack_pointer, $pop[[L8]]{{$}} %retval = alloca i32 @@ -21,18 +19,18 @@ ; CHECK: i32.store 12($pop[[L4]]), $pop[[L0]] store i32 0, i32* %retval ; CHECK: local.get $push[[L6:.+]]=, [[SP]]{{$}} - ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 16 - ; CHECK-NEXT: i32.add $push[[L7:.+]]=, $pop[[L6]], $pop[[L5]] + ; CHECK-NEXT: i[[PTR]].const $push[[L5:.+]]=, 16 + ; CHECK-NEXT: i[[PTR]].add $push[[L7:.+]]=, $pop[[L6]], $pop[[L5]] ; CHECK-NEXT: global.set __stack_pointer, $pop[[L7]] ret void } ; CHECK-LABEL: alloca3264: -; CHECK: .local i32{{$}} +; CHECK: .local i[[PTR]]{{$}} define void @alloca3264() { ; CHECK: global.get $push[[L3:.+]]=, __stack_pointer{{$}} - ; CHECK-NEXT: i32.const $push[[L4:.+]]=, 16 - ; CHECK-NEXT: i32.sub $push[[L6:.+]]=, $pop[[L3]], $pop[[L4]] + ; CHECK-NEXT: i[[PTR]].const $push[[L4:.+]]=, 16 + ; CHECK-NEXT: i[[PTR]].sub $push[[L6:.+]]=, $pop[[L3]], $pop[[L4]] ; CHECK-NEXT: local.tee $push[[L5:.+]]=, [[SP:.+]], $pop[[L6]] %r1 = alloca i32 %r2 = alloca double @@ -48,17 +46,17 @@ } ; CHECK-LABEL: allocarray: -; CHECK: .local i32{{$}} +; CHECK: .local i[[PTR]]{{$}} define void @allocarray() { ; CHECK-NEXT: global.get $push[[L4:.+]]=, __stack_pointer{{$}} - ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 144{{$}} - ; CHECK-NEXT: i32.sub $push[[L12:.+]]=, $pop[[L4]], $pop[[L5]] + ; CHECK-NEXT: i[[PTR]].const $push[[L5:.+]]=, 144{{$}} + ; CHECK-NEXT: i[[PTR]].sub $push[[L12:.+]]=, $pop[[L4]], $pop[[L5]] ; CHECK-NEXT: local.tee $push[[L11:.+]]=, 0, $pop[[L12]] ; CHECK-NEXT: global.set __stack_pointer, $pop[[L11]] %r = alloca [33 x i32] - ; CHECK: i32.const $push{{.+}}=, 24 - ; CHECK-NEXT: i32.add $push[[L3:.+]]=, $pop{{.+}}, $pop{{.+}} + ; CHECK: i[[PTR]].const $push{{.+}}=, 24 + ; CHECK-NEXT: i[[PTR]].add $push[[L3:.+]]=, $pop{{.+}}, $pop{{.+}} ; CHECK-NEXT: i32.const $push[[L1:.+]]=, 1{{$}} ; CHECK-NEXT: i32.store 0($pop[[L3]]), $pop[[L1]]{{$}} ; CHECK-NEXT: local.get $push[[L4:.+]]=, 0{{$}} @@ -70,16 +68,16 @@ store i32 1, i32* %p2 ; CHECK-NEXT: local.get $push[[L2:.+]]=, [[SP]]{{$}} - ; CHECK-NEXT: i32.const $push[[L7:.+]]=, 144 - ; CHECK-NEXT: i32.add $push[[L8:.+]]=, $pop[[L2]], $pop[[L7]] + ; CHECK-NEXT: i[[PTR]].const $push[[L7:.+]]=, 144 + ; CHECK-NEXT: i[[PTR]].add $push[[L8:.+]]=, $pop[[L2]], $pop[[L7]] ; CHECK-NEXT: global.set __stack_pointer, $pop[[L8]] ret void } ; CHECK-LABEL: non_mem_use define void @non_mem_use(i8** %addr) { - ; CHECK: i32.const $push[[L2:.+]]=, 48 - ; CHECK-NEXT: i32.sub $push[[L12:.+]]=, {{.+}}, $pop[[L2]] + ; CHECK: i[[PTR]].const $push[[L2:.+]]=, 48 + ; CHECK-NEXT: i[[PTR]].sub $push[[L12:.+]]=, {{.+}}, $pop[[L2]] ; CHECK-NEXT: local.tee $push[[L11:.+]]=, [[SP:.+]], $pop[[L12]] ; CHECK-NEXT: global.set __stack_pointer, $pop[[L11]] %buf = alloca [27 x i8], align 16 @@ -87,8 +85,8 @@ %r2 = alloca i64 ; %r is at SP+8 ; CHECK: local.get $push[[L3:.+]]=, [[SP]] - ; CHECK: i32.const $push[[OFF:.+]]=, 8 - ; CHECK-NEXT: i32.add $push[[ARG1:.+]]=, $pop[[L3]], $pop[[OFF]] + ; CHECK: i[[PTR]].const $push[[OFF:.+]]=, 8 + ; CHECK-NEXT: i[[PTR]].add $push[[ARG1:.+]]=, $pop[[L3]], $pop[[OFF]] ; CHECK-NEXT: call ext_func, $pop[[ARG1]] call void @ext_func(i64* %r) ; %r2 is at SP+0, no add needed @@ -98,20 +96,20 @@ ; Use as a value, but in a store ; %buf is at SP+16 ; CHECK: local.get $push[[L5:.+]]=, [[SP]] - ; CHECK: i32.const $push[[OFF:.+]]=, 16 - ; CHECK-NEXT: i32.add $push[[VAL:.+]]=, $pop[[L5]], $pop[[OFF]] - ; CHECK-NEXT: i32.store 0($pop{{.+}}), $pop[[VAL]] + ; CHECK: i[[PTR]].const $push[[OFF:.+]]=, 16 + ; CHECK-NEXT: i[[PTR]].add $push[[VAL:.+]]=, $pop[[L5]], $pop[[OFF]] + ; CHECK-NEXT: i[[PTR]].store 0($pop{{.+}}), $pop[[VAL]] %gep = getelementptr inbounds [27 x i8], [27 x i8]* %buf, i32 0, i32 0 store i8* %gep, i8** %addr ret void } ; CHECK-LABEL: allocarray_inbounds: -; CHECK: .local i32{{$}} +; CHECK: .local i[[PTR]]{{$}} define void @allocarray_inbounds() { ; CHECK: global.get $push[[L3:.+]]=, __stack_pointer{{$}} - ; CHECK-NEXT: i32.const $push[[L4:.+]]=, 32{{$}} - ; CHECK-NEXT: i32.sub $push[[L11:.+]]=, $pop[[L3]], $pop[[L4]] + ; CHECK-NEXT: i[[PTR]].const $push[[L4:.+]]=, 32{{$}} + ; CHECK-NEXT: i[[PTR]].sub $push[[L11:.+]]=, $pop[[L3]], $pop[[L4]] ; CHECK-NEXT: local.tee $push[[L10:.+]]=, [[SP:.+]], $pop[[L11]] ; CHECK-NEXT: global.set __stack_pointer, $pop[[L10]]{{$}} %r = alloca [5 x i32] @@ -125,8 +123,8 @@ store i32 1, i32* %p2 call void @ext_func(i64* null); ; CHECK: call ext_func - ; CHECK: i32.const $push[[L5:.+]]=, 32{{$}} - ; CHECK-NEXT: i32.add $push[[L7:.+]]=, ${{.+}}, $pop[[L5]] + ; CHECK: i[[PTR]].const $push[[L5:.+]]=, 32{{$}} + ; CHECK-NEXT: i[[PTR]].add $push[[L7:.+]]=, ${{.+}}, $pop[[L5]] ; CHECK-NEXT: global.set __stack_pointer, $pop[[L7]] ret void } @@ -136,7 +134,7 @@ ; CHECK: global.get $push[[L13:.+]]=, __stack_pointer{{$}} ; CHECK-NEXT: local.tee $push[[L12:.+]]=, [[SP:.+]], $pop[[L13]]{{$}} ; Target independent codegen bumps the stack pointer. - ; CHECK: i32.sub + ; CHECK: i[[PTR]].sub ; Check that SP is written back to memory after decrement ; CHECK: global.set __stack_pointer, %r = alloca i32, i32 %alloc @@ -152,12 +150,12 @@ ; CHECK: global.get $push[[L13:.+]]=, __stack_pointer{{$}} ; CHECK-NEXT: local.tee $push[[L12:.+]]=, [[SP:.+]], $pop[[L13]]{{$}} ; Target independent codegen bumps the stack pointer - ; CHECK: i32.sub + ; CHECK: i[[PTR]].sub %r = alloca i32, i32 %alloc - ; CHECK-NEXT: local.tee $push[[L8:.+]]=, {{.+}}, $pop - ; CHECK: local.get $push[[L7:.+]]=, 0{{$}} - ; CHECK-NEXT: i32.const $push[[L6:.+]]=, 0{{$}} - ; CHECK-NEXT: i32.store 0($pop[[L7]]), $pop[[L6]]{{$}} + ; CHECK-NEXT: local.tee $push[[L8:.+]]=, [[SP2:.+]], $pop + ; CHECK: local.get $push[[L7:.+]]=, [[SP2]]{{$}} + ; CHECK-NEXT: i32.const $push[[L6:.+]]=, 0{{$}} + ; CHECK-NEXT: i32.store 0($pop[[L7]]), $pop[[L6]]{{$}} store i32 0, i32* %r ; CHECK-NEXT: return ret void @@ -167,8 +165,8 @@ define void @dynamic_static_alloca(i32 %alloc) noredzone { ; Decrement SP in the prolog by the static amount and writeback to memory. ; CHECK: global.get $push[[L11:.+]]=, __stack_pointer{{$}} - ; CHECK-NEXT: i32.const $push[[L12:.+]]=, 16 - ; CHECK-NEXT: i32.sub $push[[L23:.+]]=, $pop[[L11]], $pop[[L12]] + ; CHECK-NEXT: i[[PTR]].const $push[[L12:.+]]=, 16 + ; CHECK-NEXT: i[[PTR]].sub $push[[L23:.+]]=, $pop[[L11]], $pop[[L12]] ; CHECK-NEXT: local.tee $push[[L22:.+]]=, [[SP:.+]], $pop[[L23]] ; CHECK-NEXT: global.set __stack_pointer, $pop[[L22]] @@ -181,7 +179,7 @@ store volatile i32 101, i32* %static ; Decrement SP in the body by the dynamic amount. - ; CHECK: i32.sub + ; CHECK: i[[PTR]].sub ; CHECK: local.tee $push[[L16:.+]]=, [[dynamic_local:.+]], $pop{{.+}} ; CHECK: local.tee $push[[L15:.+]]=, [[other:.+]], $pop[[L16]]{{$}} ; CHECK: global.set __stack_pointer, $pop[[L15]]{{$}} @@ -201,7 +199,7 @@ store volatile i32 103, i32* %dynamic ; Decrement SP in the body by the dynamic amount. - ; CHECK: i32.sub + ; CHECK: i[[PTR]].sub ; CHECK: local.tee $push{{.+}}=, [[dynamic2_local:.+]], $pop{{.+}} %dynamic.2 = alloca i32, i32 %alloc @@ -224,8 +222,8 @@ ; Writeback to memory. ; CHECK: local.get $push[[L24:.+]]=, [[FP]]{{$}} - ; CHECK: i32.const $push[[L18:.+]]=, 16 - ; CHECK-NEXT: i32.add $push[[L19:.+]]=, $pop[[L24]], $pop[[L18]] + ; CHECK: i[[PTR]].const $push[[L18:.+]]=, 16 + ; CHECK-NEXT: i[[PTR]].add $push[[L19:.+]]=, $pop[[L24]], $pop[[L18]] ; CHECK-NEXT: global.set __stack_pointer, $pop[[L19]] ret void } @@ -273,11 +271,11 @@ ; CHECK-LABEL: copytoreg_fi: define void @copytoreg_fi(i1 %cond, i32* %b) { entry: - ; CHECK: i32.const $push[[L1:.+]]=, 16 - ; CHECK-NEXT: i32.sub $push[[L3:.+]]=, {{.+}}, $pop[[L1]] + ; CHECK: i[[PTR]].const $push[[L1:.+]]=, 16 + ; CHECK-NEXT: i[[PTR]].sub $push[[L3:.+]]=, {{.+}}, $pop[[L1]] %addr = alloca i32 - ; CHECK: i32.const $push[[OFF:.+]]=, 12 - ; CHECK-NEXT: i32.add $push[[ADDR:.+]]=, $pop[[L3]], $pop[[OFF]] + ; CHECK: i[[PTR]].const $push[[OFF:.+]]=, 12 + ; CHECK-NEXT: i[[PTR]].add $push[[ADDR:.+]]=, $pop[[L3]], $pop[[OFF]] ; CHECK-NEXT: local.set [[COPY:.+]], $pop[[ADDR]] br label %body body: @@ -309,7 +307,7 @@ ; Test __builtin_frame_address(1). ; CHECK-LABEL: frameaddress_1: -; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i[[PTR]].const $push0=, 0{{$}} ; CHECK-NEXT: call use_i8_star, $pop0{{$}} ; CHECK-NEXT: return{{$}} define void @frameaddress_1() { @@ -330,6 +328,6 @@ ret void } -; CHECK: .globaltype __stack_pointer, i32{{$}} +; CHECK: .globaltype __stack_pointer, i[[PTR]]{{$}} ; TODO: test over-aligned alloca diff --git a/llvm/test/MC/WebAssembly/stack-ptr.ll b/llvm/test/MC/WebAssembly/stack-ptr.ll --- a/llvm/test/MC/WebAssembly/stack-ptr.ll +++ b/llvm/test/MC/WebAssembly/stack-ptr.ll @@ -1,6 +1,5 @@ -; RUN: llc -filetype=obj %s -o - | obj2yaml | FileCheck %s - -target triple = "wasm32-unknown-unknown" +; RUN: llc --mtriple=wasm32-unknown-unknown -filetype=obj %s -o - | obj2yaml | FileCheck --check-prefixes CHECK,CHK32 %s +; RUN: llc --mtriple=wasm64-unknown-unknown -filetype=obj %s -o - | obj2yaml | FileCheck --check-prefixes CHECK,CHK64 %s ; Function that uses explict stack, and should generate a reference to ; __stack_pointer, along with the corresponding reloction entry. @@ -15,7 +14,8 @@ ; CHECK: - Module: env ; CHECK: Field: __stack_pointer ; CHECK: Kind: GLOBAL -; CHECK: GlobalType: I32 +; CHK32: GlobalType: I32 +; CHK64: GlobalType: I64 ; CHECK: GlobalMutable: true ; CHECK: - Type: CODE ; CHECK: Relocations: diff --git a/llvm/test/MC/WebAssembly/wasm64.s b/llvm/test/MC/WebAssembly/wasm64.s --- a/llvm/test/MC/WebAssembly/wasm64.s +++ b/llvm/test/MC/WebAssembly/wasm64.s @@ -51,6 +51,11 @@ i64.const 0 f32.store .L.str # relocatable offset! + ### 64-bit SP + + global.get __stack_pointer + drop + end_function .section .rodata..L.str,"",@ @@ -62,7 +67,7 @@ .size .L.str, 24 .globaltype myglob64, i64 - + .globaltype __stack_pointer, i64 # CHECK: .functype test (i64) -> () @@ -155,6 +160,11 @@ # BIN-NEXT: Kind: GLOBAL # BIN-NEXT: GlobalType: I64 # BIN-NEXT: GlobalMutable: true +# BIN-NEXT: - Module: env +# BIN-NEXT: Field: __stack_pointer +# BIN-NEXT: Kind: GLOBAL +# BIN-NEXT: GlobalType: I64 +# BIN-NEXT: GlobalMutable: true # BIN-NEXT: - Type: FUNCTION # BIN-NEXT: FunctionTypes: [ 0 ] # BIN-NEXT: - Type: DATACOUNT @@ -179,12 +189,15 @@ # BIN-NEXT: - Type: R_WASM_MEMORY_ADDR_LEB64 # BIN-NEXT: Index: 1 # BIN-NEXT: Offset: 0x00000078 +# BIN-NEXT: - Type: R_WASM_GLOBAL_INDEX_LEB +# BIN-NEXT: Index: 3 +# BIN-NEXT: Offset: 0x00000083 # BIN-NEXT: Functions: # BIN-NEXT: - Index: 0 # BIN-NEXT: Locals: # BIN-NEXT: - Type: I64 # BIN-NEXT: Count: 1 -# BIN-NEXT: Body: 42002A02001A20002A02001A42808080808080808080002A02001A2380808080002A02001A42002A02808080808080808080001A4300000000420038020043000000002000380200430000000042808080808080808080003802004300000000238080808000380200430000000042003802808080808080808080000B +# BIN-NEXT: Body: 42002A02001A20002A02001A42808080808080808080002A02001A2380808080002A02001A42002A02808080808080808080001A4300000000420038020043000000002000380200430000000042808080808080808080003802004300000000238080808000380200430000000042003802808080808080808080002381808080001A0B # BIN-NEXT: - Type: DATA # BIN-NEXT: Relocations: # BIN-NEXT: - Type: R_WASM_MEMORY_ADDR_I64 @@ -217,6 +230,11 @@ # BIN-NEXT: Name: myglob64 # BIN-NEXT: Flags: [ UNDEFINED ] # BIN-NEXT: Global: 0 +# BIN-NEXT: - Index: 3 +# BIN-NEXT: Kind: GLOBAL +# BIN-NEXT: Name: __stack_pointer +# BIN-NEXT: Flags: [ UNDEFINED ] +# BIN-NEXT: Global: 1 # BIN-NEXT: SegmentInfo: # BIN-NEXT: - Index: 0 # BIN-NEXT: Name: .rodata..L.str