diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp
--- a/clang/lib/Driver/ToolChains/WebAssembly.cpp
+++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp
@@ -62,6 +62,12 @@
   const char *Linker = Args.MakeArgString(getLinkerPath(Args));
   ArgStringList CmdArgs;
 
+  CmdArgs.push_back("-m");
+  if (getToolChain().getTriple().isArch64Bit())
+    CmdArgs.push_back("wasm64");
+  else
+    CmdArgs.push_back("wasm32");
+
   if (Args.hasArg(options::OPT_s))
     CmdArgs.push_back("--strip-all");
 
diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h
--- a/lld/wasm/Config.h
+++ b/lld/wasm/Config.h
@@ -36,6 +36,7 @@
   bool importMemory;
   bool sharedMemory;
   bool importTable;
+  bool is64;
   bool mergeDataSegments;
   bool pie;
   bool printGcSections;
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -378,6 +378,18 @@
   config->exportDynamic =
       args.hasFlag(OPT_export_dynamic, OPT_no_export_dynamic, config->shared);
 
+  // Parse wasm32/64.
+  config->is64 = false;
+  if (auto *arg = args.getLastArg(OPT_m)) {
+    StringRef s = arg->getValue();
+    if (s == "wasm32")
+      config->is64 = false;
+    else if (s == "wasm64")
+      config->is64 = true;
+    else
+      error("invalid target architecture: " + s);
+  }
+
   // --threads= takes a positive integer and provides the default value for
   // --thinlto-jobs=.
   if (auto *arg = args.getLastArg(OPT_threads)) {
@@ -498,9 +510,15 @@
 static GlobalSymbol *createGlobalVariable(StringRef name, bool isMutable,
                                           int value) {
   llvm::wasm::WasmGlobal wasmGlobal;
-  wasmGlobal.Type = {WASM_TYPE_I32, isMutable};
-  wasmGlobal.InitExpr.Value.Int32 = value;
-  wasmGlobal.InitExpr.Opcode = WASM_OPCODE_I32_CONST;
+  if (config->is64) {
+    wasmGlobal.Type = {WASM_TYPE_I64, isMutable};
+    wasmGlobal.InitExpr.Value.Int64 = value;
+    wasmGlobal.InitExpr.Opcode = WASM_OPCODE_I64_CONST;
+  } else {
+    wasmGlobal.Type = {WASM_TYPE_I32, isMutable};
+    wasmGlobal.InitExpr.Value.Int32 = value;
+    wasmGlobal.InitExpr.Opcode = WASM_OPCODE_I32_CONST;
+  }
   wasmGlobal.SymbolName = name;
   return symtab->addSyntheticGlobal(name, WASM_SYMBOL_VISIBILITY_HIDDEN,
                                     make<InputGlobal>(wasmGlobal, nullptr));
@@ -513,9 +531,13 @@
 
   static WasmSignature nullSignature = {{}, {}};
   static WasmSignature i32ArgSignature = {{}, {ValType::I32}};
+  static WasmSignature i64ArgSignature = {{}, {ValType::I64}};
   static llvm::wasm::WasmGlobalType globalTypeI32 = {WASM_TYPE_I32, false};
+  static llvm::wasm::WasmGlobalType globalTypeI64 = {WASM_TYPE_I64, false};
   static llvm::wasm::WasmGlobalType mutableGlobalTypeI32 = {WASM_TYPE_I32,
                                                             true};
+  static llvm::wasm::WasmGlobalType mutableGlobalTypeI64 = {WASM_TYPE_I64,
+                                                            true};
   WasmSym::callCtors = symtab->addSyntheticFunction(
       "__wasm_call_ctors", WASM_SYMBOL_VISIBILITY_HIDDEN,
       make<SyntheticFunction>(nullSignature, "__wasm_call_ctors"));
@@ -530,15 +552,16 @@
 
 
   if (config->isPic) {
-    WasmSym::stackPointer =
-        createUndefinedGlobal("__stack_pointer", &mutableGlobalTypeI32);
+    WasmSym::stackPointer = createUndefinedGlobal(
+        "__stack_pointer",
+        config->is64 ? &mutableGlobalTypeI64 : &mutableGlobalTypeI32);
     // For PIC code, we import two global variables (__memory_base and
     // __table_base) from the environment and use these as the offset at
     // which to load our static data and function table.
     // See:
     // https://github.com/WebAssembly/tool-conventions/blob/master/DynamicLinking.md
-    WasmSym::memoryBase =
-        createUndefinedGlobal("__memory_base", &globalTypeI32);
+    WasmSym::memoryBase = createUndefinedGlobal(
+        "__memory_base", config->is64 ? &globalTypeI64 : &globalTypeI32);
     WasmSym::tableBase = createUndefinedGlobal("__table_base", &globalTypeI32);
     WasmSym::memoryBase->markLive();
     WasmSym::tableBase->markLive();
@@ -563,7 +586,9 @@
     WasmSym::tlsAlign = createGlobalVariable("__tls_align", false, 1);
     WasmSym::initTLS = symtab->addSyntheticFunction(
         "__wasm_init_tls", WASM_SYMBOL_VISIBILITY_HIDDEN,
-        make<SyntheticFunction>(i32ArgSignature, "__wasm_init_tls"));
+        make<SyntheticFunction>(config->is64 ? i64ArgSignature
+                                             : i32ArgSignature,
+                                "__wasm_init_tls"));
   }
 }
 
diff --git a/lld/wasm/InputChunks.cpp b/lld/wasm/InputChunks.cpp
--- a/lld/wasm/InputChunks.cpp
+++ b/lld/wasm/InputChunks.cpp
@@ -32,6 +32,18 @@
   llvm_unreachable("unknown reloc type");
 }
 
+// Reports whether relocType is one of the 64-bit memory-address relocation
+// kinds (LEB64, SLEB64, REL_SLEB64 or I64). Every other relocation type
+// yields false.
+bool relocIs64(uint8_t relocType) {
+  const bool is64BitMemoryAddr =
+      relocType == R_WASM_MEMORY_ADDR_LEB64 ||
+      relocType == R_WASM_MEMORY_ADDR_SLEB64 ||
+      relocType == R_WASM_MEMORY_ADDR_REL_SLEB64 ||
+      relocType == R_WASM_MEMORY_ADDR_I64;
+  return is64BitMemoryAddr;
+}
+
 std::string toString(const wasm::InputChunk *c) {
   return (toString(c->file) + ":(" + c->getName() + ")").str();
 }
@@ -323,12 +335,17 @@
   LLVM_DEBUG(dbgs() << "generating runtime relocations: " << getName()
                     << " count=" << relocations.size() << "\n");
 
+  unsigned opcode_ptr_const =
+      config->is64 ? WASM_OPCODE_I64_CONST : WASM_OPCODE_I32_CONST;
+  unsigned opcode_ptr_add =
+      config->is64 ? WASM_OPCODE_I64_ADD : WASM_OPCODE_I32_ADD;
+
   // TODO(sbc): Encode the relocations in the data section and write a loop
   // here to apply them.
   uint32_t segmentVA = outputSeg->startVA + outputSegmentOffset;
   for (const WasmRelocation &rel : relocations) {
-    uint32_t offset = rel.Offset - getInputSectionOffset();
-    uint32_t outputOffset = segmentVA + offset;
+    uint64_t offset = rel.Offset - getInputSectionOffset();
+    uint64_t outputOffset = segmentVA + offset;
 
     LLVM_DEBUG(dbgs() << "gen reloc: type=" << relocTypeToString(rel.Type)
                       << " addend=" << rel.Addend << " index=" << rel.Index
@@ -339,9 +356,17 @@
     writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(), "memory_base");
 
     // Add the offset of the relocation
-    writeU8(os, WASM_OPCODE_I32_CONST, "I32_CONST");
+    writeU8(os, opcode_ptr_const, "CONST");
     writeSleb128(os, outputOffset, "offset");
-    writeU8(os, WASM_OPCODE_I32_ADD, "ADD");
+    writeU8(os, opcode_ptr_add, "ADD");
+
+    bool is64 = relocIs64(rel.Type);
+    unsigned opcode_reloc_const =
+        is64 ? WASM_OPCODE_I64_CONST : WASM_OPCODE_I32_CONST;
+    unsigned opcode_reloc_add =
+        is64 ? WASM_OPCODE_I64_ADD : WASM_OPCODE_I32_ADD;
+    unsigned opcode_reloc_store =
+        is64 ? WASM_OPCODE_I64_STORE : WASM_OPCODE_I32_STORE;
 
     Symbol *sym = file->getSymbol(rel);
     // Now figure out what we want to store
@@ -349,9 +374,9 @@
       writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET");
       writeUleb128(os, sym->getGOTIndex(), "global index");
       if (rel.Addend) {
-        writeU8(os, WASM_OPCODE_I32_CONST, "CONST");
+        writeU8(os, opcode_reloc_const, "CONST");
         writeSleb128(os, rel.Addend, "addend");
-        writeU8(os, WASM_OPCODE_I32_ADD, "ADD");
+        writeU8(os, opcode_reloc_add, "ADD");
       }
     } else {
       const GlobalSymbol* baseSymbol = WasmSym::memoryBase;
@@ -359,13 +384,13 @@
         baseSymbol = WasmSym::tableBase;
       writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET");
       writeUleb128(os, baseSymbol->getGlobalIndex(), "base");
-      writeU8(os, WASM_OPCODE_I32_CONST, "CONST");
+      writeU8(os, opcode_reloc_const, "CONST");
       writeSleb128(os, file->calcNewValue(rel), "offset");
-      writeU8(os, WASM_OPCODE_I32_ADD, "ADD");
+      writeU8(os, opcode_reloc_add, "ADD");
     }
 
     // Store that value at the virtual address
-    writeU8(os, WASM_OPCODE_I32_STORE, "I32_STORE");
+    writeU8(os, opcode_reloc_store, "I32_STORE");
     writeUleb128(os, 2, "align");
     writeUleb128(os, 0, "offset");
   }
diff --git a/lld/wasm/Options.td b/lld/wasm/Options.td
--- a/lld/wasm/Options.td
+++ b/lld/wasm/Options.td
@@ -62,6 +62,8 @@
 def L: JoinedOrSeparate<["-"], "L">, MetaVarName<"<dir>">,
   HelpText<"Add a directory to the library search path">;
 
+def m: JoinedOrSeparate<["-"], "m">, HelpText<"Set target emulation">;
+
 def mllvm: S<"mllvm">, HelpText<"Options to pass to LLVM">;
 
 def no_color_diagnostics: F<"no-color-diagnostics">,
@@ -179,7 +181,6 @@
 def: J<"entry=">, Alias<entry>;
 def: Flag<["-"], "E">, Alias<export_dynamic>, HelpText<"Alias for --export-dynamic">;
 def: Flag<["-"], "i">, Alias<initial_memory>;
-def: Flag<["-"], "m">, Alias<max_memory>;
 def: Flag<["-"], "r">, Alias<relocatable>;
 def: Flag<["-"], "s">, Alias<strip_all>, HelpText<"Alias for --strip-all">;
 def: Flag<["-"], "S">, Alias<strip_debug>, HelpText<"Alias for --strip-debug">;
diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h
--- a/llvm/include/llvm/BinaryFormat/Wasm.h
+++ b/llvm/include/llvm/BinaryFormat/Wasm.h
@@ -254,11 +254,13 @@
   WASM_OPCODE_GLOBAL_GET = 0x23,
   WASM_OPCODE_GLOBAL_SET = 0x24,
   WASM_OPCODE_I32_STORE = 0x36,
+  WASM_OPCODE_I64_STORE = 0x37,
   WASM_OPCODE_I32_CONST = 0x41,
   WASM_OPCODE_I64_CONST = 0x42,
   WASM_OPCODE_F32_CONST = 0x43,
   WASM_OPCODE_F64_CONST = 0x44,
   WASM_OPCODE_I32_ADD = 0x6a,
+  WASM_OPCODE_I64_ADD = 0x7c,
   WASM_OPCODE_REF_NULL = 0xd0,
 };
 
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -434,9 +434,12 @@
         // GetExternalSymbolSymbol does, since if there's no code that
         // refers to this symbol, we have to set it here.
         SPSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
-        // FIXME: need to check subtarget to see if its wasm64, but we
-        // can't cast to WebAssemblySubtarget here.
-        SPSym->setGlobalType(wasm::WasmGlobalType{wasm::WASM_TYPE_I32, true});
+        SPSym->setGlobalType(wasm::WasmGlobalType{
+            uint8_t(Asm->getSubtargetInfo().getTargetTriple().getArch() ==
+                            Triple::wasm64
+                        ? wasm::WASM_TYPE_I64
+                        : wasm::WASM_TYPE_I32),
+            true});
         DIELoc *Loc = new (DIEValueAllocator) DIELoc;
         addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_WASM_location);
         addSInt(*Loc, dwarf::DW_FORM_sdata, FrameBase.Location.WasmLoc.Kind);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
@@ -53,6 +53,15 @@
                        MachineBasicBlock::iterator &InsertStore,
                        const DebugLoc &DL) const;
 
+  static unsigned getSPReg(const MachineFunction &MF);
+  static unsigned getFPReg(const MachineFunction &MF);
+  static unsigned getOpcConst(const MachineFunction &MF);
+  static unsigned getOpcAdd(const MachineFunction &MF);
+  static unsigned getOpcSub(const MachineFunction &MF);
+  static unsigned getOpcAnd(const MachineFunction &MF);
+  static unsigned getOpcGlobGet(const MachineFunction &MF);
+  static unsigned getOpcGlobSet(const MachineFunction &MF);
+
 private:
   bool hasBP(const MachineFunction &MF) const;
   bool needsSPForLocalFrame(const MachineFunction &MF) const;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
@@ -87,8 +87,8 @@
 }
 
 // In function with EH pads, we need to make a copy of the value of
-// __stack_pointer global in SP32 register, in order to use it when restoring
-// __stack_pointer after an exception is caught.
+// __stack_pointer global in SP32/64 register, in order to use it when
+// restoring __stack_pointer after an exception is caught.
 bool WebAssemblyFrameLowering::needsPrologForEH(
     const MachineFunction &MF) const {
   auto EHType = MF.getTarget().getMCAsmInfo()->getExceptionHandlingType();
@@ -123,6 +123,57 @@
   return needsSPForLocalFrame(MF) && !CanUseRedZone;
 }
 
+// Stack pointer register for MF: SP64 when hasAddr64(), otherwise SP32.
+unsigned WebAssemblyFrameLowering::getSPReg(const MachineFunction &MF) {
+  const auto &ST = MF.getSubtarget<WebAssemblySubtarget>();
+  return ST.hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32;
+}
+
+// Frame pointer register for MF: FP64 when hasAddr64(), otherwise FP32.
+unsigned WebAssemblyFrameLowering::getFPReg(const MachineFunction &MF) {
+  const auto &ST = MF.getSubtarget<WebAssemblySubtarget>();
+  return ST.hasAddr64() ? WebAssembly::FP64 : WebAssembly::FP32;
+}
+
+// Constant opcode for MF: CONST_I64 when hasAddr64(), otherwise CONST_I32.
+unsigned
+WebAssemblyFrameLowering::getOpcConst(const MachineFunction &MF) {
+  const auto &ST = MF.getSubtarget<WebAssemblySubtarget>();
+  return ST.hasAddr64() ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
+}
+
+// Add opcode for MF: ADD_I64 when hasAddr64(), otherwise ADD_I32.
+unsigned WebAssemblyFrameLowering::getOpcAdd(const MachineFunction &MF) {
+  const auto &ST = MF.getSubtarget<WebAssemblySubtarget>();
+  return ST.hasAddr64() ? WebAssembly::ADD_I64 : WebAssembly::ADD_I32;
+}
+
+// Subtract opcode for MF: SUB_I64 when hasAddr64(), otherwise SUB_I32.
+unsigned WebAssemblyFrameLowering::getOpcSub(const MachineFunction &MF) {
+  const auto &ST = MF.getSubtarget<WebAssemblySubtarget>();
+  return ST.hasAddr64() ? WebAssembly::SUB_I64 : WebAssembly::SUB_I32;
+}
+
+// Bitwise-and opcode for MF: AND_I64 when hasAddr64(), otherwise AND_I32.
+unsigned WebAssemblyFrameLowering::getOpcAnd(const MachineFunction &MF) {
+  const auto &ST = MF.getSubtarget<WebAssemblySubtarget>();
+  return ST.hasAddr64() ? WebAssembly::AND_I64 : WebAssembly::AND_I32;
+}
+
+// global.get opcode for MF: the I64 form when hasAddr64(), else the I32 form.
+unsigned WebAssemblyFrameLowering::getOpcGlobGet(const MachineFunction &MF) {
+  if (MF.getSubtarget<WebAssemblySubtarget>().hasAddr64())
+    return WebAssembly::GLOBAL_GET_I64;
+  return WebAssembly::GLOBAL_GET_I32;
+}
+
+// global.set opcode for MF: the I64 form when hasAddr64(), else the I32 form.
+unsigned WebAssemblyFrameLowering::getOpcGlobSet(const MachineFunction &MF) {
+  if (MF.getSubtarget<WebAssemblySubtarget>().hasAddr64())
+    return WebAssembly::GLOBAL_SET_I64;
+  return WebAssembly::GLOBAL_SET_I32;
+}
+
 void WebAssemblyFrameLowering::writeSPToGlobal(
     unsigned SrcReg, MachineFunction &MF, MachineBasicBlock &MBB,
     MachineBasicBlock::iterator &InsertStore, const DebugLoc &DL) const {
@@ -130,7 +181,8 @@
 
   const char *ES = "__stack_pointer";
   auto *SPSymbol = MF.createExternalSymbolName(ES);
-  BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::GLOBAL_SET_I32))
+
+  BuildMI(MBB, InsertStore, DL, TII->get(getOpcGlobSet(MF)))
       .addExternalSymbol(SPSymbol)
       .addReg(SrcReg);
 }
@@ -141,11 +193,12 @@
     MachineBasicBlock::iterator I) const {
   assert(!I->getOperand(0).getImm() && (hasFP(MF) || hasBP(MF)) &&
          "Call frame pseudos should only be used for dynamic stack adjustment");
-  const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+  auto &ST = MF.getSubtarget<WebAssemblySubtarget>();
+  const auto *TII = ST.getInstrInfo();
   if (I->getOpcode() == TII->getCallFrameDestroyOpcode() &&
       needsSPWriteback(MF)) {
     DebugLoc DL = I->getDebugLoc();
-    writeSPToGlobal(WebAssembly::SP32, MF, MBB, I, DL);
+    writeSPToGlobal(getSPReg(MF), MF, MBB, I, DL);
   }
   return MBB.erase(I);
 }
@@ -161,7 +214,8 @@
     return;
   uint64_t StackSize = MFI.getStackSize();
 
-  const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+  auto &ST = MF.getSubtarget<WebAssemblySubtarget>();
+  const auto *TII = ST.getInstrInfo();
   auto &MRI = MF.getRegInfo();
 
   auto InsertPt = MBB.begin();
@@ -172,13 +226,13 @@
 
   const TargetRegisterClass *PtrRC =
       MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
-  unsigned SPReg = WebAssembly::SP32;
+  unsigned SPReg = getSPReg(MF);
   if (StackSize)
     SPReg = MRI.createVirtualRegister(PtrRC);
 
   const char *ES = "__stack_pointer";
   auto *SPSymbol = MF.createExternalSymbolName(ES);
-  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::GLOBAL_GET_I32), SPReg)
+  BuildMI(MBB, InsertPt, DL, TII->get(getOpcGlobGet(MF)), SPReg)
       .addExternalSymbol(SPSymbol);
 
   bool HasBP = hasBP(MF);
@@ -192,32 +246,30 @@
   if (StackSize) {
     // Subtract the frame size
     Register OffsetReg = MRI.createVirtualRegister(PtrRC);
-    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
+    BuildMI(MBB, InsertPt, DL, TII->get(getOpcConst(MF)), OffsetReg)
         .addImm(StackSize);
-    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::SUB_I32),
-            WebAssembly::SP32)
+    BuildMI(MBB, InsertPt, DL, TII->get(getOpcSub(MF)), getSPReg(MF))
         .addReg(SPReg)
         .addReg(OffsetReg);
   }
   if (HasBP) {
     Register BitmaskReg = MRI.createVirtualRegister(PtrRC);
     Align Alignment = MFI.getMaxAlign();
-    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), BitmaskReg)
-        .addImm((int)~(Alignment.value() - 1));
-    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::AND_I32),
-            WebAssembly::SP32)
-        .addReg(WebAssembly::SP32)
+    BuildMI(MBB, InsertPt, DL, TII->get(getOpcConst(MF)), BitmaskReg)
+        .addImm((int64_t) ~(Alignment.value() - 1));
+    BuildMI(MBB, InsertPt, DL, TII->get(getOpcAnd(MF)), getSPReg(MF))
+        .addReg(getSPReg(MF))
         .addReg(BitmaskReg);
   }
   if (hasFP(MF)) {
     // Unlike most conventional targets (where FP points to the saved FP),
     // FP points to the bottom of the fixed-size locals, so we can use positive
     // offsets in load/store instructions.
-    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY), WebAssembly::FP32)
-        .addReg(WebAssembly::SP32);
+    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY), getFPReg(MF))
+        .addReg(getSPReg(MF));
   }
   if (StackSize && needsSPWriteback(MF)) {
-    writeSPToGlobal(WebAssembly::SP32, MF, MBB, InsertPt, DL);
+    writeSPToGlobal(getSPReg(MF), MF, MBB, InsertPt, DL);
   }
 }
 
@@ -226,7 +278,8 @@
   uint64_t StackSize = MF.getFrameInfo().getStackSize();
   if (!needsSP(MF) || !needsSPWriteback(MF))
     return;
-  const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+  auto &ST = MF.getSubtarget<WebAssemblySubtarget>();
+  const auto *TII = ST.getInstrInfo();
   auto &MRI = MF.getRegInfo();
   auto InsertPt = MBB.getFirstTerminator();
   DebugLoc DL;
@@ -237,6 +290,7 @@
   // Restore the stack pointer. If we had fixed-size locals, add the offset
   // subtracted in the prolog.
   unsigned SPReg = 0;
+  unsigned SPFPReg = hasFP(MF) ? getFPReg(MF) : getSPReg(MF);
   if (hasBP(MF)) {
     auto FI = MF.getInfo<WebAssemblyFunctionInfo>();
     SPReg = FI->getBasePointerVreg();
@@ -244,16 +298,17 @@
     const TargetRegisterClass *PtrRC =
         MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
     Register OffsetReg = MRI.createVirtualRegister(PtrRC);
-    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
+    BuildMI(MBB, InsertPt, DL, TII->get(getOpcConst(MF)), OffsetReg)
         .addImm(StackSize);
-    // In the epilog we don't need to write the result back to the SP32 physreg
-    // because it won't be used again. We can use a stackified register instead.
+    // In the epilog we don't need to write the result back to the SP32/64
+    // physreg because it won't be used again. We can use a stackified register
+    // instead.
     SPReg = MRI.createVirtualRegister(PtrRC);
-    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::ADD_I32), SPReg)
-        .addReg(hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32)
+    BuildMI(MBB, InsertPt, DL, TII->get(getOpcAdd(MF)), SPReg)
+        .addReg(SPFPReg)
         .addReg(OffsetReg);
   } else {
-    SPReg = hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32;
+    SPReg = SPFPReg;
   }
 
   writeSPToGlobal(SPReg, MF, MBB, InsertPt, DL);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -77,6 +77,13 @@
     return;
   }
 
+  MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout());
+  auto GlobalGetIns = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
+                                        : WebAssembly::GLOBAL_GET_I32;
+  auto ConstIns =
+      PtrVT == MVT::i64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
+  auto AddIns = PtrVT == MVT::i64 ? WebAssembly::ADD_I64 : WebAssembly::ADD_I32;
+
   // Few custom selection stuff.
   SDLoc DL(Node);
   MachineFunction &MF = CurDAG->getMachineFunction();
@@ -140,20 +147,16 @@
                          false);
     }
 
-    MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout());
-    assert(PtrVT == MVT::i32 && "only wasm32 is supported for now");
-
     SDValue TLSBaseSym = CurDAG->getTargetExternalSymbol("__tls_base", PtrVT);
     SDValue TLSOffsetSym = CurDAG->getTargetGlobalAddress(
         GA->getGlobal(), DL, PtrVT, GA->getOffset(), 0);
 
-    MachineSDNode *TLSBase = CurDAG->getMachineNode(WebAssembly::GLOBAL_GET_I32,
-                                                    DL, MVT::i32, TLSBaseSym);
-    MachineSDNode *TLSOffset = CurDAG->getMachineNode(
-        WebAssembly::CONST_I32, DL, MVT::i32, TLSOffsetSym);
-    MachineSDNode *TLSAddress =
-        CurDAG->getMachineNode(WebAssembly::ADD_I32, DL, MVT::i32,
-                               SDValue(TLSBase, 0), SDValue(TLSOffset, 0));
+    MachineSDNode *TLSBase =
+        CurDAG->getMachineNode(GlobalGetIns, DL, PtrVT, TLSBaseSym);
+    MachineSDNode *TLSOffset =
+        CurDAG->getMachineNode(ConstIns, DL, PtrVT, TLSOffsetSym);
+    MachineSDNode *TLSAddress = CurDAG->getMachineNode(
+        AddIns, DL, PtrVT, SDValue(TLSBase, 0), SDValue(TLSOffset, 0));
     ReplaceNode(Node, TLSAddress);
     return;
   }
@@ -162,22 +165,16 @@
     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
     switch (IntNo) {
     case Intrinsic::wasm_tls_size: {
-      MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout());
-      assert(PtrVT == MVT::i32 && "only wasm32 is supported for now");
-
       MachineSDNode *TLSSize = CurDAG->getMachineNode(
-          WebAssembly::GLOBAL_GET_I32, DL, PtrVT,
-          CurDAG->getTargetExternalSymbol("__tls_size", MVT::i32));
+          GlobalGetIns, DL, PtrVT,
+          CurDAG->getTargetExternalSymbol("__tls_size", PtrVT));
       ReplaceNode(Node, TLSSize);
       return;
     }
     case Intrinsic::wasm_tls_align: {
-      MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout());
-      assert(PtrVT == MVT::i32 && "only wasm32 is supported for now");
-
       MachineSDNode *TLSAlign = CurDAG->getMachineNode(
-          WebAssembly::GLOBAL_GET_I32, DL, PtrVT,
-          CurDAG->getTargetExternalSymbol("__tls_align", MVT::i32));
+          GlobalGetIns, DL, PtrVT,
+          CurDAG->getTargetExternalSymbol("__tls_align", PtrVT));
       ReplaceNode(Node, TLSAlign);
       return;
     }
@@ -188,11 +185,8 @@
     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
     switch (IntNo) {
     case Intrinsic::wasm_tls_base: {
-      MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout());
-      assert(PtrVT == MVT::i32 && "only wasm32 is supported for now");
-
       MachineSDNode *TLSBase = CurDAG->getMachineNode(
-          WebAssembly::GLOBAL_GET_I32, DL, MVT::i32, MVT::Other,
+          GlobalGetIns, DL, PtrVT, MVT::Other,
           CurDAG->getTargetExternalSymbol("__tls_base", PtrVT),
           Node->getOperand(0));
       ReplaceNode(Node, TLSBase);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -209,6 +209,7 @@
   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);
 
   setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
+  setOperationAction(ISD::FrameIndex, MVT::i64, Custom);
   setOperationAction(ISD::CopyToReg, MVT::Other, Custom);
 
   // Expand these forms; we pattern-match the forms that we can handle in isel.
@@ -613,7 +614,11 @@
   if (VT.isVector())
     return VT.changeVectorElementTypeToInteger();
 
-  return TargetLowering::getSetCCResultType(DL, C, VT);
+  // So far, all branch instructions in Wasm take an I32 condition.
+  // The default TargetLowering::getSetCCResultType returns the pointer size,
+  // which would be useful to reduce instruction counts when testing
+  // against 64-bit pointers/values if at some point Wasm supports that.
+  return EVT::getIntegerVT(C, 32);
 }
 
 bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
--- a/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
@@ -408,8 +408,8 @@
       ++InsertPos;
     if (InsertPos->getOpcode() == WebAssembly::CATCH)
       ++InsertPos;
-    FrameLowering->writeSPToGlobal(WebAssembly::SP32, MF, MBB, InsertPos,
-                                   MBB.begin()->getDebugLoc());
+    FrameLowering->writeSPToGlobal(FrameLowering->getSPReg(MF), MF, MBB,
+                                   InsertPos, MBB.begin()->getDebugLoc());
   }
   return Changed;
 }
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
--- a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
@@ -81,8 +81,9 @@
         strcmp(Name, "__stack_pointer") == 0 || strcmp(Name, "__tls_base") == 0;
     WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
     WasmSym->setGlobalType(wasm::WasmGlobalType{
-        uint8_t(Subtarget.hasAddr64() ? wasm::WASM_TYPE_I64
-                                      : wasm::WASM_TYPE_I32),
+        uint8_t(Subtarget.hasAddr64() && strcmp(Name, "__table_base") != 0
+                    ? wasm::WASM_TYPE_I64
+                    : wasm::WASM_TYPE_I32),
         Mutable});
     return WasmSym;
   }
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -248,7 +248,8 @@
   }
 
   // Check for writes to __stack_pointer global.
-  if (MI.getOpcode() == WebAssembly::GLOBAL_SET_I32 &&
+  if ((MI.getOpcode() == WebAssembly::GLOBAL_SET_I32 ||
+       MI.getOpcode() == WebAssembly::GLOBAL_SET_I64) &&
       strcmp(MI.getOperand(0).getSymbolName(), "__stack_pointer") == 0)
     StackPointer = true;
 
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
@@ -88,16 +88,17 @@
 
   // If this is an address being added to a constant, fold the frame offset
   // into the constant.
-  if (MI.getOpcode() == WebAssembly::ADD_I32) {
+  if (MI.getOpcode() == WebAssemblyFrameLowering::getOpcAdd(MF)) {
     MachineOperand &OtherMO = MI.getOperand(3 - FIOperandNum);
     if (OtherMO.isReg()) {
       Register OtherMOReg = OtherMO.getReg();
       if (Register::isVirtualRegister(OtherMOReg)) {
         MachineInstr *Def = MF.getRegInfo().getUniqueVRegDef(OtherMOReg);
         // TODO: For now we just opportunistically do this in the case where
-        // the CONST_I32 happens to have exactly one def and one use. We
+        // the CONST_I32/64 happens to have exactly one def and one use. We
         // should generalize this to optimize in more cases.
-        if (Def && Def->getOpcode() == WebAssembly::CONST_I32 &&
+        if (Def && Def->getOpcode() ==
+              WebAssemblyFrameLowering::getOpcConst(MF) &&
             MRI.hasOneNonDBGUse(Def->getOperand(0).getReg())) {
           MachineOperand &ImmMO = Def->getOperand(1);
           ImmMO.setImm(ImmMO.getImm() + uint32_t(FrameOffset));
@@ -109,20 +110,22 @@
     }
   }
 
-  // Otherwise create an i32.add SP, offset and make it the operand.
+  // Otherwise create an i32/64.add SP, offset and make it the operand.
   const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
 
   unsigned FIRegOperand = FrameRegister;
   if (FrameOffset) {
-    // Create i32.add SP, offset and make it the operand.
+    // Create i32/64.add SP, offset and make it the operand.
     const TargetRegisterClass *PtrRC =
         MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
     Register OffsetOp = MRI.createVirtualRegister(PtrRC);
-    BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::CONST_I32),
+    BuildMI(MBB, *II, II->getDebugLoc(),
+            TII->get(WebAssemblyFrameLowering::getOpcConst(MF)),
             OffsetOp)
         .addImm(FrameOffset);
     FIRegOperand = MRI.createVirtualRegister(PtrRC);
-    BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::ADD_I32),
+    BuildMI(MBB, *II, II->getDebugLoc(),
+            TII->get(WebAssemblyFrameLowering::getOpcAdd(MF)),
             FIRegOperand)
         .addReg(FrameRegister)
         .addReg(OffsetOp);
diff --git a/llvm/test/CodeGen/WebAssembly/stack-alignment.ll b/llvm/test/CodeGen/WebAssembly/stack-alignment.ll
--- a/llvm/test/CodeGen/WebAssembly/stack-alignment.ll
+++ b/llvm/test/CodeGen/WebAssembly/stack-alignment.ll
@@ -1,22 +1,20 @@
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s
-
-target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
-target triple = "wasm32-unknown-unknown"
+; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck -DPTR=32 %s
+; RUN: llc < %s --mtriple=wasm64-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck -DPTR=64 %s
 
 declare void @somefunc(i32*)
 
 ; CHECK-LABEL: underalign:
 ; CHECK:      global.get $push[[L1:.+]]=, __stack_pointer{{$}}
-; CHECK-NEXT: i32.const $push[[L2:.+]]=, 16
-; CHECK-NEXT: i32.sub   $push[[L10:.+]]=, $pop[[L1]], $pop[[L2]]
+; CHECK-NEXT: i[[PTR]].const $push[[L2:.+]]=, 16
+; CHECK-NEXT: i[[PTR]].sub $push[[L10:.+]]=, $pop[[L1]], $pop[[L2]]
 ; CHECK-NEXT: local.tee $push{{.+}}=, [[SP:.+]], $pop[[L10]]
 
 ; CHECK:      local.get $push[[L3:.+]]=, [[SP]]{{$}}
-; CHECK:      i32.add   $push[[underaligned:.+]]=, $pop[[L3]], $pop{{.+}}
-; CHECK-NEXT: call      somefunc, $pop[[underaligned]]
+; CHECK:      i[[PTR]].add $push[[underaligned:.+]]=, $pop[[L3]], $pop{{.+}}
+; CHECK-NEXT: call somefunc, $pop[[underaligned]]
 
 ; CHECK:      local.get $push[[M4:.+]]=, [[SP]]{{$}}
-; CHECK:      i32.add   $push[[L5:.+]]=, $pop[[M4]], $pop{{.+}}
+; CHECK:      i[[PTR]].add $push[[L5:.+]]=, $pop[[M4]], $pop{{.+}}
 ; CHECK-NEXT: global.set __stack_pointer, $pop[[L5]]
 define void @underalign() {
 entry:
@@ -27,17 +25,17 @@
 
 ; CHECK-LABEL: overalign:
 ; CHECK:      global.get $push[[L10:.+]]=, __stack_pointer{{$}}
-; CHECK-NEXT: local.tee  $push[[L9:.+]]=, [[BP:.+]], $pop[[L10]]
-; CHECK-NEXT: i32.const  $push[[L2:.+]]=, 32
-; CHECK-NEXT: i32.sub    $push[[L8:.+]]=, $pop[[L9]], $pop[[L2]]
-; CHECK-NEXT: i32.const  $push[[L3:.+]]=, -32
-; CHECK-NEXT: i32.and    $push[[L7:.+]]=, $pop[[L8]], $pop[[L3]]
-; CHECK-NEXT: local.tee  $push{{.+}}=, [[SP:.+]], $pop[[L7]]
+; CHECK-NEXT: local.tee $push[[L9:.+]]=, [[BP:.+]], $pop[[L10]]
+; CHECK-NEXT: i[[PTR]].const $push[[L2:.+]]=, 32
+; CHECK-NEXT: i[[PTR]].sub $push[[L8:.+]]=, $pop[[L9]], $pop[[L2]]
+; CHECK-NEXT: i[[PTR]].const $push[[L3:.+]]=, -32
+; CHECK-NEXT: i[[PTR]].and $push[[L7:.+]]=, $pop[[L8]], $pop[[L3]]
+; CHECK-NEXT: local.tee $push{{.+}}=, [[SP:.+]], $pop[[L7]]
 
-; CHECK:      local.get  $push[[M5:.+]]=, [[SP]]{{$}}
-; CHECK:      call       somefunc, $pop[[M5]]{{$}}
+; CHECK:      local.get $push[[M5:.+]]=, [[SP]]{{$}}
+; CHECK:      call somefunc, $pop[[M5]]{{$}}
 
-; CHECK:      local.get  $push[[M6:.+]]=, [[BP]]{{$}}
+; CHECK:      local.get $push[[M6:.+]]=, [[BP]]{{$}}
 ; CHECK-NEXT: global.set __stack_pointer, $pop[[M6]]
 define void @overalign() {
 entry:
@@ -48,19 +46,19 @@
 
 ; CHECK-LABEL: over_and_normal_align:
 ; CHECK:      global.get $push[[L14:.+]]=, __stack_pointer{{$}}
-; CHECK-NEXT: local.tee  $push[[L13:.+]]=, [[BP:.+]], $pop[[L14]]
-; CHECK:      i32.sub    $push[[L12:.+]]=, $pop[[L13]], $pop{{.+}}
-; CHECK:      i32.and    $push[[L11:.+]]=, $pop[[L12]], $pop{{.+}}
-; CHECK-NEXT: local.tee  $push{{.+}}=, [[SP:.+]], $pop[[L11]]
+; CHECK-NEXT: local.tee $push[[L13:.+]]=, [[BP:.+]], $pop[[L14]]
+; CHECK:      i[[PTR]].sub $push[[L12:.+]]=, $pop[[L13]], $pop{{.+}}
+; CHECK:      i[[PTR]].and $push[[L11:.+]]=, $pop[[L12]], $pop{{.+}}
+; CHECK-NEXT: local.tee $push{{.+}}=, [[SP:.+]], $pop[[L11]]
 
 ; CHECK:      local.get  $push[[M6:.+]]=, [[SP]]{{$}}
-; CHECK:      i32.add    $push[[L6:.+]]=, $pop[[M6]], $pop{{.+}}
-; CHECK-NEXT: call       somefunc, $pop[[L6]]
-; CHECK:      local.get  $push[[M7:.+]]=, [[SP]]{{$}}
-; CHECK:      i32.add    $push[[L8:.+]]=, $pop[[M7]], $pop{{.+}}
-; CHECK-NEXT: call       somefunc, $pop[[L8]]
+; CHECK:      i[[PTR]].add $push[[L6:.+]]=, $pop[[M6]], $pop{{.+}}
+; CHECK-NEXT: call somefunc, $pop[[L6]]
+; CHECK:      local.get $push[[M7:.+]]=, [[SP]]{{$}}
+; CHECK:      i[[PTR]].add $push[[L8:.+]]=, $pop[[M7]], $pop{{.+}}
+; CHECK-NEXT: call somefunc, $pop[[L8]]
 
-; CHECK:      local.get  $push[[L6:.+]]=, [[BP]]{{$}}
+; CHECK:      local.get $push[[L6:.+]]=, [[BP]]{{$}}
 ; CHECK-NEXT: global.set __stack_pointer, $pop[[L6]]
 define void @over_and_normal_align() {
 entry:
@@ -73,14 +71,14 @@
 
 ; CHECK-LABEL: dynamic_overalign:
 ; CHECK:      global.get $push[[L18:.+]]=, __stack_pointer{{$}}
-; CHECK-NEXT: local.tee  $push[[L17:.+]]=, [[SP:.+]], $pop[[L18]]
-; CHECK-NEXT: local.set  [[BP:.+]], $pop[[L17]]
-; CHECK:      local.tee  $push{{.+}}=, [[SP_2:.+]], $pop{{.+}}
+; CHECK-NEXT: local.tee $push[[L17:.+]]=, [[SP:.+]], $pop[[L18]]
+; CHECK-NEXT: local.set [[BP:.+]], $pop[[L17]]
+; CHECK:      local.tee $push{{.+}}=, [[SP_2:.+]], $pop{{.+}}
 
-; CHECK:      local.get  $push[[M8:.+]]=, [[SP_2]]{{$}}
-; CHECK:      call       somefunc, $pop[[M8]]
+; CHECK:      local.get $push[[M8:.+]]=, [[SP_2]]{{$}}
+; CHECK:      call somefunc, $pop[[M8]]
 
-; CHECK:      local.get  $push[[M9:.+]]=, [[BP]]{{$}}
+; CHECK:      local.get $push[[M9:.+]]=, [[BP]]{{$}}
 ; CHECK-NEXT: global.set __stack_pointer, $pop[[M9]]
 define void @dynamic_overalign(i32 %num) {
 entry:
@@ -91,18 +89,18 @@
 
 ; CHECK-LABEL: overalign_and_dynamic:
 ; CHECK:      global.get $push[[L21:.+]]=, __stack_pointer{{$}}
-; CHECK-NEXT: local.tee  $push[[L20:.+]]=, [[BP:.+]], $pop[[L21]]
-; CHECK:      i32.sub    $push[[L19:.+]]=, $pop[[L20]], $pop{{.+}}
-; CHECK:      i32.and    $push[[L18:.+]]=, $pop[[L19]], $pop{{.+}}
-; CHECK:      local.tee  $push{{.+}}=, [[FP:.+]], $pop[[L18]]
-; CHECK:      local.get  $push[[M10:.+]]=, [[FP]]{{$}}
-; CHECK:      i32.sub    $push[[L16:.+]]=, $pop[[M10]], $pop{{.+}}
-; CHECK-NEXT: local.tee  $push{{.+}}=, [[SP:.+]], $pop[[L16]]
-
-; CHECK:      local.get  $push[[over:.+]]=, [[FP]]
-; CHECK-NEXT: call       somefunc, $pop[[over]]
-; CHECK:      local.get  $push[[another:.+]]=, [[SP]]
-; CHECK-NEXT: call       somefunc, $pop[[another]]
+; CHECK-NEXT: local.tee $push[[L20:.+]]=, [[BP:.+]], $pop[[L21]]
+; CHECK:      i[[PTR]].sub $push[[L19:.+]]=, $pop[[L20]], $pop{{.+}}
+; CHECK:      i[[PTR]].and $push[[L18:.+]]=, $pop[[L19]], $pop{{.+}}
+; CHECK:      local.tee $push{{.+}}=, [[FP:.+]], $pop[[L18]]
+; CHECK:      local.get $push[[M10:.+]]=, [[FP]]{{$}}
+; CHECK:      i[[PTR]].sub $push[[L16:.+]]=, $pop[[M10]], $pop{{.+}}
+; CHECK-NEXT: local.tee $push{{.+}}=, [[SP:.+]], $pop[[L16]]
+
+; CHECK:      local.get $push[[over:.+]]=, [[FP]]
+; CHECK-NEXT: call somefunc, $pop[[over]]
+; CHECK:      local.get $push[[another:.+]]=, [[SP]]
+; CHECK-NEXT: call somefunc, $pop[[another]]
 
 ; CHECK:      local.get  $push[[M11:.+]]=, [[BP]]{{$}}
 ; CHECK-NEXT: global.set __stack_pointer, $pop[[M11]]
@@ -117,23 +115,23 @@
 
 ; CHECK-LABEL: overalign_static_and_dynamic:
 ; CHECK:      global.get $push[[L26:.+]]=, __stack_pointer{{$}}
-; CHECK-NEXT: local.tee  $push[[L25:.+]]=, [[BP:.+]], $pop[[L26]]
-; CHECK:      i32.sub    $push[[L24:.+]]=, $pop[[L25]], $pop{{.+}}
-; CHECK:      i32.and    $push[[L23:.+]]=, $pop[[L24]], $pop{{.+}}
-; CHECK:      local.tee  $push{{.+}}=, [[FP:.+]], $pop[[L23]]
-; CHECK:      local.get  $push[[M12:.+]]=, [[FP]]{{$}}
-; CHECK:      i32.sub    $push[[L21:.+]]=, $pop[[M12]], $pop{{.+}}
-; CHECK-NEXT: local.tee  $push{{.+}}=, [[SP:.+]], $pop[[L21]]
-
-; CHECK:      local.get  $push[[L19:.+]]=, [[FP]]
-; CHECK:      local.tee  $push[[L18:.+]]=, [[FP_2:.+]], $pop[[L19]]
-; CHECK:      i32.add    $push[[over:.+]]=, $pop[[L18]], $pop{{.+}}
-; CHECK-NEXT: call       somefunc, $pop[[over]]
-; CHECK:      local.get  $push[[M12:.+]]=, [[SP]]
-; CHECK:      call       somefunc, $pop[[M12]]
-; CHECK:      local.get  $push[[M13:.+]]=, [[FP_2]]
-; CHECK:      i32.add    $push[[static:.+]]=, $pop[[M13]], $pop{{.+}}
-; CHECK-NEXT: call       somefunc, $pop[[static]]
+; CHECK-NEXT: local.tee $push[[L25:.+]]=, [[BP:.+]], $pop[[L26]]
+; CHECK:      i[[PTR]].sub $push[[L24:.+]]=, $pop[[L25]], $pop{{.+}}
+; CHECK:      i[[PTR]].and $push[[L23:.+]]=, $pop[[L24]], $pop{{.+}}
+; CHECK:      local.tee $push{{.+}}=, [[FP:.+]], $pop[[L23]]
+; CHECK:      local.get $push[[M12:.+]]=, [[FP]]{{$}}
+; CHECK:      i[[PTR]].sub $push[[L21:.+]]=, $pop[[M12]], $pop{{.+}}
+; CHECK-NEXT: local.tee $push{{.+}}=, [[SP:.+]], $pop[[L21]]
+
+; CHECK:      local.get $push[[L19:.+]]=, [[FP]]
+; CHECK:      local.tee $push[[L18:.+]]=, [[FP_2:.+]], $pop[[L19]]
+; CHECK:      i[[PTR]].add $push[[over:.+]]=, $pop[[L18]], $pop{{.+}}
+; CHECK-NEXT: call somefunc, $pop[[over]]
+; CHECK:      local.get $push[[M12:.+]]=, [[SP]]
+; CHECK:      call somefunc, $pop[[M12]]
+; CHECK:      local.get $push[[M13:.+]]=, [[FP_2]]
+; CHECK:      i[[PTR]].add $push[[static:.+]]=, $pop[[M13]], $pop{{.+}}
+; CHECK-NEXT: call somefunc, $pop[[static]]
 
 ; CHECK:      local.get  $push[[M14:.+]]=, [[BP]]{{$}}
 ; CHECK-NEXT: global.set __stack_pointer, $pop[[M14]]
diff --git a/llvm/test/CodeGen/WebAssembly/userstack.ll b/llvm/test/CodeGen/WebAssembly/userstack.ll
--- a/llvm/test/CodeGen/WebAssembly/userstack.ll
+++ b/llvm/test/CodeGen/WebAssembly/userstack.ll
@@ -1,18 +1,16 @@
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s
-
-target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
-target triple = "wasm32-unknown-unknown"
+; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck -DPTR=32 %s
+; RUN: llc < %s --mtriple=wasm64-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck -DPTR=64 %s
 
 declare void @ext_func(i64* %ptr)
 declare void @ext_func_i32(i32* %ptr)
 
 ; CHECK-LABEL: alloca32:
 ; Check that there is an extra local for the stack pointer.
-; CHECK: .local i32{{$}}
+; CHECK: .local i[[PTR]]{{$}}
 define void @alloca32() noredzone {
  ; CHECK-NEXT: global.get $push[[L2:.+]]=, __stack_pointer{{$}}
- ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 16
- ; CHECK-NEXT: i32.sub $push[[L9:.+]]=, $pop[[L2]], $pop[[L3]]
+ ; CHECK-NEXT: i[[PTR]].const $push[[L3:.+]]=, 16
+ ; CHECK-NEXT: i[[PTR]].sub $push[[L9:.+]]=, $pop[[L2]], $pop[[L3]]
  ; CHECK-NEXT: local.tee $push[[L8:.+]]=, [[SP:.+]], $pop[[L9]]{{$}}
  ; CHECK-NEXT: global.set __stack_pointer, $pop[[L8]]{{$}}
  %retval = alloca i32
@@ -21,18 +19,18 @@
  ; CHECK: i32.store 12($pop[[L4]]), $pop[[L0]]
  store i32 0, i32* %retval
  ; CHECK: local.get $push[[L6:.+]]=, [[SP]]{{$}}
- ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 16
- ; CHECK-NEXT: i32.add $push[[L7:.+]]=, $pop[[L6]], $pop[[L5]]
+ ; CHECK-NEXT: i[[PTR]].const $push[[L5:.+]]=, 16
+ ; CHECK-NEXT: i[[PTR]].add $push[[L7:.+]]=, $pop[[L6]], $pop[[L5]]
  ; CHECK-NEXT: global.set __stack_pointer, $pop[[L7]]
  ret void
 }
 
 ; CHECK-LABEL: alloca3264:
-; CHECK: .local i32{{$}}
+; CHECK: .local i[[PTR]]{{$}}
 define void @alloca3264() {
  ; CHECK: global.get $push[[L3:.+]]=, __stack_pointer{{$}}
- ; CHECK-NEXT: i32.const $push[[L4:.+]]=, 16
- ; CHECK-NEXT: i32.sub $push[[L6:.+]]=, $pop[[L3]], $pop[[L4]]
+ ; CHECK-NEXT: i[[PTR]].const $push[[L4:.+]]=, 16
+ ; CHECK-NEXT: i[[PTR]].sub $push[[L6:.+]]=, $pop[[L3]], $pop[[L4]]
  ; CHECK-NEXT: local.tee $push[[L5:.+]]=, [[SP:.+]], $pop[[L6]]
  %r1 = alloca i32
  %r2 = alloca double
@@ -48,17 +46,17 @@
 }
 
 ; CHECK-LABEL: allocarray:
-; CHECK: .local i32{{$}}
+; CHECK: .local i[[PTR]]{{$}}
 define void @allocarray() {
  ; CHECK-NEXT: global.get $push[[L4:.+]]=, __stack_pointer{{$}}
- ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 144{{$}}
- ; CHECK-NEXT: i32.sub $push[[L12:.+]]=, $pop[[L4]], $pop[[L5]]
+ ; CHECK-NEXT: i[[PTR]].const $push[[L5:.+]]=, 144{{$}}
+ ; CHECK-NEXT: i[[PTR]].sub $push[[L12:.+]]=, $pop[[L4]], $pop[[L5]]
  ; CHECK-NEXT: local.tee $push[[L11:.+]]=, 0, $pop[[L12]]
  ; CHECK-NEXT: global.set __stack_pointer, $pop[[L11]]
  %r = alloca [33 x i32]
 
- ; CHECK:      i32.const $push{{.+}}=, 24
- ; CHECK-NEXT: i32.add $push[[L3:.+]]=, $pop{{.+}}, $pop{{.+}}
+ ; CHECK:      i[[PTR]].const $push{{.+}}=, 24
+ ; CHECK-NEXT: i[[PTR]].add $push[[L3:.+]]=, $pop{{.+}}, $pop{{.+}}
  ; CHECK-NEXT: i32.const $push[[L1:.+]]=, 1{{$}}
  ; CHECK-NEXT: i32.store 0($pop[[L3]]), $pop[[L1]]{{$}}
  ; CHECK-NEXT: local.get $push[[L4:.+]]=, 0{{$}}
@@ -70,16 +68,16 @@
  store i32 1, i32* %p2
 
  ; CHECK-NEXT: local.get $push[[L2:.+]]=, [[SP]]{{$}}
- ; CHECK-NEXT: i32.const $push[[L7:.+]]=, 144
- ; CHECK-NEXT: i32.add $push[[L8:.+]]=, $pop[[L2]], $pop[[L7]]
+ ; CHECK-NEXT: i[[PTR]].const $push[[L7:.+]]=, 144
+ ; CHECK-NEXT: i[[PTR]].add $push[[L8:.+]]=, $pop[[L2]], $pop[[L7]]
  ; CHECK-NEXT: global.set __stack_pointer, $pop[[L8]]
  ret void
 }
 
 ; CHECK-LABEL: non_mem_use
 define void @non_mem_use(i8** %addr) {
- ; CHECK: i32.const $push[[L2:.+]]=, 48
- ; CHECK-NEXT: i32.sub $push[[L12:.+]]=, {{.+}}, $pop[[L2]]
+ ; CHECK: i[[PTR]].const $push[[L2:.+]]=, 48
+ ; CHECK-NEXT: i[[PTR]].sub $push[[L12:.+]]=, {{.+}}, $pop[[L2]]
  ; CHECK-NEXT: local.tee $push[[L11:.+]]=, [[SP:.+]], $pop[[L12]]
  ; CHECK-NEXT: global.set __stack_pointer, $pop[[L11]]
  %buf = alloca [27 x i8], align 16
@@ -87,8 +85,8 @@
  %r2 = alloca i64
  ; %r is at SP+8
  ; CHECK: local.get $push[[L3:.+]]=, [[SP]]
- ; CHECK: i32.const $push[[OFF:.+]]=, 8
- ; CHECK-NEXT: i32.add $push[[ARG1:.+]]=, $pop[[L3]], $pop[[OFF]]
+ ; CHECK: i[[PTR]].const $push[[OFF:.+]]=, 8
+ ; CHECK-NEXT: i[[PTR]].add $push[[ARG1:.+]]=, $pop[[L3]], $pop[[OFF]]
  ; CHECK-NEXT: call ext_func, $pop[[ARG1]]
  call void @ext_func(i64* %r)
  ; %r2 is at SP+0, no add needed
@@ -98,20 +96,20 @@
  ; Use as a value, but in a store
  ; %buf is at SP+16
  ; CHECK: local.get $push[[L5:.+]]=, [[SP]]
- ; CHECK: i32.const $push[[OFF:.+]]=, 16
- ; CHECK-NEXT: i32.add $push[[VAL:.+]]=, $pop[[L5]], $pop[[OFF]]
- ; CHECK-NEXT: i32.store 0($pop{{.+}}), $pop[[VAL]]
+ ; CHECK: i[[PTR]].const $push[[OFF:.+]]=, 16
+ ; CHECK-NEXT: i[[PTR]].add $push[[VAL:.+]]=, $pop[[L5]], $pop[[OFF]]
+ ; CHECK-NEXT: i[[PTR]].store 0($pop{{.+}}), $pop[[VAL]]
  %gep = getelementptr inbounds [27 x i8], [27 x i8]* %buf, i32 0, i32 0
  store i8* %gep, i8** %addr
  ret void
 }
 
 ; CHECK-LABEL: allocarray_inbounds:
-; CHECK: .local i32{{$}}
+; CHECK: .local i[[PTR]]{{$}}
 define void @allocarray_inbounds() {
  ; CHECK: global.get $push[[L3:.+]]=, __stack_pointer{{$}}
- ; CHECK-NEXT: i32.const $push[[L4:.+]]=, 32{{$}}
- ; CHECK-NEXT: i32.sub $push[[L11:.+]]=, $pop[[L3]], $pop[[L4]]
+ ; CHECK-NEXT: i[[PTR]].const $push[[L4:.+]]=, 32{{$}}
+ ; CHECK-NEXT: i[[PTR]].sub $push[[L11:.+]]=, $pop[[L3]], $pop[[L4]]
  ; CHECK-NEXT: local.tee $push[[L10:.+]]=, [[SP:.+]], $pop[[L11]]
  ; CHECK-NEXT: global.set __stack_pointer, $pop[[L10]]{{$}}
  %r = alloca [5 x i32]
@@ -125,8 +123,8 @@
  store i32 1, i32* %p2
  call void @ext_func(i64* null);
  ; CHECK: call ext_func
- ; CHECK: i32.const $push[[L5:.+]]=, 32{{$}}
- ; CHECK-NEXT: i32.add $push[[L7:.+]]=, ${{.+}}, $pop[[L5]]
+ ; CHECK: i[[PTR]].const $push[[L5:.+]]=, 32{{$}}
+ ; CHECK-NEXT: i[[PTR]].add $push[[L7:.+]]=, ${{.+}}, $pop[[L5]]
  ; CHECK-NEXT: global.set __stack_pointer, $pop[[L7]]
  ret void
 }
@@ -136,7 +134,7 @@
  ; CHECK: global.get $push[[L13:.+]]=, __stack_pointer{{$}}
  ; CHECK-NEXT: local.tee $push[[L12:.+]]=, [[SP:.+]], $pop[[L13]]{{$}}
  ; Target independent codegen bumps the stack pointer.
- ; CHECK: i32.sub
+ ; CHECK: i[[PTR]].sub
  ; Check that SP is written back to memory after decrement
  ; CHECK: global.set __stack_pointer,
  %r = alloca i32, i32 %alloc
@@ -152,12 +150,12 @@
  ; CHECK: global.get $push[[L13:.+]]=, __stack_pointer{{$}}
  ; CHECK-NEXT: local.tee $push[[L12:.+]]=, [[SP:.+]], $pop[[L13]]{{$}}
  ; Target independent codegen bumps the stack pointer
- ; CHECK: i32.sub
+ ; CHECK: i[[PTR]].sub
  %r = alloca i32, i32 %alloc
- ; CHECK-NEXT: local.tee       $push[[L8:.+]]=, {{.+}}, $pop
- ; CHECK: local.get $push[[L7:.+]]=, 0{{$}}
- ; CHECK-NEXT: i32.const       $push[[L6:.+]]=, 0{{$}}
- ; CHECK-NEXT: i32.store       0($pop[[L7]]), $pop[[L6]]{{$}}
+ ; CHECK-NEXT: local.tee $push[[L8:.+]]=, [[SP2:.+]], $pop
+ ; CHECK: local.get $push[[L7:.+]]=, [[SP2]]{{$}}
+ ; CHECK-NEXT: i32.const $push[[L6:.+]]=, 0{{$}}
+ ; CHECK-NEXT: i32.store 0($pop[[L7]]), $pop[[L6]]{{$}}
  store i32 0, i32* %r
  ; CHECK-NEXT: return
  ret void
@@ -167,8 +165,8 @@
 define void @dynamic_static_alloca(i32 %alloc) noredzone {
  ; Decrement SP in the prolog by the static amount and writeback to memory.
  ; CHECK: global.get $push[[L11:.+]]=, __stack_pointer{{$}}
- ; CHECK-NEXT: i32.const $push[[L12:.+]]=, 16
- ; CHECK-NEXT: i32.sub $push[[L23:.+]]=, $pop[[L11]], $pop[[L12]]
+ ; CHECK-NEXT: i[[PTR]].const $push[[L12:.+]]=, 16
+ ; CHECK-NEXT: i[[PTR]].sub $push[[L23:.+]]=, $pop[[L11]], $pop[[L12]]
  ; CHECK-NEXT: local.tee $push[[L22:.+]]=, [[SP:.+]], $pop[[L23]]
  ; CHECK-NEXT: global.set __stack_pointer, $pop[[L22]]
 
@@ -181,7 +179,7 @@
  store volatile i32 101, i32* %static
 
  ; Decrement SP in the body by the dynamic amount.
- ; CHECK: i32.sub
+ ; CHECK: i[[PTR]].sub
  ; CHECK: local.tee $push[[L16:.+]]=, [[dynamic_local:.+]], $pop{{.+}}
  ; CHECK: local.tee $push[[L15:.+]]=, [[other:.+]], $pop[[L16]]{{$}}
  ; CHECK: global.set __stack_pointer, $pop[[L15]]{{$}}
@@ -201,7 +199,7 @@
  store volatile i32 103, i32* %dynamic
 
  ; Decrement SP in the body by the dynamic amount.
- ; CHECK: i32.sub
+ ; CHECK: i[[PTR]].sub
  ; CHECK: local.tee $push{{.+}}=, [[dynamic2_local:.+]], $pop{{.+}}
  %dynamic.2 = alloca i32, i32 %alloc
 
@@ -224,8 +222,8 @@
 
  ; Writeback to memory.
  ; CHECK: local.get $push[[L24:.+]]=, [[FP]]{{$}}
- ; CHECK: i32.const $push[[L18:.+]]=, 16
- ; CHECK-NEXT: i32.add $push[[L19:.+]]=, $pop[[L24]], $pop[[L18]]
+ ; CHECK: i[[PTR]].const $push[[L18:.+]]=, 16
+ ; CHECK-NEXT: i[[PTR]].add $push[[L19:.+]]=, $pop[[L24]], $pop[[L18]]
  ; CHECK-NEXT: global.set __stack_pointer, $pop[[L19]]
  ret void
 }
@@ -273,11 +271,11 @@
 ; CHECK-LABEL: copytoreg_fi:
 define void @copytoreg_fi(i1 %cond, i32* %b) {
 entry:
- ; CHECK: i32.const $push[[L1:.+]]=, 16
- ; CHECK-NEXT: i32.sub $push[[L3:.+]]=, {{.+}}, $pop[[L1]]
+ ; CHECK: i[[PTR]].const $push[[L1:.+]]=, 16
+ ; CHECK-NEXT: i[[PTR]].sub $push[[L3:.+]]=, {{.+}}, $pop[[L1]]
  %addr = alloca i32
- ; CHECK: i32.const $push[[OFF:.+]]=, 12
- ; CHECK-NEXT: i32.add $push[[ADDR:.+]]=, $pop[[L3]], $pop[[OFF]]
+ ; CHECK: i[[PTR]].const $push[[OFF:.+]]=, 12
+ ; CHECK-NEXT: i[[PTR]].add $push[[ADDR:.+]]=, $pop[[L3]], $pop[[OFF]]
  ; CHECK-NEXT: local.set [[COPY:.+]], $pop[[ADDR]]
  br label %body
 body:
@@ -309,7 +307,7 @@
 ; Test __builtin_frame_address(1).
 
 ; CHECK-LABEL: frameaddress_1:
-; CHECK:      i32.const $push0=, 0{{$}}
+; CHECK:      i[[PTR]].const $push0=, 0{{$}}
 ; CHECK-NEXT: call use_i8_star, $pop0{{$}}
 ; CHECK-NEXT: return{{$}}
 define void @frameaddress_1() {
@@ -330,6 +328,6 @@
   ret void
 }
 
-; CHECK: .globaltype	__stack_pointer, i32{{$}}
+; CHECK: .globaltype	__stack_pointer, i[[PTR]]{{$}}
 
 ; TODO: test over-aligned alloca
diff --git a/llvm/test/MC/WebAssembly/stack-ptr.ll b/llvm/test/MC/WebAssembly/stack-ptr.ll
--- a/llvm/test/MC/WebAssembly/stack-ptr.ll
+++ b/llvm/test/MC/WebAssembly/stack-ptr.ll
@@ -1,6 +1,5 @@
-; RUN: llc -filetype=obj %s -o - | obj2yaml | FileCheck %s
-
-target triple = "wasm32-unknown-unknown"
+; RUN: llc --mtriple=wasm32-unknown-unknown -filetype=obj %s -o - | obj2yaml | FileCheck --check-prefixes CHECK,CHK32 %s
+; RUN: llc --mtriple=wasm64-unknown-unknown -filetype=obj %s -o - | obj2yaml | FileCheck --check-prefixes CHECK,CHK64 %s
 
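-; Function that uses explict stack, and should generate a reference to
-; __stack_pointer, along with the corresponding reloction entry.
+; Function that uses explicit stack, and should generate a reference to
+; __stack_pointer, along with the corresponding relocation entry.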
@@ -15,7 +14,8 @@
 ; CHECK:       - Module:          env
 ; CHECK:         Field:           __stack_pointer
 ; CHECK:         Kind:            GLOBAL
-; CHECK:         GlobalType:      I32
+; CHK32:         GlobalType:      I32
+; CHK64:         GlobalType:      I64
 ; CHECK:         GlobalMutable:   true
 ; CHECK:   - Type:            CODE
 ; CHECK:     Relocations:
diff --git a/llvm/test/MC/WebAssembly/wasm64.s b/llvm/test/MC/WebAssembly/wasm64.s
--- a/llvm/test/MC/WebAssembly/wasm64.s
+++ b/llvm/test/MC/WebAssembly/wasm64.s
@@ -51,6 +51,11 @@
     i64.const   0
     f32.store   .L.str    # relocatable offset!
 
+    ### 64-bit SP
+
+    global.get  __stack_pointer
+    drop
+
     end_function
 
     .section    .rodata..L.str,"",@
@@ -62,7 +67,7 @@
     .size       .L.str, 24
 
     .globaltype myglob64, i64
-
+    .globaltype __stack_pointer, i64
 
 
 # CHECK:              .functype       test (i64) -> ()
@@ -155,6 +160,11 @@
 # BIN-NEXT:         Kind:            GLOBAL
 # BIN-NEXT:         GlobalType:      I64
 # BIN-NEXT:         GlobalMutable:   true
+# BIN-NEXT:       - Module:          env
+# BIN-NEXT:         Field:           __stack_pointer
+# BIN-NEXT:         Kind:            GLOBAL
+# BIN-NEXT:         GlobalType:      I64
+# BIN-NEXT:         GlobalMutable:   true
 # BIN-NEXT:   - Type:            FUNCTION
 # BIN-NEXT:     FunctionTypes:   [ 0 ]
 # BIN-NEXT:   - Type:            DATACOUNT
@@ -179,12 +189,15 @@
 # BIN-NEXT:       - Type:            R_WASM_MEMORY_ADDR_LEB64
 # BIN-NEXT:         Index:           1
 # BIN-NEXT:         Offset:          0x00000078
+# BIN-NEXT:       - Type: R_WASM_GLOBAL_INDEX_LEB
+# BIN-NEXT:         Index: 3
+# BIN-NEXT:         Offset: 0x00000083
 # BIN-NEXT:     Functions:
 # BIN-NEXT:       - Index:           0
 # BIN-NEXT:         Locals:
 # BIN-NEXT:           - Type:            I64
 # BIN-NEXT:             Count:           1
-# BIN-NEXT:         Body:            42002A02001A20002A02001A42808080808080808080002A02001A2380808080002A02001A42002A02808080808080808080001A4300000000420038020043000000002000380200430000000042808080808080808080003802004300000000238080808000380200430000000042003802808080808080808080000B
+# BIN-NEXT:         Body:            42002A02001A20002A02001A42808080808080808080002A02001A2380808080002A02001A42002A02808080808080808080001A4300000000420038020043000000002000380200430000000042808080808080808080003802004300000000238080808000380200430000000042003802808080808080808080002381808080001A0B
 # BIN-NEXT:   - Type:            DATA
 # BIN-NEXT:     Relocations:
 # BIN-NEXT:       - Type:            R_WASM_MEMORY_ADDR_I64
@@ -217,6 +230,11 @@
 # BIN-NEXT:         Name:            myglob64
 # BIN-NEXT:         Flags:           [ UNDEFINED ]
 # BIN-NEXT:         Global:          0
+# BIN-NEXT:       - Index:           3
+# BIN-NEXT:         Kind:            GLOBAL
+# BIN-NEXT:         Name:            __stack_pointer
+# BIN-NEXT:         Flags:           [ UNDEFINED ]
+# BIN-NEXT:         Global:          1
 # BIN-NEXT:     SegmentInfo:
 # BIN-NEXT:       - Index:           0
 # BIN-NEXT:         Name:            .rodata..L.str