Index: llvm/trunk/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h =================================================================== --- llvm/trunk/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ llvm/trunk/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -331,14 +331,6 @@ } } -/// The operand number of the load or store address in load/store instructions. -static const unsigned LoadAddressOperandNo = 3; -static const unsigned StoreAddressOperandNo = 2; - -/// The operand number of the load or store p2align in load/store instructions. -static const unsigned LoadP2AlignOperandNo = 1; -static const unsigned StoreP2AlignOperandNo = 0; - /// This is used to indicate block signatures. enum class ExprType : unsigned { Void = 0x40, Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td @@ -11,6 +11,7 @@ /// //===----------------------------------------------------------------------===// +let UseNamedOperandTable = 1 in multiclass ATOMIC_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s, list<dag> pattern_r, string asmstr_r = "", string asmstr_s = "", bits<32> atomic_op = -1> { @@ -810,9 +811,9 @@ defm "" : ATOMIC_I<(outs rc:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$exp, - rc:$new), + rc:$new_), (outs), (ins P2Align:$p2align, offset32_op:$off), [], - !strconcat(name, "\t$dst, ${off}(${addr})${p2align}, $exp, $new"), + !strconcat(name, "\t$dst, ${off}(${addr})${p2align}, $exp, $new_"), !strconcat(name, "\t${off}${p2align}"), atomic_op>; } Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrInfo.h =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrInfo.h +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrInfo.h @@ -21,8 +21,17 @@ #define GET_INSTRINFO_HEADER #include
"WebAssemblyGenInstrInfo.inc" +#define GET_INSTRINFO_OPERAND_ENUM +#include "WebAssemblyGenInstrInfo.inc" + namespace llvm { +namespace WebAssembly { + +int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex); + +} + class WebAssemblySubtarget; class WebAssemblyInstrInfo final : public WebAssemblyGenInstrInfo { Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -27,6 +27,10 @@ #define GET_INSTRINFO_CTOR_DTOR #include "WebAssemblyGenInstrInfo.inc" +// defines WebAssembly::getNamedOperandIdx +#define GET_INSTRINFO_NAMED_OPS +#include "WebAssemblyGenInstrInfo.inc" + WebAssemblyInstrInfo::WebAssemblyInstrInfo(const WebAssemblySubtarget &STI) : WebAssemblyGenInstrInfo(WebAssembly::ADJCALLSTACKDOWN, WebAssembly::ADJCALLSTACKUP, Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrMemory.td =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrMemory.td +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -52,7 +52,7 @@ // Defines atomic and non-atomic loads, regular and extending. 
multiclass WebAssemblyLoad<WebAssemblyRegClass rc, string Name, int Opcode> { - let mayLoad = 1 in + let mayLoad = 1, UseNamedOperandTable = 1 in defm "": I<(outs rc:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), (outs), (ins P2Align:$p2align, offset32_op:$off), @@ -294,7 +294,7 @@ // Defines atomic and non-atomic stores, regular and truncating multiclass WebAssemblyStore<WebAssemblyRegClass rc, string Name, int Opcode> { - let mayStore = 1 in + let mayStore = 1, UseNamedOperandTable = 1 in defm "" : I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$val), (outs), Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -41,12 +41,12 @@ // Load: v128.load multiclass SIMDLoad<ValueType vec_t> { - let mayLoad = 1 in + let mayLoad = 1, UseNamedOperandTable = 1 in defm LOAD_#vec_t : - SIMD_I<(outs V128:$dst), (ins P2Align:$align, offset32_op:$off, I32:$addr), - (outs), (ins P2Align:$align, offset32_op:$off), [], - "v128.load\t$dst, ${off}(${addr})$align", - "v128.load\t$off$align", 0>; + SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + (outs), (ins P2Align:$p2align, offset32_op:$off), [], + "v128.load\t$dst, ${off}(${addr})$p2align", + "v128.load\t$off$p2align", 0>; } foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { @@ -65,12 +65,12 @@ // Store: v128.store multiclass SIMDStore<ValueType vec_t> { - let mayStore = 1 in + let mayStore = 1, UseNamedOperandTable = 1 in defm STORE_#vec_t : - SIMD_I<(outs), (ins P2Align:$align, offset32_op:$off, I32:$addr, V128:$vec), - (outs), (ins P2Align:$align, offset32_op:$off), [], - "v128.store\t${off}(${addr})$align, $vec", - "v128.store\t$off$align", 1>; + SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, V128:$vec), + (outs), (ins P2Align:$p2align, offset32_op:$off), [], + "v128.store\t${off}(${addr})$p2align, $vec", + "v128.store\t$off$p2align", 1>; } foreach vec_t =
[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp @@ -70,13 +70,16 @@ // If this is the address operand of a load or store, make it relative to SP // and fold the frame offset directly in. - if ((MI.mayLoad() && FIOperandNum == WebAssembly::LoadAddressOperandNo) || - (MI.mayStore() && FIOperandNum == WebAssembly::StoreAddressOperandNo)) { - assert(FrameOffset >= 0 && MI.getOperand(FIOperandNum - 1).getImm() >= 0); - int64_t Offset = MI.getOperand(FIOperandNum - 1).getImm() + FrameOffset; + unsigned AddrOperandNum = WebAssembly::getNamedOperandIdx( + MI.getOpcode(), WebAssembly::OpName::addr); + if (AddrOperandNum == FIOperandNum) { + unsigned OffsetOperandNum = WebAssembly::getNamedOperandIdx( + MI.getOpcode(), WebAssembly::OpName::off); + assert(FrameOffset >= 0 && MI.getOperand(OffsetOperandNum).getImm() >= 0); + int64_t Offset = MI.getOperand(OffsetOperandNum).getImm() + FrameOffset; if (static_cast<uint64_t>(Offset) <= std::numeric_limits<uint32_t>::max()) { - MI.getOperand(FIOperandNum - 1).setImm(Offset); + MI.getOperand(OffsetOperandNum).setImm(Offset); MI.getOperand(FIOperandNum) .ChangeToRegister(FrameRegister, /*IsDef=*/false); return; Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp @@ -13,6 +13,7 @@ #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssembly.h" +#include "WebAssemblyInstrInfo.h" #include "WebAssemblyMachineFunctionInfo.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -83,114 +84,11 @@ for (auto
&MBB : MF) { for (auto &MI : MBB) { - switch (MI.getOpcode()) { - case WebAssembly::LOAD_I32: - case WebAssembly::LOAD_I64: - case WebAssembly::LOAD_F32: - case WebAssembly::LOAD_F64: - case WebAssembly::LOAD_v16i8: - case WebAssembly::LOAD_v8i16: - case WebAssembly::LOAD_v4i32: - case WebAssembly::LOAD_v2i64: - case WebAssembly::LOAD_v4f32: - case WebAssembly::LOAD_v2f64: - case WebAssembly::LOAD8_S_I32: - case WebAssembly::LOAD8_U_I32: - case WebAssembly::LOAD16_S_I32: - case WebAssembly::LOAD16_U_I32: - case WebAssembly::LOAD8_S_I64: - case WebAssembly::LOAD8_U_I64: - case WebAssembly::LOAD16_S_I64: - case WebAssembly::LOAD16_U_I64: - case WebAssembly::LOAD32_S_I64: - case WebAssembly::LOAD32_U_I64: - case WebAssembly::ATOMIC_LOAD_I32: - case WebAssembly::ATOMIC_LOAD8_U_I32: - case WebAssembly::ATOMIC_LOAD16_U_I32: - case WebAssembly::ATOMIC_LOAD_I64: - case WebAssembly::ATOMIC_LOAD8_U_I64: - case WebAssembly::ATOMIC_LOAD16_U_I64: - case WebAssembly::ATOMIC_LOAD32_U_I64: - case WebAssembly::ATOMIC_RMW8_U_ADD_I32: - case WebAssembly::ATOMIC_RMW8_U_ADD_I64: - case WebAssembly::ATOMIC_RMW8_U_SUB_I32: - case WebAssembly::ATOMIC_RMW8_U_SUB_I64: - case WebAssembly::ATOMIC_RMW8_U_AND_I32: - case WebAssembly::ATOMIC_RMW8_U_AND_I64: - case WebAssembly::ATOMIC_RMW8_U_OR_I32: - case WebAssembly::ATOMIC_RMW8_U_OR_I64: - case WebAssembly::ATOMIC_RMW8_U_XOR_I32: - case WebAssembly::ATOMIC_RMW8_U_XOR_I64: - case WebAssembly::ATOMIC_RMW8_U_XCHG_I32: - case WebAssembly::ATOMIC_RMW8_U_XCHG_I64: - case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I32: - case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64: - case WebAssembly::ATOMIC_RMW16_U_ADD_I32: - case WebAssembly::ATOMIC_RMW16_U_ADD_I64: - case WebAssembly::ATOMIC_RMW16_U_SUB_I32: - case WebAssembly::ATOMIC_RMW16_U_SUB_I64: - case WebAssembly::ATOMIC_RMW16_U_AND_I32: - case WebAssembly::ATOMIC_RMW16_U_AND_I64: - case WebAssembly::ATOMIC_RMW16_U_OR_I32: - case WebAssembly::ATOMIC_RMW16_U_OR_I64: - case WebAssembly::ATOMIC_RMW16_U_XOR_I32: - case 
WebAssembly::ATOMIC_RMW16_U_XOR_I64: - case WebAssembly::ATOMIC_RMW16_U_XCHG_I32: - case WebAssembly::ATOMIC_RMW16_U_XCHG_I64: - case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I32: - case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64: - case WebAssembly::ATOMIC_RMW_ADD_I32: - case WebAssembly::ATOMIC_RMW32_U_ADD_I64: - case WebAssembly::ATOMIC_RMW_SUB_I32: - case WebAssembly::ATOMIC_RMW32_U_SUB_I64: - case WebAssembly::ATOMIC_RMW_AND_I32: - case WebAssembly::ATOMIC_RMW32_U_AND_I64: - case WebAssembly::ATOMIC_RMW_OR_I32: - case WebAssembly::ATOMIC_RMW32_U_OR_I64: - case WebAssembly::ATOMIC_RMW_XOR_I32: - case WebAssembly::ATOMIC_RMW32_U_XOR_I64: - case WebAssembly::ATOMIC_RMW_XCHG_I32: - case WebAssembly::ATOMIC_RMW32_U_XCHG_I64: - case WebAssembly::ATOMIC_RMW_CMPXCHG_I32: - case WebAssembly::ATOMIC_RMW32_U_CMPXCHG_I64: - case WebAssembly::ATOMIC_RMW_ADD_I64: - case WebAssembly::ATOMIC_RMW_SUB_I64: - case WebAssembly::ATOMIC_RMW_AND_I64: - case WebAssembly::ATOMIC_RMW_OR_I64: - case WebAssembly::ATOMIC_RMW_XOR_I64: - case WebAssembly::ATOMIC_RMW_XCHG_I64: - case WebAssembly::ATOMIC_RMW_CMPXCHG_I64: - case WebAssembly::ATOMIC_NOTIFY: - case WebAssembly::ATOMIC_WAIT_I32: - case WebAssembly::ATOMIC_WAIT_I64: - rewriteP2Align(MI, WebAssembly::LoadP2AlignOperandNo); - break; - case WebAssembly::STORE_I32: - case WebAssembly::STORE_I64: - case WebAssembly::STORE_F32: - case WebAssembly::STORE_F64: - case WebAssembly::STORE_v16i8: - case WebAssembly::STORE_v8i16: - case WebAssembly::STORE_v4i32: - case WebAssembly::STORE_v2i64: - case WebAssembly::STORE_v4f32: - case WebAssembly::STORE_v2f64: - case WebAssembly::STORE8_I32: - case WebAssembly::STORE16_I32: - case WebAssembly::STORE8_I64: - case WebAssembly::STORE16_I64: - case WebAssembly::STORE32_I64: - case WebAssembly::ATOMIC_STORE_I32: - case WebAssembly::ATOMIC_STORE8_I32: - case WebAssembly::ATOMIC_STORE16_I32: - case WebAssembly::ATOMIC_STORE_I64: - case WebAssembly::ATOMIC_STORE8_I64: - case WebAssembly::ATOMIC_STORE16_I64: - 
case WebAssembly::ATOMIC_STORE32_I64: - rewriteP2Align(MI, WebAssembly::StoreP2AlignOperandNo); - break; - default: - break; + int16_t P2AlignOpNum = WebAssembly::getNamedOperandIdx( + MI.getOpcode(), WebAssembly::OpName::p2align); + if (P2AlignOpNum != -1) { + rewriteP2Align(MI, P2AlignOpNum); + Changed = true; } } } Index: llvm/trunk/test/CodeGen/WebAssembly/bulk-memory.ll =================================================================== --- llvm/trunk/test/CodeGen/WebAssembly/bulk-memory.ll +++ llvm/trunk/test/CodeGen/WebAssembly/bulk-memory.ll @@ -140,3 +140,68 @@ call void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, i32 1024, i1 0) ret void } + +; The following tests check that frame index elimination works for +; bulk memory instructions. The stack pointer is bumped by 16 instead +; of 10 because the stack pointer in WebAssembly is currently always +; 16-byte aligned, even in leaf functions, although it is not written +; back to the global in this case. + +; TODO: Change TransientStackAlignment to 1 to avoid this extra +; arithmetic. This will require forcing the use of StackAlignment in +; PrologEpilogEmitter.cpp when +; WebAssemblyFrameLowering::needsSPWriteback would be true. 
+ +; CHECK-LABEL: memcpy_alloca_src: +; NO-BULK-MEM-NOT: memory.copy +; BULK-MEM-NEXT: .functype memcpy_alloca_src (i32) -> () +; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer +; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 16 +; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]] +; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 6 +; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]] +; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 10 +; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L4]], $pop[[L5]] +; BULK-MEM-NEXT: return +define void @memcpy_alloca_src(i8* %dst) { + %a = alloca [10 x i8] + %p = bitcast [10 x i8]* %a to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %p, i32 10, i1 false) + ret void +} + +; CHECK-LABEL: memcpy_alloca_dst: +; NO-BULK-MEM-NOT: memory.copy +; BULK-MEM-NEXT: .functype memcpy_alloca_dst (i32) -> () +; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer +; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 16 +; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]] +; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 6 +; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]] +; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 10 +; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L4]], $0, $pop[[L5]] +; BULK-MEM-NEXT: return +define void @memcpy_alloca_dst(i8* %src) { + %a = alloca [10 x i8] + %p = bitcast [10 x i8]* %a to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %src, i32 10, i1 false) + ret void +} + +; CHECK-LABEL: memset_alloca: +; NO-BULK-MEM-NOT: memory.fill +; BULK-MEM-NEXT: .functype memset_alloca (i32) -> () +; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer +; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 16 +; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]] +; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 6 +; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]] +; BULK-MEM-NEXT: i32.const 
$push[[L5:[0-9]+]]=, 10 +; BULK-MEM-NEXT: memory.fill 0, $pop[[L4]], $0, $pop[[L5]] +; BULK-MEM-NEXT: return +define void @memset_alloca(i8 %val) { + %a = alloca [10 x i8] + %p = bitcast [10 x i8]* %a to i8* + call void @llvm.memset.p0i8.i32(i8* %p, i8 %val, i32 10, i1 false) + ret void +}