diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -331,14 +331,6 @@ } } -/// The operand number of the load or store address in load/store instructions. -static const unsigned LoadAddressOperandNo = 3; -static const unsigned StoreAddressOperandNo = 2; - -/// The operand number of the load or store p2align in load/store instructions. -static const unsigned LoadP2AlignOperandNo = 1; -static const unsigned StoreP2AlignOperandNo = 0; - /// This is used to indicate block signatures. enum class ExprType : unsigned { Void = 0x40, diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td @@ -11,6 +11,7 @@ /// //===----------------------------------------------------------------------===// +let UseNamedOperandTable = 1 in multiclass ATOMIC_I pattern_r, string asmstr_r = "", string asmstr_s = "", bits<32> atomic_op = -1> { @@ -810,9 +811,9 @@ defm "" : ATOMIC_I<(outs rc:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$exp, - rc:$new), + rc:$new_), (outs), (ins P2Align:$p2align, offset32_op:$off), [], - !strconcat(name, "\t$dst, ${off}(${addr})${p2align}, $exp, $new"), + !strconcat(name, "\t$dst, ${off}(${addr})${p2align}, $exp, $new_"), !strconcat(name, "\t${off}${p2align}"), atomic_op>; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h @@ -21,8 +21,17 @@ #define GET_INSTRINFO_HEADER #include "WebAssemblyGenInstrInfo.inc" +#define 
GET_INSTRINFO_OPERAND_ENUM +#include "WebAssemblyGenInstrInfo.inc" + namespace llvm { +namespace WebAssembly { + +int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex); + +} + class WebAssemblySubtarget; class WebAssemblyInstrInfo final : public WebAssemblyGenInstrInfo { diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -27,6 +27,10 @@ #define GET_INSTRINFO_CTOR_DTOR #include "WebAssemblyGenInstrInfo.inc" +// defines WebAssembly::getNamedOperandIdx +#define GET_INSTRINFO_NAMED_OPS +#include "WebAssemblyGenInstrInfo.inc" + WebAssemblyInstrInfo::WebAssemblyInstrInfo(const WebAssemblySubtarget &STI) : WebAssemblyGenInstrInfo(WebAssembly::ADJCALLSTACKDOWN, WebAssembly::ADJCALLSTACKUP, diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -52,7 +52,7 @@ // Defines atomic and non-atomic loads, regular and extending. 
multiclass WebAssemblyLoad { - let mayLoad = 1 in + let mayLoad = 1, UseNamedOperandTable = 1 in defm "": I<(outs rc:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), (outs), (ins P2Align:$p2align, offset32_op:$off), @@ -294,7 +294,7 @@ // Defines atomic and non-atomic stores, regular and truncating multiclass WebAssemblyStore { - let mayStore = 1 in + let mayStore = 1, UseNamedOperandTable = 1 in defm "" : I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$val), (outs), diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp @@ -70,13 +70,15 @@ // If this is the address operand of a load or store, make it relative to SP // and fold the frame offset directly in. - if ((MI.mayLoad() && FIOperandNum == WebAssembly::LoadAddressOperandNo) || - (MI.mayStore() && FIOperandNum == WebAssembly::StoreAddressOperandNo)) { - assert(FrameOffset >= 0 && MI.getOperand(FIOperandNum - 1).getImm() >= 0); - int64_t Offset = MI.getOperand(FIOperandNum - 1).getImm() + FrameOffset; + if (unsigned(WebAssembly::getNamedOperandIdx( + MI.getOpcode(), WebAssembly::OpName::addr)) == FIOperandNum) { + unsigned OffsetOperandNum = WebAssembly::getNamedOperandIdx( + MI.getOpcode(), WebAssembly::OpName::off); + assert(FrameOffset >= 0 && MI.getOperand(OffsetOperandNum).getImm() >= 0); + int64_t Offset = MI.getOperand(OffsetOperandNum).getImm() + FrameOffset; if (static_cast(Offset) <= std::numeric_limits::max()) { - MI.getOperand(FIOperandNum - 1).setImm(Offset); + MI.getOperand(OffsetOperandNum).setImm(Offset); MI.getOperand(FIOperandNum) .ChangeToRegister(FrameRegister, /*IsDef=*/false); return; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp --- 
a/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp @@ -13,6 +13,7 @@ #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssembly.h" +#include "WebAssemblyInstrInfo.h" #include "WebAssemblyMachineFunctionInfo.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -79,121 +80,16 @@ << "********** Function: " << MF.getName() << '\n'; }); - bool Changed = false; - for (auto &MBB : MF) { for (auto &MI : MBB) { - switch (MI.getOpcode()) { - case WebAssembly::LOAD_I32: - case WebAssembly::LOAD_I64: - case WebAssembly::LOAD_F32: - case WebAssembly::LOAD_F64: - case WebAssembly::LOAD_v16i8: - case WebAssembly::LOAD_v8i16: - case WebAssembly::LOAD_v4i32: - case WebAssembly::LOAD_v2i64: - case WebAssembly::LOAD_v4f32: - case WebAssembly::LOAD_v2f64: - case WebAssembly::LOAD8_S_I32: - case WebAssembly::LOAD8_U_I32: - case WebAssembly::LOAD16_S_I32: - case WebAssembly::LOAD16_U_I32: - case WebAssembly::LOAD8_S_I64: - case WebAssembly::LOAD8_U_I64: - case WebAssembly::LOAD16_S_I64: - case WebAssembly::LOAD16_U_I64: - case WebAssembly::LOAD32_S_I64: - case WebAssembly::LOAD32_U_I64: - case WebAssembly::ATOMIC_LOAD_I32: - case WebAssembly::ATOMIC_LOAD8_U_I32: - case WebAssembly::ATOMIC_LOAD16_U_I32: - case WebAssembly::ATOMIC_LOAD_I64: - case WebAssembly::ATOMIC_LOAD8_U_I64: - case WebAssembly::ATOMIC_LOAD16_U_I64: - case WebAssembly::ATOMIC_LOAD32_U_I64: - case WebAssembly::ATOMIC_RMW8_U_ADD_I32: - case WebAssembly::ATOMIC_RMW8_U_ADD_I64: - case WebAssembly::ATOMIC_RMW8_U_SUB_I32: - case WebAssembly::ATOMIC_RMW8_U_SUB_I64: - case WebAssembly::ATOMIC_RMW8_U_AND_I32: - case WebAssembly::ATOMIC_RMW8_U_AND_I64: - case WebAssembly::ATOMIC_RMW8_U_OR_I32: - case WebAssembly::ATOMIC_RMW8_U_OR_I64: - case WebAssembly::ATOMIC_RMW8_U_XOR_I32: - case WebAssembly::ATOMIC_RMW8_U_XOR_I64: - case WebAssembly::ATOMIC_RMW8_U_XCHG_I32: - case 
WebAssembly::ATOMIC_RMW8_U_XCHG_I64: - case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I32: - case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64: - case WebAssembly::ATOMIC_RMW16_U_ADD_I32: - case WebAssembly::ATOMIC_RMW16_U_ADD_I64: - case WebAssembly::ATOMIC_RMW16_U_SUB_I32: - case WebAssembly::ATOMIC_RMW16_U_SUB_I64: - case WebAssembly::ATOMIC_RMW16_U_AND_I32: - case WebAssembly::ATOMIC_RMW16_U_AND_I64: - case WebAssembly::ATOMIC_RMW16_U_OR_I32: - case WebAssembly::ATOMIC_RMW16_U_OR_I64: - case WebAssembly::ATOMIC_RMW16_U_XOR_I32: - case WebAssembly::ATOMIC_RMW16_U_XOR_I64: - case WebAssembly::ATOMIC_RMW16_U_XCHG_I32: - case WebAssembly::ATOMIC_RMW16_U_XCHG_I64: - case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I32: - case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64: - case WebAssembly::ATOMIC_RMW_ADD_I32: - case WebAssembly::ATOMIC_RMW32_U_ADD_I64: - case WebAssembly::ATOMIC_RMW_SUB_I32: - case WebAssembly::ATOMIC_RMW32_U_SUB_I64: - case WebAssembly::ATOMIC_RMW_AND_I32: - case WebAssembly::ATOMIC_RMW32_U_AND_I64: - case WebAssembly::ATOMIC_RMW_OR_I32: - case WebAssembly::ATOMIC_RMW32_U_OR_I64: - case WebAssembly::ATOMIC_RMW_XOR_I32: - case WebAssembly::ATOMIC_RMW32_U_XOR_I64: - case WebAssembly::ATOMIC_RMW_XCHG_I32: - case WebAssembly::ATOMIC_RMW32_U_XCHG_I64: - case WebAssembly::ATOMIC_RMW_CMPXCHG_I32: - case WebAssembly::ATOMIC_RMW32_U_CMPXCHG_I64: - case WebAssembly::ATOMIC_RMW_ADD_I64: - case WebAssembly::ATOMIC_RMW_SUB_I64: - case WebAssembly::ATOMIC_RMW_AND_I64: - case WebAssembly::ATOMIC_RMW_OR_I64: - case WebAssembly::ATOMIC_RMW_XOR_I64: - case WebAssembly::ATOMIC_RMW_XCHG_I64: - case WebAssembly::ATOMIC_RMW_CMPXCHG_I64: - case WebAssembly::ATOMIC_NOTIFY: - case WebAssembly::ATOMIC_WAIT_I32: - case WebAssembly::ATOMIC_WAIT_I64: - rewriteP2Align(MI, WebAssembly::LoadP2AlignOperandNo); - break; - case WebAssembly::STORE_I32: - case WebAssembly::STORE_I64: - case WebAssembly::STORE_F32: - case WebAssembly::STORE_F64: - case WebAssembly::STORE_v16i8: - case WebAssembly::STORE_v8i16: 
- case WebAssembly::STORE_v4i32: - case WebAssembly::STORE_v2i64: - case WebAssembly::STORE_v4f32: - case WebAssembly::STORE_v2f64: - case WebAssembly::STORE8_I32: - case WebAssembly::STORE16_I32: - case WebAssembly::STORE8_I64: - case WebAssembly::STORE16_I64: - case WebAssembly::STORE32_I64: - case WebAssembly::ATOMIC_STORE_I32: - case WebAssembly::ATOMIC_STORE8_I32: - case WebAssembly::ATOMIC_STORE16_I32: - case WebAssembly::ATOMIC_STORE_I64: - case WebAssembly::ATOMIC_STORE8_I64: - case WebAssembly::ATOMIC_STORE16_I64: - case WebAssembly::ATOMIC_STORE32_I64: - rewriteP2Align(MI, WebAssembly::StoreP2AlignOperandNo); - break; - default: - break; + int16_t P2AlignOpNum = WebAssembly::getNamedOperandIdx( + MI.getOpcode(), WebAssembly::OpName::p2align); + if (P2AlignOpNum != -1) { + rewriteP2Align(MI, P2AlignOpNum); } } } - return Changed; + // Not changed + return false; } diff --git a/llvm/test/CodeGen/WebAssembly/bulk-memory.ll b/llvm/test/CodeGen/WebAssembly/bulk-memory.ll --- a/llvm/test/CodeGen/WebAssembly/bulk-memory.ll +++ b/llvm/test/CodeGen/WebAssembly/bulk-memory.ll @@ -140,3 +140,60 @@ call void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, i32 1024, i1 0) ret void } + +; The following tests check that frame index elimination works for +; bulk memory instructions. __stack_pointer is bumped by 16 instead of +; 10 because in non-leaf function it must be 16-byte aligned, and +; there is no special case for leaf functions. 
+
+; CHECK-LABEL: memcpy_alloca_src:
+; NO-BULK-MEM-NOT: memory.copy
+; BULK-MEM-NEXT: .functype memcpy_alloca_src (i32) -> ()
+; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
+; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 16
+; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
+; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 6
+; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
+; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 10
+; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L4]], $pop[[L5]]
+; BULK-MEM-NEXT: return
+define void @memcpy_alloca_src(i8* %dst) {
+ %a = alloca [10 x i8]
+ %p = bitcast [10 x i8]* %a to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %p, i32 10, i1 false)
+ ret void
+}
+
+; CHECK-LABEL: memcpy_alloca_dst:
+; NO-BULK-MEM-NOT: memory.copy
+; BULK-MEM-NEXT: .functype memcpy_alloca_dst (i32) -> ()
+; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
+; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 16
+; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
+; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 6
+; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
+; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 10
+; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L4]], $0, $pop[[L5]]
+; BULK-MEM-NEXT: return
+define void @memcpy_alloca_dst(i8* %src) {
+ %a = alloca [10 x i8]
+ %p = bitcast [10 x i8]* %a to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %src, i32 10, i1 false)
+ ret void
+}
+
+; CHECK-LABEL: memset_alloca:
+; NO-BULK-MEM-NOT: memory.fill
+; BULK-MEM-NEXT: .functype memset_alloca (i32) -> ()
+; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
+; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 16
+; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
+; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 6
+; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
+; BULK-MEM-NEXT: i32.const 
$push[[L5:[0-9]+]]=, 10 +; BULK-MEM-NEXT: memory.fill 0, $pop[[L4]], $0, $pop[[L5]] +; BULK-MEM-NEXT: return +define void @memset_alloca(i8 %val) { + %a = alloca [10 x i8] + %p = bitcast [10 x i8]* %a to i8* + call void @llvm.memset.p0i8.i32(i8* %p, i8 %val, i32 10, i1 false) + ret void +} diff --git a/llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll b/llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll --- a/llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll @@ -20,7 +20,7 @@ ; CHECK-LABEL: load_v16i8_a4: ; CHECK-NEXT: .functype load_v16i8_a4 (i32) -> (v128){{$}} -; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0):p2align=2{{$}} +; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <16 x i8> @load_v16i8_a4(<16 x i8> *%p) { %v = load <16 x i8>, <16 x i8>* %p, align 4 @@ -31,7 +31,7 @@ ; CHECK-LABEL: load_v16i8_a16: ; CHECK-NEXT: .functype load_v16i8_a16 (i32) -> (v128){{$}} -; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <16 x i8> @load_v16i8_a16(<16 x i8> *%p) { %v = load <16 x i8>, <16 x i8>* %p, align 16 @@ -42,7 +42,7 @@ ; CHECK-LABEL: load_v16i8_a32: ; CHECK-NEXT: .functype load_v16i8_a32 (i32) -> (v128){{$}} -; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <16 x i8> @load_v16i8_a32(<16 x i8> *%p) { %v = load <16 x i8>, <16 x i8>* %p, align 32 @@ -60,7 +60,7 @@ ; CHECK-LABEL: store_v16i8_a4: ; CHECK-NEXT: .functype store_v16i8_a4 (i32, v128) -> (){{$}} -; CHECK-NEXT: v128.store 0($0):p2align=2, $1{{$}} +; CHECK-NEXT: v128.store 0($0):p2align=0, $1{{$}} ; CHECK-NEXT: return{{$}} define void @store_v16i8_a4(<16 x i8> *%p, <16 x i8> %v) { store <16 x i8> %v, <16 x i8>* %p, align 4 @@ -71,7 
+71,7 @@ ; CHECK-LABEL: store_v16i8_a16: ; CHECK-NEXT: .functype store_v16i8_a16 (i32, v128) -> (){{$}} -; CHECK-NEXT: v128.store 0($0), $1{{$}} +; CHECK-NEXT: v128.store 0($0):p2align=0, $1{{$}} ; CHECK-NEXT: return{{$}} define void @store_v16i8_a16(<16 x i8> *%p, <16 x i8> %v) { store <16 x i8> %v, <16 x i8>* %p, align 16 @@ -82,7 +82,7 @@ ; CHECK-LABEL: store_v16i8_a32: ; CHECK-NEXT: .functype store_v16i8_a32 (i32, v128) -> (){{$}} -; CHECK-NEXT: v128.store 0($0), $1{{$}} +; CHECK-NEXT: v128.store 0($0):p2align=0, $1{{$}} ; CHECK-NEXT: return{{$}} define void @store_v16i8_a32(<16 x i8> *%p, <16 x i8> %v) { store <16 x i8> %v, <16 x i8>* %p, align 32 @@ -104,7 +104,7 @@ ; CHECK-LABEL: load_v8i16_a4: ; CHECK-NEXT: .functype load_v8i16_a4 (i32) -> (v128){{$}} -; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0):p2align=2{{$}} +; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_v8i16_a4(<8 x i16> *%p) { %v = load <8 x i16>, <8 x i16>* %p, align 4 @@ -115,7 +115,7 @@ ; CHECK-LABEL: load_v8i16_a16: ; CHECK-NEXT: .functype load_v8i16_a16 (i32) -> (v128){{$}} -; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_v8i16_a16(<8 x i16> *%p) { %v = load <8 x i16>, <8 x i16>* %p, align 16 @@ -126,7 +126,7 @@ ; CHECK-LABEL: load_v8i16_a32: ; CHECK-NEXT: .functype load_v8i16_a32 (i32) -> (v128){{$}} -; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_v8i16_a32(<8 x i16> *%p) { %v = load <8 x i16>, <8 x i16>* %p, align 32 @@ -144,7 +144,7 @@ ; CHECK-LABEL: store_v8i16_a4: ; CHECK-NEXT: .functype store_v8i16_a4 (i32, v128) -> (){{$}} -; CHECK-NEXT: v128.store 0($0):p2align=2, $1{{$}} +; CHECK-NEXT: v128.store 0($0):p2align=0, $1{{$}} ; CHECK-NEXT: return{{$}} 
define void @store_v8i16_a4(<8 x i16> *%p, <8 x i16> %v) { store <8 x i16> %v, <8 x i16>* %p, align 4 @@ -155,7 +155,7 @@ ; CHECK-LABEL: store_v8i16_a16: ; CHECK-NEXT: .functype store_v8i16_a16 (i32, v128) -> (){{$}} -; CHECK-NEXT: v128.store 0($0), $1{{$}} +; CHECK-NEXT: v128.store 0($0):p2align=0, $1{{$}} ; CHECK-NEXT: return{{$}} define void @store_v8i16_a16(<8 x i16> *%p, <8 x i16> %v) { store <8 x i16> %v, <8 x i16>* %p, align 16 @@ -166,7 +166,7 @@ ; CHECK-LABEL: store_v8i16_a32: ; CHECK-NEXT: .functype store_v8i16_a32 (i32, v128) -> (){{$}} -; CHECK-NEXT: v128.store 0($0), $1{{$}} +; CHECK-NEXT: v128.store 0($0):p2align=0, $1{{$}} ; CHECK-NEXT: return{{$}} define void @store_v8i16_a32(<8 x i16> *%p, <8 x i16> %v) { store <8 x i16> %v, <8 x i16>* %p, align 32 @@ -188,7 +188,7 @@ ; CHECK-LABEL: load_v4i32_a4: ; CHECK-NEXT: .functype load_v4i32_a4 (i32) -> (v128){{$}} -; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0):p2align=2{{$}} +; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_v4i32_a4(<4 x i32> *%p) { %v = load <4 x i32>, <4 x i32>* %p, align 4 @@ -199,7 +199,7 @@ ; CHECK-LABEL: load_v4i32_a16: ; CHECK-NEXT: .functype load_v4i32_a16 (i32) -> (v128){{$}} -; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_v4i32_a16(<4 x i32> *%p) { %v = load <4 x i32>, <4 x i32>* %p, align 16 @@ -210,7 +210,7 @@ ; CHECK-LABEL: load_v4i32_a32: ; CHECK-NEXT: .functype load_v4i32_a32 (i32) -> (v128){{$}} -; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_v4i32_a32(<4 x i32> *%p) { %v = load <4 x i32>, <4 x i32>* %p, align 32 @@ -228,7 +228,7 @@ ; CHECK-LABEL: store_v4i32_a4: ; CHECK-NEXT: .functype store_v4i32_a4 (i32, v128) -> (){{$}} -; 
CHECK-NEXT: v128.store 0($0):p2align=2, $1{{$}} +; CHECK-NEXT: v128.store 0($0):p2align=0, $1{{$}} ; CHECK-NEXT: return{{$}} define void @store_v4i32_a4(<4 x i32> *%p, <4 x i32> %v) { store <4 x i32> %v, <4 x i32>* %p, align 4 @@ -239,7 +239,7 @@ ; CHECK-LABEL: store_v4i32_a16: ; CHECK-NEXT: .functype store_v4i32_a16 (i32, v128) -> (){{$}} -; CHECK-NEXT: v128.store 0($0), $1{{$}} +; CHECK-NEXT: v128.store 0($0):p2align=0, $1{{$}} ; CHECK-NEXT: return{{$}} define void @store_v4i32_a16(<4 x i32> *%p, <4 x i32> %v) { store <4 x i32> %v, <4 x i32>* %p, align 16 @@ -250,7 +250,7 @@ ; CHECK-LABEL: store_v4i32_a32: ; CHECK-NEXT: .functype store_v4i32_a32 (i32, v128) -> (){{$}} -; CHECK-NEXT: v128.store 0($0), $1{{$}} +; CHECK-NEXT: v128.store 0($0):p2align=0, $1{{$}} ; CHECK-NEXT: return{{$}} define void @store_v4i32_a32(<4 x i32> *%p, <4 x i32> %v) { store <4 x i32> %v, <4 x i32>* %p, align 32 @@ -272,7 +272,7 @@ ; CHECK-LABEL: load_v2i64_a4: ; CHECK-NEXT: .functype load_v2i64_a4 (i32) -> (v128){{$}} -; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0):p2align=2{{$}} +; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_v2i64_a4(<2 x i64> *%p) { %v = load <2 x i64>, <2 x i64>* %p, align 4 @@ -283,7 +283,7 @@ ; CHECK-LABEL: load_v2i64_a16: ; CHECK-NEXT: .functype load_v2i64_a16 (i32) -> (v128){{$}} -; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_v2i64_a16(<2 x i64> *%p) { %v = load <2 x i64>, <2 x i64>* %p, align 16 @@ -294,7 +294,7 @@ ; CHECK-LABEL: load_v2i64_a32: ; CHECK-NEXT: .functype load_v2i64_a32 (i32) -> (v128){{$}} -; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_v2i64_a32(<2 x i64> *%p) { %v = load <2 x i64>, <2 x i64>* %p, align 32 
@@ -312,7 +312,7 @@ ; CHECK-LABEL: store_v2i64_a4: ; CHECK-NEXT: .functype store_v2i64_a4 (i32, v128) -> (){{$}} -; CHECK-NEXT: v128.store 0($0):p2align=2, $1{{$}} +; CHECK-NEXT: v128.store 0($0):p2align=0, $1{{$}} ; CHECK-NEXT: return{{$}} define void @store_v2i64_a4(<2 x i64> *%p, <2 x i64> %v) { store <2 x i64> %v, <2 x i64>* %p, align 4 @@ -323,7 +323,7 @@ ; CHECK-LABEL: store_v2i64_a16: ; CHECK-NEXT: .functype store_v2i64_a16 (i32, v128) -> (){{$}} -; CHECK-NEXT: v128.store 0($0), $1{{$}} +; CHECK-NEXT: v128.store 0($0):p2align=0, $1{{$}} ; CHECK-NEXT: return{{$}} define void @store_v2i64_a16(<2 x i64> *%p, <2 x i64> %v) { store <2 x i64> %v, <2 x i64>* %p, align 16 @@ -334,7 +334,7 @@ ; CHECK-LABEL: store_v2i64_a32: ; CHECK-NEXT: .functype store_v2i64_a32 (i32, v128) -> (){{$}} -; CHECK-NEXT: v128.store 0($0), $1{{$}} +; CHECK-NEXT: v128.store 0($0):p2align=0, $1{{$}} ; CHECK-NEXT: return{{$}} define void @store_v2i64_a32(<2 x i64> *%p, <2 x i64> %v) { store <2 x i64> %v, <2 x i64>* %p, align 32 @@ -356,7 +356,7 @@ ; CHECK-LABEL: load_v4f32_a4: ; CHECK-NEXT: .functype load_v4f32_a4 (i32) -> (v128){{$}} -; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0):p2align=2{{$}} +; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <4 x float> @load_v4f32_a4(<4 x float> *%p) { %v = load <4 x float>, <4 x float>* %p, align 4 @@ -367,7 +367,7 @@ ; CHECK-LABEL: load_v4f32_a16: ; CHECK-NEXT: .functype load_v4f32_a16 (i32) -> (v128){{$}} -; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <4 x float> @load_v4f32_a16(<4 x float> *%p) { %v = load <4 x float>, <4 x float>* %p, align 16 @@ -378,7 +378,7 @@ ; CHECK-LABEL: load_v4f32_a32: ; CHECK-NEXT: .functype load_v4f32_a32 (i32) -> (v128){{$}} -; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} 
; CHECK-NEXT: return $pop[[R]]{{$}} define <4 x float> @load_v4f32_a32(<4 x float> *%p) { %v = load <4 x float>, <4 x float>* %p, align 32 @@ -396,7 +396,7 @@ ; CHECK-LABEL: store_v4f32_a4: ; CHECK-NEXT: .functype store_v4f32_a4 (i32, v128) -> (){{$}} -; CHECK-NEXT: v128.store 0($0):p2align=2, $1{{$}} +; CHECK-NEXT: v128.store 0($0):p2align=0, $1{{$}} ; CHECK-NEXT: return{{$}} define void @store_v4f32_a4(<4 x float> *%p, <4 x float> %v) { store <4 x float> %v, <4 x float>* %p, align 4 @@ -407,7 +407,7 @@ ; CHECK-LABEL: store_v4f32_a16: ; CHECK-NEXT: .functype store_v4f32_a16 (i32, v128) -> (){{$}} -; CHECK-NEXT: v128.store 0($0), $1{{$}} +; CHECK-NEXT: v128.store 0($0):p2align=0, $1{{$}} ; CHECK-NEXT: return{{$}} define void @store_v4f32_a16(<4 x float> *%p, <4 x float> %v) { store <4 x float> %v, <4 x float>* %p, align 16 @@ -418,7 +418,7 @@ ; CHECK-LABEL: store_v4f32_a32: ; CHECK-NEXT: .functype store_v4f32_a32 (i32, v128) -> (){{$}} -; CHECK-NEXT: v128.store 0($0), $1{{$}} +; CHECK-NEXT: v128.store 0($0):p2align=0, $1{{$}} ; CHECK-NEXT: return{{$}} define void @store_v4f32_a32(<4 x float> *%p, <4 x float> %v) { store <4 x float> %v, <4 x float>* %p, align 32 @@ -440,7 +440,7 @@ ; CHECK-LABEL: load_v2f64_a4: ; CHECK-NEXT: .functype load_v2f64_a4 (i32) -> (v128){{$}} -; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0):p2align=2{{$}} +; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <2 x double> @load_v2f64_a4(<2 x double> *%p) { %v = load <2 x double>, <2 x double>* %p, align 4 @@ -451,7 +451,7 @@ ; CHECK-LABEL: load_v2f64_a16: ; CHECK-NEXT: .functype load_v2f64_a16 (i32) -> (v128){{$}} -; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <2 x double> @load_v2f64_a16(<2 x double> *%p) { %v = load <2 x double>, <2 x double>* %p, align 16 @@ -462,7 +462,7 @@ ; CHECK-LABEL: load_v2f64_a32: ; 
CHECK-NEXT: .functype load_v2f64_a32 (i32) -> (v128){{$}} -; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define <2 x double> @load_v2f64_a32(<2 x double> *%p) { %v = load <2 x double>, <2 x double>* %p, align 32 @@ -480,7 +480,7 @@ ; CHECK-LABEL: store_v2f64_a4: ; CHECK-NEXT: .functype store_v2f64_a4 (i32, v128) -> (){{$}} -; CHECK-NEXT: v128.store 0($0):p2align=2, $1{{$}} +; CHECK-NEXT: v128.store 0($0):p2align=0, $1{{$}} ; CHECK-NEXT: return{{$}} define void @store_v2f64_a4(<2 x double> *%p, <2 x double> %v) { store <2 x double> %v, <2 x double>* %p, align 4 @@ -491,7 +491,7 @@ ; CHECK-LABEL: store_v2f64_a16: ; CHECK-NEXT: .functype store_v2f64_a16 (i32, v128) -> (){{$}} -; CHECK-NEXT: v128.store 0($0), $1{{$}} +; CHECK-NEXT: v128.store 0($0):p2align=0, $1{{$}} ; CHECK-NEXT: return{{$}} define void @store_v2f64_a16(<2 x double> *%p, <2 x double> %v) { store <2 x double> %v, <2 x double>* %p, align 16 @@ -502,7 +502,7 @@ ; CHECK-LABEL: store_v2f64_a32: ; CHECK-NEXT: .functype store_v2f64_a32 (i32, v128) -> (){{$}} -; CHECK-NEXT: v128.store 0($0), $1{{$}} +; CHECK-NEXT: v128.store 0($0):p2align=0, $1{{$}} ; CHECK-NEXT: return{{$}} define void @store_v2f64_a32(<2 x double> *%p, <2 x double> %v) { store <2 x double> %v, <2 x double>* %p, align 32 diff --git a/llvm/test/CodeGen/WebAssembly/simd-offset.ll b/llvm/test/CodeGen/WebAssembly/simd-offset.ll --- a/llvm/test/CodeGen/WebAssembly/simd-offset.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-offset.ll @@ -13,7 +13,7 @@ ; CHECK-LABEL: load_v16i8: ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype load_v16i8 (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @load_v16i8(<16 x i8>* %p) { %v = load <16 x i8>, <16 x i8>* %p @@ -23,7 +23,7 @@ ; CHECK-LABEL: 
load_v16i8_with_folded_offset: ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype load_v16i8_with_folded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @load_v16i8_with_folded_offset(<16 x i8>* %p) { %q = ptrtoint <16 x i8>* %p to i32 @@ -36,7 +36,7 @@ ; CHECK-LABEL: load_v16i8_with_folded_gep_offset: ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype load_v16i8_with_folded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @load_v16i8_with_folded_gep_offset(<16 x i8>* %p) { %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1 @@ -49,7 +49,7 @@ ; SIMD128-NEXT: .functype load_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @load_v16i8_with_unfolded_gep_negative_offset(<16 x i8>* %p) { %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1 @@ -62,7 +62,7 @@ ; SIMD128-NEXT: .functype load_v16i8_with_unfolded_offset (i32) -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @load_v16i8_with_unfolded_offset(<16 x i8>* %p) { %q = ptrtoint <16 x i8>* %p to i32 @@ -77,7 +77,7 @@ ; SIMD128-NEXT: .functype load_v16i8_with_unfolded_gep_offset 
(i32) -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @load_v16i8_with_unfolded_gep_offset(<16 x i8>* %p) { %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1 @@ -89,7 +89,7 @@ ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype load_v16i8_from_numeric_address () -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @load_v16i8_from_numeric_address() { %s = inttoptr i32 32 to <16 x i8>* @@ -101,7 +101,7 @@ ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype load_v16i8_from_global_address () -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v16i8($pop[[L0]]){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v16i8($pop[[L0]]):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} @gv_v16i8 = global <16 x i8> define <16 x i8> @load_v16i8_from_global_address() { @@ -112,7 +112,7 @@ ; CHECK-LABEL: store_v16i8: ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype store_v16i8 (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 0($1), $0{{$}} +; SIMD128-NEXT: v128.store 0($1):p2align=0, $0{{$}} define void @store_v16i8(<16 x i8> %v, <16 x i8>* %p) { store <16 x i8> %v , <16 x i8>* %p ret void @@ -121,7 +121,7 @@ ; CHECK-LABEL: store_v16i8_with_folded_offset: ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype store_v16i8_with_folded_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 16($1), $0{{$}} +; SIMD128-NEXT: v128.store 16($1):p2align=0, $0{{$}} define void @store_v16i8_with_folded_offset(<16 x i8> %v, <16 x i8>* 
%p) { %q = ptrtoint <16 x i8>* %p to i32 %r = add nuw i32 %q, 16 @@ -133,7 +133,7 @@ ; CHECK-LABEL: store_v16i8_with_folded_gep_offset: ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype store_v16i8_with_folded_gep_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 16($1), $0{{$}} +; SIMD128-NEXT: v128.store 16($1):p2align=0, $0{{$}} define void @store_v16i8_with_folded_gep_offset(<16 x i8> %v, <16 x i8>* %p) { %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1 store <16 x i8> %v , <16 x i8>* %s @@ -145,7 +145,7 @@ ; SIMD128-NEXT: .functype store_v16i8_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[R]]):p2align=0, $0{{$}} define void @store_v16i8_with_unfolded_gep_negative_offset(<16 x i8> %v, <16 x i8>* %p) { %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1 store <16 x i8> %v , <16 x i8>* %s @@ -157,7 +157,7 @@ ; SIMD128-NEXT: .functype store_v16i8_with_unfolded_offset (v128, i32) -> (){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[R]]):p2align=0, $0{{$}} define void @store_v16i8_with_unfolded_offset(<16 x i8> %v, <16 x i8>* %p) { %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1 store <16 x i8> %v , <16 x i8>* %s @@ -169,7 +169,7 @@ ; SIMD128-NEXT: .functype store_v16i8_with_unfolded_gep_offset (v128, i32) -> (){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[R]]):p2align=0, $0{{$}} define void @store_v16i8_with_unfolded_gep_offset(<16 x i8> %v, <16 x i8>* %p) { %s = getelementptr <16 x i8>, 
<16 x i8>* %p, i32 1 store <16 x i8> %v , <16 x i8>* %s @@ -180,7 +180,7 @@ ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype store_v16i8_to_numeric_address (v128) -> (){{$}} ; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.store 32($pop[[R]]), $0{{$}} +; SIMD128-NEXT: v128.store 32($pop[[R]]):p2align=0, $0{{$}} define void @store_v16i8_to_numeric_address(<16 x i8> %v) { %s = inttoptr i32 32 to <16 x i8>* store <16 x i8> %v , <16 x i8>* %s @@ -191,7 +191,7 @@ ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype store_v16i8_to_global_address (v128) -> (){{$}} ; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.store gv_v16i8($pop[[R]]), $0{{$}} +; SIMD128-NEXT: v128.store gv_v16i8($pop[[R]]):p2align=0, $0{{$}} define void @store_v16i8_to_global_address(<16 x i8> %v) { store <16 x i8> %v , <16 x i8>* @gv_v16i8 ret void @@ -203,7 +203,7 @@ ; CHECK-LABEL: load_v8i16: ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype load_v8i16 (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_v8i16(<8 x i16>* %p) { %v = load <8 x i16>, <8 x i16>* %p @@ -213,7 +213,7 @@ ; CHECK-LABEL: load_v8i16_with_folded_offset: ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype load_v8i16_with_folded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_v8i16_with_folded_offset(<8 x i16>* %p) { %q = ptrtoint <8 x i16>* %p to i32 @@ -226,7 +226,7 @@ ; CHECK-LABEL: load_v8i16_with_folded_gep_offset: ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype load_v8i16_with_folded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0):p2align=0{{$}} ; SIMD128-NEXT: 
return $pop[[R]]{{$}} define <8 x i16> @load_v8i16_with_folded_gep_offset(<8 x i16>* %p) { %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1 @@ -239,7 +239,7 @@ ; SIMD128-NEXT: .functype load_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_v8i16_with_unfolded_gep_negative_offset(<8 x i16>* %p) { %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1 @@ -252,7 +252,7 @@ ; SIMD128-NEXT: .functype load_v8i16_with_unfolded_offset (i32) -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[L0:[0-9]+]]=, 0($pop[[L1]]){{$}} +; SIMD128-NEXT: v128.load $push[[L0:[0-9]+]]=, 0($pop[[L1]]):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[L0]]{{$}} define <8 x i16> @load_v8i16_with_unfolded_offset(<8 x i16>* %p) { %q = ptrtoint <8 x i16>* %p to i32 @@ -267,7 +267,7 @@ ; SIMD128-NEXT: .functype load_v8i16_with_unfolded_gep_offset (i32) -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_v8i16_with_unfolded_gep_offset(<8 x i16>* %p) { %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1 @@ -279,7 +279,7 @@ ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype load_v8i16_from_numeric_address () -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}} +; SIMD128-NEXT: 
v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_v8i16_from_numeric_address() { %s = inttoptr i32 32 to <8 x i16>* @@ -291,7 +291,7 @@ ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype load_v8i16_from_global_address () -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v8i16($pop[[L0]]){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v8i16($pop[[L0]]):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} @gv_v8i16 = global <8 x i16> define <8 x i16> @load_v8i16_from_global_address() { @@ -302,7 +302,7 @@ ; CHECK-LABEL: store_v8i16: ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype store_v8i16 (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 0($1), $0{{$}} +; SIMD128-NEXT: v128.store 0($1):p2align=0, $0{{$}} define void @store_v8i16(<8 x i16> %v, <8 x i16>* %p) { store <8 x i16> %v , <8 x i16>* %p ret void @@ -311,7 +311,7 @@ ; CHECK-LABEL: store_v8i16_with_folded_offset: ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype store_v8i16_with_folded_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 16($1), $0{{$}} +; SIMD128-NEXT: v128.store 16($1):p2align=0, $0{{$}} define void @store_v8i16_with_folded_offset(<8 x i16> %v, <8 x i16>* %p) { %q = ptrtoint <8 x i16>* %p to i32 %r = add nuw i32 %q, 16 @@ -323,7 +323,7 @@ ; CHECK-LABEL: store_v8i16_with_folded_gep_offset: ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype store_v8i16_with_folded_gep_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 16($1), $0{{$}} +; SIMD128-NEXT: v128.store 16($1):p2align=0, $0{{$}} define void @store_v8i16_with_folded_gep_offset(<8 x i16> %v, <8 x i16>* %p) { %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1 store <8 x i16> %v , <8 x i16>* %s @@ -335,7 +335,7 @@ ; SIMD128-NEXT: .functype store_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} ; 
SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[R]]):p2align=0, $0{{$}} define void @store_v8i16_with_unfolded_gep_negative_offset(<8 x i16> %v, <8 x i16>* %p) { %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1 store <8 x i16> %v , <8 x i16>* %s @@ -347,7 +347,7 @@ ; SIMD128-NEXT: .functype store_v8i16_with_unfolded_offset (v128, i32) -> (){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[R]]):p2align=0, $0{{$}} define void @store_v8i16_with_unfolded_offset(<8 x i16> %v, <8 x i16>* %p) { %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1 store <8 x i16> %v , <8 x i16>* %s @@ -359,7 +359,7 @@ ; SIMD128-NEXT: .functype store_v8i16_with_unfolded_gep_offset (v128, i32) -> (){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[R]]):p2align=0, $0{{$}} define void @store_v8i16_with_unfolded_gep_offset(<8 x i16> %v, <8 x i16>* %p) { %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1 store <8 x i16> %v , <8 x i16>* %s @@ -370,7 +370,7 @@ ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype store_v8i16_to_numeric_address (v128) -> (){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}} +; SIMD128-NEXT: v128.store 32($pop[[L0]]):p2align=0, $0{{$}} define void @store_v8i16_to_numeric_address(<8 x i16> %v) { %s = inttoptr i32 32 to <8 x i16>* store <8 x i16> %v , <8 x i16>* %s @@ -381,7 +381,7 @@ ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype store_v8i16_to_global_address (v128) -> (){{$}} ; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.store gv_v8i16($pop[[R]]), 
$0{{$}} +; SIMD128-NEXT: v128.store gv_v8i16($pop[[R]]):p2align=0, $0{{$}} define void @store_v8i16_to_global_address(<8 x i16> %v) { store <8 x i16> %v , <8 x i16>* @gv_v8i16 ret void @@ -393,7 +393,7 @@ ; CHECK-LABEL: load_v4i32: ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype load_v4i32 (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_v4i32(<4 x i32>* %p) { %v = load <4 x i32>, <4 x i32>* %p @@ -403,7 +403,7 @@ ; CHECK-LABEL: load_v4i32_with_folded_offset: ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype load_v4i32_with_folded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_v4i32_with_folded_offset(<4 x i32>* %p) { %q = ptrtoint <4 x i32>* %p to i32 @@ -416,7 +416,7 @@ ; CHECK-LABEL: load_v4i32_with_folded_gep_offset: ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype load_v4i32_with_folded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_v4i32_with_folded_gep_offset(<4 x i32>* %p) { %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1 @@ -429,7 +429,7 @@ ; SIMD128-NEXT: .functype load_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_v4i32_with_unfolded_gep_negative_offset(<4 x i32>* %p) { %s = getelementptr inbounds <4 x i32>, <4 x 
i32>* %p, i32 -1 @@ -442,7 +442,7 @@ ; SIMD128-NEXT: .functype load_v4i32_with_unfolded_offset (i32) -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_v4i32_with_unfolded_offset(<4 x i32>* %p) { %q = ptrtoint <4 x i32>* %p to i32 @@ -457,7 +457,7 @@ ; SIMD128-NEXT: .functype load_v4i32_with_unfolded_gep_offset (i32) -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_v4i32_with_unfolded_gep_offset(<4 x i32>* %p) { %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1 @@ -469,7 +469,7 @@ ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype load_v4i32_from_numeric_address () -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_v4i32_from_numeric_address() { %s = inttoptr i32 32 to <4 x i32>* @@ -481,7 +481,7 @@ ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype load_v4i32_from_global_address () -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v4i32($pop[[L0]]){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v4i32($pop[[L0]]):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} @gv_v4i32 = global <4 x i32> define <4 x i32> @load_v4i32_from_global_address() { @@ -492,7 +492,7 @@ ; CHECK-LABEL: store_v4i32: ; 
NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype store_v4i32 (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 0($1), $0{{$}} +; SIMD128-NEXT: v128.store 0($1):p2align=0, $0{{$}} define void @store_v4i32(<4 x i32> %v, <4 x i32>* %p) { store <4 x i32> %v , <4 x i32>* %p ret void @@ -501,7 +501,7 @@ ; CHECK-LABEL: store_v4i32_with_folded_offset: ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype store_v4i32_with_folded_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 16($1), $0{{$}} +; SIMD128-NEXT: v128.store 16($1):p2align=0, $0{{$}} define void @store_v4i32_with_folded_offset(<4 x i32> %v, <4 x i32>* %p) { %q = ptrtoint <4 x i32>* %p to i32 %r = add nuw i32 %q, 16 @@ -513,7 +513,7 @@ ; CHECK-LABEL: store_v4i32_with_folded_gep_offset: ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype store_v4i32_with_folded_gep_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 16($1), $0{{$}} +; SIMD128-NEXT: v128.store 16($1):p2align=0, $0{{$}} define void @store_v4i32_with_folded_gep_offset(<4 x i32> %v, <4 x i32>* %p) { %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1 store <4 x i32> %v , <4 x i32>* %s @@ -525,7 +525,7 @@ ; SIMD128-NEXT: .functype store_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[R]]):p2align=0, $0{{$}} define void @store_v4i32_with_unfolded_gep_negative_offset(<4 x i32> %v, <4 x i32>* %p) { %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1 store <4 x i32> %v , <4 x i32>* %s @@ -537,7 +537,7 @@ ; SIMD128-NEXT: .functype store_v4i32_with_unfolded_offset (v128, i32) -> (){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[R]]):p2align=0, $0{{$}} define 
void @store_v4i32_with_unfolded_offset(<4 x i32> %v, <4 x i32>* %p) { %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1 store <4 x i32> %v , <4 x i32>* %s @@ -549,7 +549,7 @@ ; SIMD128-NEXT: .functype store_v4i32_with_unfolded_gep_offset (v128, i32) -> (){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[R]]):p2align=0, $0{{$}} define void @store_v4i32_with_unfolded_gep_offset(<4 x i32> %v, <4 x i32>* %p) { %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1 store <4 x i32> %v , <4 x i32>* %s @@ -560,7 +560,7 @@ ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype store_v4i32_to_numeric_address (v128) -> (){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}} +; SIMD128-NEXT: v128.store 32($pop[[L0]]):p2align=0, $0{{$}} define void @store_v4i32_to_numeric_address(<4 x i32> %v) { %s = inttoptr i32 32 to <4 x i32>* store <4 x i32> %v , <4 x i32>* %s @@ -571,7 +571,7 @@ ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype store_v4i32_to_global_address (v128) -> (){{$}} ; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.store gv_v4i32($pop[[R]]), $0{{$}} +; SIMD128-NEXT: v128.store gv_v4i32($pop[[R]]):p2align=0, $0{{$}} define void @store_v4i32_to_global_address(<4 x i32> %v) { store <4 x i32> %v , <4 x i32>* @gv_v4i32 ret void @@ -584,7 +584,7 @@ ; NO-SIMD128-NOT: v128 ; SIMD128-VM-NOT: v128 ; SIMD128-NEXT: .functype load_v2i64 (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_v2i64(<2 x i64>* %p) { %v = load <2 x i64>, <2 x i64>* %p @@ -595,7 +595,7 @@ ; NO-SIMD128-NOT: v128 ; SIMD128-VM-NOT: v128 ; SIMD128-NEXT: .functype load_v2i64_with_folded_offset (i32) -> (v128){{$}} 
-; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_v2i64_with_folded_offset(<2 x i64>* %p) { %q = ptrtoint <2 x i64>* %p to i32 @@ -609,7 +609,7 @@ ; NO-SIMD128-NOT: v128 ; SIMD128-VM-NOT: v128 ; SIMD128-NEXT: .functype load_v2i64_with_folded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_v2i64_with_folded_gep_offset(<2 x i64>* %p) { %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1 @@ -623,7 +623,7 @@ ; SIMD128-NEXT: .functype load_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_v2i64_with_unfolded_gep_negative_offset(<2 x i64>* %p) { %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1 @@ -637,7 +637,7 @@ ; SIMD128-NEXT: .functype load_v2i64_with_unfolded_offset (i32) -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_v2i64_with_unfolded_offset(<2 x i64>* %p) { %q = ptrtoint <2 x i64>* %p to i32 @@ -653,7 +653,7 @@ ; SIMD128-NEXT: .functype load_v2i64_with_unfolded_gep_offset (i32) -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; 
SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_v2i64_with_unfolded_gep_offset(<2 x i64>* %p) { %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1 @@ -666,7 +666,7 @@ ; SIMD128-VM-NOT: v128 ; SIMD128-NEXT: .functype load_v2i64_from_numeric_address () -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_v2i64_from_numeric_address() { %s = inttoptr i32 32 to <2 x i64>* @@ -679,7 +679,7 @@ ; SIMD128-VM-NOT: v128 ; SIMD128-NEXT: .functype load_v2i64_from_global_address () -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v2i64($pop[[L0]]){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v2i64($pop[[L0]]):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} @gv_v2i64 = global <2 x i64> define <2 x i64> @load_v2i64_from_global_address() { @@ -691,7 +691,7 @@ ; NO-SIMD128-NOT: v128 ; SIMD128-VM-NOT: v128 ; SIMD128-NEXT: .functype store_v2i64 (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 0($1), $0{{$}} +; SIMD128-NEXT: v128.store 0($1):p2align=0, $0{{$}} define void @store_v2i64(<2 x i64> %v, <2 x i64>* %p) { store <2 x i64> %v , <2 x i64>* %p ret void @@ -701,7 +701,7 @@ ; NO-SIMD128-NOT: v128 ; SIMD128-VM-NOT: v128 ; SIMD128-NEXT: .functype store_v2i64_with_folded_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 16($1), $0{{$}} +; SIMD128-NEXT: v128.store 16($1):p2align=0, $0{{$}} define void @store_v2i64_with_folded_offset(<2 x i64> %v, <2 x i64>* %p) { %q = ptrtoint <2 x i64>* %p to i32 %r = add nuw i32 %q, 16 @@ -714,7 +714,7 @@ ; NO-SIMD128-NOT: v128 ; SIMD128-VM-NOT: v128 ; SIMD128-NEXT: .functype 
store_v2i64_with_folded_gep_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 16($1), $0{{$}} +; SIMD128-NEXT: v128.store 16($1):p2align=0, $0{{$}} define void @store_v2i64_with_folded_gep_offset(<2 x i64> %v, <2 x i64>* %p) { %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1 store <2 x i64> %v , <2 x i64>* %s @@ -727,7 +727,7 @@ ; SIMD128-NEXT: .functype store_v2i64_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[R]]):p2align=0, $0{{$}} define void @store_v2i64_with_unfolded_gep_negative_offset(<2 x i64> %v, <2 x i64>* %p) { %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1 store <2 x i64> %v , <2 x i64>* %s @@ -740,7 +740,7 @@ ; SIMD128-NEXT: .functype store_v2i64_with_unfolded_offset (v128, i32) -> (){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[R]]):p2align=0, $0{{$}} define void @store_v2i64_with_unfolded_offset(<2 x i64> %v, <2 x i64>* %p) { %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1 store <2 x i64> %v , <2 x i64>* %s @@ -753,7 +753,7 @@ ; SIMD128-NEXT: .functype store_v2i64_with_unfolded_gep_offset (v128, i32) -> (){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[R]]):p2align=0, $0{{$}} define void @store_v2i64_with_unfolded_gep_offset(<2 x i64> %v, <2 x i64>* %p) { %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1 store <2 x i64> %v , <2 x i64>* %s @@ -765,7 +765,7 @@ ; SIMD128-VM-NOT: v128 ; SIMD128-NEXT: .functype store_v2i64_to_numeric_address (v128) -> (){{$}} ; 
SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}} +; SIMD128-NEXT: v128.store 32($pop[[L0]]):p2align=0, $0{{$}} define void @store_v2i64_to_numeric_address(<2 x i64> %v) { %s = inttoptr i32 32 to <2 x i64>* store <2 x i64> %v , <2 x i64>* %s @@ -777,7 +777,7 @@ ; SIMD128-VM-NOT: v128 ; SIMD128-NEXT: .functype store_v2i64_to_global_address (v128) -> (){{$}} ; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.store gv_v2i64($pop[[R]]), $0{{$}} +; SIMD128-NEXT: v128.store gv_v2i64($pop[[R]]):p2align=0, $0{{$}} define void @store_v2i64_to_global_address(<2 x i64> %v) { store <2 x i64> %v , <2 x i64>* @gv_v2i64 ret void @@ -789,7 +789,7 @@ ; CHECK-LABEL: load_v4f32: ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype load_v4f32 (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @load_v4f32(<4 x float>* %p) { %v = load <4 x float>, <4 x float>* %p @@ -799,7 +799,7 @@ ; CHECK-LABEL: load_v4f32_with_folded_offset: ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype load_v4f32_with_folded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @load_v4f32_with_folded_offset(<4 x float>* %p) { %q = ptrtoint <4 x float>* %p to i32 @@ -812,7 +812,7 @@ ; CHECK-LABEL: load_v4f32_with_folded_gep_offset: ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype load_v4f32_with_folded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @load_v4f32_with_folded_gep_offset(<4 x float>* %p) { %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1 @@ 
-825,7 +825,7 @@ ; SIMD128-NEXT: .functype load_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @load_v4f32_with_unfolded_gep_negative_offset(<4 x float>* %p) { %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1 @@ -838,7 +838,7 @@ ; SIMD128-NEXT: .functype load_v4f32_with_unfolded_offset (i32) -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @load_v4f32_with_unfolded_offset(<4 x float>* %p) { %q = ptrtoint <4 x float>* %p to i32 @@ -853,7 +853,7 @@ ; SIMD128-NEXT: .functype load_v4f32_with_unfolded_gep_offset (i32) -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @load_v4f32_with_unfolded_gep_offset(<4 x float>* %p) { %s = getelementptr <4 x float>, <4 x float>* %p, i32 1 @@ -865,7 +865,7 @@ ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype load_v4f32_from_numeric_address () -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> 
@load_v4f32_from_numeric_address() { %s = inttoptr i32 32 to <4 x float>* @@ -877,7 +877,7 @@ ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype load_v4f32_from_global_address () -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v4f32($pop[[L0]]){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v4f32($pop[[L0]]):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} @gv_v4f32 = global <4 x float> define <4 x float> @load_v4f32_from_global_address() { @@ -888,7 +888,7 @@ ; CHECK-LABEL: store_v4f32: ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype store_v4f32 (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 0($1), $0{{$}} +; SIMD128-NEXT: v128.store 0($1):p2align=0, $0{{$}} define void @store_v4f32(<4 x float> %v, <4 x float>* %p) { store <4 x float> %v , <4 x float>* %p ret void @@ -897,7 +897,7 @@ ; CHECK-LABEL: store_v4f32_with_folded_offset: ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype store_v4f32_with_folded_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 16($1), $0{{$}} +; SIMD128-NEXT: v128.store 16($1):p2align=0, $0{{$}} define void @store_v4f32_with_folded_offset(<4 x float> %v, <4 x float>* %p) { %q = ptrtoint <4 x float>* %p to i32 %r = add nuw i32 %q, 16 @@ -909,7 +909,7 @@ ; CHECK-LABEL: store_v4f32_with_folded_gep_offset: ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype store_v4f32_with_folded_gep_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 16($1), $0{{$}} +; SIMD128-NEXT: v128.store 16($1):p2align=0, $0{{$}} define void @store_v4f32_with_folded_gep_offset(<4 x float> %v, <4 x float>* %p) { %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1 store <4 x float> %v , <4 x float>* %s @@ -921,7 +921,7 @@ ; SIMD128-NEXT: .functype store_v4f32_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 
0($pop[[R]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[R]]):p2align=0, $0{{$}} define void @store_v4f32_with_unfolded_gep_negative_offset(<4 x float> %v, <4 x float>* %p) { %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1 store <4 x float> %v , <4 x float>* %s @@ -933,7 +933,7 @@ ; SIMD128-NEXT: .functype store_v4f32_with_unfolded_offset (v128, i32) -> (){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[R]]):p2align=0, $0{{$}} define void @store_v4f32_with_unfolded_offset(<4 x float> %v, <4 x float>* %p) { %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1 store <4 x float> %v , <4 x float>* %s @@ -945,7 +945,7 @@ ; SIMD128-NEXT: .functype store_v4f32_with_unfolded_gep_offset (v128, i32) -> (){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[R]]):p2align=0, $0{{$}} define void @store_v4f32_with_unfolded_gep_offset(<4 x float> %v, <4 x float>* %p) { %s = getelementptr <4 x float>, <4 x float>* %p, i32 1 store <4 x float> %v , <4 x float>* %s @@ -956,7 +956,7 @@ ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype store_v4f32_to_numeric_address (v128) -> (){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}} +; SIMD128-NEXT: v128.store 32($pop[[L0]]):p2align=0, $0{{$}} define void @store_v4f32_to_numeric_address(<4 x float> %v) { %s = inttoptr i32 32 to <4 x float>* store <4 x float> %v , <4 x float>* %s @@ -967,7 +967,7 @@ ; NO-SIMD128-NOT: v128 ; SIMD128-NEXT: .functype store_v4f32_to_global_address (v128) -> (){{$}} ; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.store gv_v4f32($pop[[R]]), $0{{$}} +; SIMD128-NEXT: v128.store 
gv_v4f32($pop[[R]]):p2align=0, $0{{$}} define void @store_v4f32_to_global_address(<4 x float> %v) { store <4 x float> %v , <4 x float>* @gv_v4f32 ret void @@ -980,7 +980,7 @@ ; NO-SIMD128-NOT: v128 ; SIMD128-VM-NOT: v128 ; SIMD128-NEXT: .functype load_v2f64 (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @load_v2f64(<2 x double>* %p) { %v = load <2 x double>, <2 x double>* %p @@ -991,7 +991,7 @@ ; NO-SIMD128-NOT: v128 ; SIMD128-VM-NOT: v128 ; SIMD128-NEXT: .functype load_v2f64_with_folded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @load_v2f64_with_folded_offset(<2 x double>* %p) { %q = ptrtoint <2 x double>* %p to i32 @@ -1005,7 +1005,7 @@ ; NO-SIMD128-NOT: v128 ; SIMD128-VM-NOT: v128 ; SIMD128-NEXT: .functype load_v2f64_with_folded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @load_v2f64_with_folded_gep_offset(<2 x double>* %p) { %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1 @@ -1019,7 +1019,7 @@ ; SIMD128-NEXT: .functype load_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @load_v2f64_with_unfolded_gep_negative_offset(<2 x double>* %p) { %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1 @@ -1033,7 +1033,7 @@ 
; SIMD128-NEXT: .functype load_v2f64_with_unfolded_offset (i32) -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @load_v2f64_with_unfolded_offset(<2 x double>* %p) { %q = ptrtoint <2 x double>* %p to i32 @@ -1049,7 +1049,7 @@ ; SIMD128-NEXT: .functype load_v2f64_with_unfolded_gep_offset (i32) -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @load_v2f64_with_unfolded_gep_offset(<2 x double>* %p) { %s = getelementptr <2 x double>, <2 x double>* %p, i32 1 @@ -1062,7 +1062,7 @@ ; SIMD128-VM-NOT: v128 ; SIMD128-NEXT: .functype load_v2f64_from_numeric_address () -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @load_v2f64_from_numeric_address() { %s = inttoptr i32 32 to <2 x double>* @@ -1075,7 +1075,7 @@ ; SIMD128-VM-NOT: v128 ; SIMD128-NEXT: .functype load_v2f64_from_global_address () -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v2f64($pop[[L0]]){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v2f64($pop[[L0]]):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} @gv_v2f64 = global <2 x double> define <2 x double> @load_v2f64_from_global_address() { @@ -1087,7 +1087,7 @@ ; NO-SIMD128-NOT: v128 ; SIMD128-VM-NOT: 
v128 ; SIMD128-NEXT: .functype store_v2f64 (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 0($1), $0{{$}} +; SIMD128-NEXT: v128.store 0($1):p2align=0, $0{{$}} define void @store_v2f64(<2 x double> %v, <2 x double>* %p) { store <2 x double> %v , <2 x double>* %p ret void @@ -1097,7 +1097,7 @@ ; NO-SIMD128-NOT: v128 ; SIMD128-VM-NOT: v128 ; SIMD128-NEXT: .functype store_v2f64_with_folded_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 16($1), $0{{$}} +; SIMD128-NEXT: v128.store 16($1):p2align=0, $0{{$}} define void @store_v2f64_with_folded_offset(<2 x double> %v, <2 x double>* %p) { %q = ptrtoint <2 x double>* %p to i32 %r = add nuw i32 %q, 16 @@ -1110,7 +1110,7 @@ ; NO-SIMD128-NOT: v128 ; SIMD128-VM-NOT: v128 ; SIMD128-NEXT: .functype store_v2f64_with_folded_gep_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 16($1), $0{{$}} +; SIMD128-NEXT: v128.store 16($1):p2align=0, $0{{$}} define void @store_v2f64_with_folded_gep_offset(<2 x double> %v, <2 x double>* %p) { %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1 store <2 x double> %v , <2 x double>* %s @@ -1123,7 +1123,7 @@ ; SIMD128-NEXT: .functype store_v2f64_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[R]]):p2align=0, $0{{$}} define void @store_v2f64_with_unfolded_gep_negative_offset(<2 x double> %v, <2 x double>* %p) { %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1 store <2 x double> %v , <2 x double>* %s @@ -1136,7 +1136,7 @@ ; SIMD128-NEXT: .functype store_v2f64_with_unfolded_offset (v128, i32) -> (){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[R]]):p2align=0, $0{{$}} define 
void @store_v2f64_with_unfolded_offset(<2 x double> %v, <2 x double>* %p) { %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1 store <2 x double> %v , <2 x double>* %s @@ -1149,7 +1149,7 @@ ; SIMD128-NEXT: .functype store_v2f64_with_unfolded_gep_offset (v128, i32) -> (){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[R]]):p2align=0, $0{{$}} define void @store_v2f64_with_unfolded_gep_offset(<2 x double> %v, <2 x double>* %p) { %s = getelementptr <2 x double>, <2 x double>* %p, i32 1 store <2 x double> %v , <2 x double>* %s @@ -1161,7 +1161,7 @@ ; SIMD128-VM-NOT: v128 ; SIMD128-NEXT: .functype store_v2f64_to_numeric_address (v128) -> (){{$}} ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}} +; SIMD128-NEXT: v128.store 32($pop[[L0]]):p2align=0, $0{{$}} define void @store_v2f64_to_numeric_address(<2 x double> %v) { %s = inttoptr i32 32 to <2 x double>* store <2 x double> %v , <2 x double>* %s @@ -1173,7 +1173,7 @@ ; SIMD128-VM-NOT: v128 ; SIMD128-NEXT: .functype store_v2f64_to_global_address (v128) -> (){{$}} ; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.store gv_v2f64($pop[[R]]), $0{{$}} +; SIMD128-NEXT: v128.store gv_v2f64($pop[[R]]):p2align=0, $0{{$}} define void @store_v2f64_to_global_address(<2 x double> %v) { store <2 x double> %v , <2 x double>* @gv_v2f64 ret void diff --git a/llvm/test/CodeGen/WebAssembly/simd.ll b/llvm/test/CodeGen/WebAssembly/simd.ll --- a/llvm/test/CodeGen/WebAssembly/simd.ll +++ b/llvm/test/CodeGen/WebAssembly/simd.ll @@ -60,7 +60,7 @@ ; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16 ; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]] ; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $2=, $pop[[L2]] -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0 +; SIMD128-NEXT: 
v128.store 0($pop[[L3]]):p2align=0, $0 ; SIMD128-NEXT: i32.const $push[[L4:[0-9]+]]=, 15 ; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L4]] ; SIMD128-NEXT: i32.or $push[[L6:[0-9]+]]=, $2, $pop[[L5]] @@ -102,7 +102,7 @@ ; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} ; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $2=, $pop[[L2]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[L3]]):p2align=0, $0{{$}} ; SIMD128-NEXT: i32.const $push[[L4:[0-9]+]]=, 15{{$}} ; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L4]]{{$}} ; SIMD128-NEXT: i32.or $push[[L6:[0-9]+]]=, $2, $pop[[L5]]{{$}} @@ -143,7 +143,7 @@ ; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} ; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $2=, $pop[[L2]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[L3]]):p2align=0, $0{{$}} ; SIMD128-NEXT: i32.const $push[[L4:[0-9]+]]=, 15{{$}} ; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L4]]{{$}} ; SIMD128-NEXT: i32.or $push[[L6:[0-9]+]]=, $2, $pop[[L5]]{{$}} @@ -181,12 +181,12 @@ ; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} ; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $3=, $pop[[L2]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[L3]]):p2align=0, $0{{$}} ; SIMD128-NEXT: i32.const $push[[L4:[0-9]+]]=, 15{{$}} ; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L4]]{{$}} ; SIMD128-NEXT: i32.or $push[[L6:[0-9]+]]=, $3, $pop[[L5]]{{$}} ; SIMD128-NEXT: i32.store8 0($pop[[L6]]), $2{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($3){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($3):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @replace_var_v16i8(<16 
x i8> %v, i32 %i, i8 %x) { %res = insertelement <16 x i8> %v, i8 %x, i32 %i @@ -323,7 +323,7 @@ ; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} ; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $2=, $pop[[L2]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[L3]]):p2align=0, $0{{$}} ; SIMD128-NEXT: i32.const $push[[L4:[0-9]+]]=, 7{{$}} ; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L4]]{{$}} ; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 1{{$}} @@ -367,7 +367,7 @@ ; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} ; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $2=, $pop[[L2]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[L3]]):p2align=0, $0{{$}} ; SIMD128-NEXT: i32.const $push[[L4:[0-9]+]]=, 7{{$}} ; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L4]]{{$}} ; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 1{{$}} @@ -410,7 +410,7 @@ ; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} ; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $2=, $pop[[L2]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[L3]]):p2align=0, $0{{$}} ; SIMD128-NEXT: i32.const $push[[L4:[0-9]+]]=, 7{{$}} ; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L4]]{{$}} ; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 1{{$}} @@ -450,14 +450,14 @@ ; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} ; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $3=, $pop[[L2]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[L3]]):p2align=0, $0{{$}} ; SIMD128-NEXT: i32.const $push[[L4:[0-9]+]]=, 7{{$}} ; SIMD128-NEXT: i32.and 
$push[[L5:[0-9]+]]=, $1, $pop[[L4]]{{$}} ; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 1{{$}} ; SIMD128-NEXT: i32.shl $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}} ; SIMD128-NEXT: i32.or $push[[L8:[0-9]+]]=, $3, $pop[[L7]]{{$}} ; SIMD128-NEXT: i32.store16 0($pop[[L8]]), $2{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($3){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($3):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @replace_var_v8i16(<8 x i16> %v, i32 %i, i16 %x) { %res = insertelement <8 x i16> %v, i16 %x, i32 %i @@ -571,7 +571,7 @@ ; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} ; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $2=, $pop[[L2]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[L3]]):p2align=0, $0{{$}} ; SIMD128-NEXT: i32.const $push[[L4:[0-9]+]]=, 3{{$}} ; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L4]]{{$}} ; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 2{{$}} @@ -611,14 +611,14 @@ ; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} ; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $3=, $pop[[L2]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[L3]]):p2align=0, $0{{$}} ; SIMD128-NEXT: i32.const $push[[L4:[0-9]+]]=, 3{{$}} ; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L4]]{{$}} ; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 2{{$}} ; SIMD128-NEXT: i32.shl $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}} ; SIMD128-NEXT: i32.or $push[[L4:[0-9]+]]=, $3, $pop[[L7]]{{$}} ; SIMD128-NEXT: i32.store 0($pop[[L4]]), $2{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($3){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($3):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @replace_var_v4i32(<4 x i32> %v, i32 %i, i32 %x) { %res = 
insertelement <4 x i32> %v, i32 %x, i32 %i @@ -724,7 +724,7 @@ ; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} ; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $2=, $pop[[L2]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[L3]]):p2align=0, $0{{$}} ; SIMD128-NEXT: i32.const $push[[L2:[0-9]+]]=, 1{{$}} ; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L2]]{{$}} ; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 3{{$}} @@ -767,14 +767,14 @@ ; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} ; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $3=, $pop[[L2]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[L3]]):p2align=0, $0{{$}} ; SIMD128-NEXT: i32.const $push[[L2:[0-9]+]]=, 1{{$}} ; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L2]]{{$}} ; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 3{{$}} ; SIMD128-NEXT: i32.shl $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}} ; SIMD128-NEXT: i32.or $push[[L2:[0-9]+]]=, $3, $pop[[L7]]{{$}} ; SIMD128-NEXT: i64.store 0($pop[[L2]]), $2{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($3){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($3):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @replace_var_v2i64(<2 x i64> %v, i32 %i, i64 %x) { %res = insertelement <2 x i64> %v, i64 %x, i32 %i @@ -877,7 +877,7 @@ ; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} ; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $2=, $pop[[L2]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[L3]]):p2align=0, $0{{$}} ; SIMD128-NEXT: i32.const $push[[L2:[0-9]+]]=, 3{{$}} ; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L2]]{{$}} ; SIMD128-NEXT: i32.const 
$push[[L6:[0-9]+]]=, 2{{$}} @@ -917,14 +917,14 @@ ; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} ; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $3=, $pop[[L2]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[L3]]):p2align=0, $0{{$}} ; SIMD128-NEXT: i32.const $push[[L2:[0-9]+]]=, 3{{$}} ; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L2]]{{$}} ; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 2{{$}} ; SIMD128-NEXT: i32.shl $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}} ; SIMD128-NEXT: i32.or $push[[L2:[0-9]+]]=, $3, $pop[[L7]]{{$}} ; SIMD128-NEXT: f32.store 0($pop[[L2]]), $2{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($3){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($3):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @replace_var_v4f32(<4 x float> %v, i32 %i, float %x) { %res = insertelement <4 x float> %v, float %x, i32 %i @@ -1029,7 +1029,7 @@ ; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} ; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $2=, $pop[[L2]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[L3]]):p2align=0, $0{{$}} ; SIMD128-NEXT: i32.const $push[[L2:[0-9]+]]=, 1{{$}} ; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L2]]{{$}} ; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 3{{$}} @@ -1072,14 +1072,14 @@ ; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} ; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} ; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $3=, $pop[[L2]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} +; SIMD128-NEXT: v128.store 0($pop[[L3]]):p2align=0, $0{{$}} ; SIMD128-NEXT: i32.const $push[[L2:[0-9]+]]=, 1{{$}} ; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L2]]{{$}} ; SIMD128-NEXT: i32.const 
$push[[L6:[0-9]+]]=, 3{{$}} ; SIMD128-NEXT: i32.shl $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}} ; SIMD128-NEXT: i32.or $push[[L2:[0-9]+]]=, $3, $pop[[L7]]{{$}} ; SIMD128-NEXT: f64.store 0($pop[[L2]]), $2{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($3){{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($3):p2align=0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @replace_var_v2f64(<2 x double> %v, i32 %i, double %x) { %res = insertelement <2 x double> %v, double %x, i32 %i