diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -66,8 +66,8 @@
   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                     std::vector<SDValue> &OutOps) override;
-  bool SelectLoadOperands32(SDValue Op, SDValue &Offset, SDValue &Addr);
-  bool SelectLoadOperands64(SDValue Op, SDValue &Offset, SDValue &Addr);
+  bool SelectAddrOperands32(SDValue Op, SDValue &Offset, SDValue &Addr);
+  bool SelectAddrOperands64(SDValue Op, SDValue &Offset, SDValue &Addr);

   // Include the pieces autogenerated from the target description.
 #include "WebAssemblyGenDAGISel.inc"
@@ -75,9 +75,9 @@
 private:
   // add select functions here...

-  bool SelectLoadOperands(MVT AddrType, unsigned ConstOpc, SDValue Op,
+  bool SelectAddrOperands(MVT AddrType, unsigned ConstOpc, SDValue Op,
                           SDValue &Offset, SDValue &Addr);
-  bool SelectLoadAddOperands(MVT OffsetType, SDValue N, SDValue &Offset,
+  bool SelectAddrAddOperands(MVT OffsetType, SDValue N, SDValue &Offset,
                              SDValue &Addr);
 };
 } // end anonymous namespace
@@ -288,7 +288,7 @@
   return true;
 }

-bool WebAssemblyDAGToDAGISel::SelectLoadAddOperands(MVT OffsetType, SDValue N,
+bool WebAssemblyDAGToDAGISel::SelectAddrAddOperands(MVT OffsetType, SDValue N,
                                                     SDValue &Offset, SDValue &Addr) {
   assert(N.getNumOperands() == 2 && "Attempting to fold in a non-binary op");
@@ -314,7 +314,7 @@
   return false;
 }

-bool WebAssemblyDAGToDAGISel::SelectLoadOperands(MVT AddrType, unsigned ConstOpc,
+bool WebAssemblyDAGToDAGISel::SelectAddrOperands(MVT AddrType, unsigned ConstOpc,
                                                  SDValue N, SDValue &Offset,
                                                  SDValue &Addr) {
   SDLoc DL(N);
@@ -337,7 +337,7 @@
   // Fold anything inside an add into the offset.
   if (N.getOpcode() == ISD::ADD &&
-      SelectLoadAddOperands(AddrType, N, Offset, Addr))
+      SelectAddrAddOperands(AddrType, N, Offset, Addr))
     return true;

   // Likewise, treat an 'or' node as an 'add' if the or'ed bits are known to be
@@ -353,7 +353,7 @@
       OrIsAdd = (~Known0.Zero & ~Known1.Zero) == 0;
     }

-    if (OrIsAdd && SelectLoadAddOperands(AddrType, N, Offset, Addr))
+    if (OrIsAdd && SelectAddrAddOperands(AddrType, N, Offset, Addr))
       return true;
   }

@@ -367,20 +367,20 @@
     return true;
   }

-  // Else it's a plain old load with no offset.
+  // Else it's a plain old load/store with no offset.
   Offset = CurDAG->getTargetConstant(0, DL, AddrType);
   Addr = N;
   return true;
 }

-bool WebAssemblyDAGToDAGISel::SelectLoadOperands32(SDValue Op, SDValue &Offset,
+bool WebAssemblyDAGToDAGISel::SelectAddrOperands32(SDValue Op, SDValue &Offset,
                                                    SDValue &Addr) {
-  return SelectLoadOperands(MVT::i32, WebAssembly::CONST_I32, Op, Offset, Addr);
+  return SelectAddrOperands(MVT::i32, WebAssembly::CONST_I32, Op, Offset, Addr);
 }

-bool WebAssemblyDAGToDAGISel::SelectLoadOperands64(SDValue Op, SDValue &Offset,
+bool WebAssemblyDAGToDAGISel::SelectAddrOperands64(SDValue Op, SDValue &Offset,
                                                    SDValue &Addr) {
-  return SelectLoadOperands(MVT::i64, WebAssembly::CONST_I64, Op, Offset, Addr);
+  return SelectAddrOperands(MVT::i64, WebAssembly::CONST_I64, Op, Offset, Addr);
 }

 /// This pass converts a legalized DAG into a WebAssembly-specific DAG, ready
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
@@ -11,6 +11,27 @@
 ///
 //===----------------------------------------------------------------------===//

+// WebAssembly constant offsets are performed as unsigned with infinite
+// precision, so we need to check for NoUnsignedWrap so that we don't fold an
+// offset for an add that needs wrapping.
+def regPlusImm : PatFrag<(ops node:$addr, node:$off),
+                         (add node:$addr, node:$off),
+                         [{ return N->getFlags().hasNoUnsignedWrap(); }]>;
+
+
+// Treat an 'or' node as an 'add' if the or'ed bits are known to be zero.
+def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+    return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
+
+  KnownBits Known0 = CurDAG->computeKnownBits(N->getOperand(0), 0);
+  KnownBits Known1 = CurDAG->computeKnownBits(N->getOperand(1), 0);
+  return (~Known0.Zero & ~Known1.Zero) == 0;
+}]>;
+
+// We don't need a regPlusES because external symbols never have constant
+// offsets folded into them, so we can just use add.
+
 let UseNamedOperandTable = 1 in
 multiclass ATOMIC_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
                     list<dag> pattern_r, string asmstr_r = "",
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@@ -19,26 +19,6 @@
 // local types. These memory-only types instead zero- or sign-extend into local
 // types when loading, and truncate when storing.

-// WebAssembly constant offsets are performed as unsigned with infinite
-// precision, so we need to check for NoUnsignedWrap so that we don't fold an
-// offset for an add that needs wrapping.
-def regPlusImm : PatFrag<(ops node:$addr, node:$off),
-                         (add node:$addr, node:$off),
-                         [{ return N->getFlags().hasNoUnsignedWrap(); }]>;
-
-// Treat an 'or' node as an 'add' if the or'ed bits are known to be zero.
-def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
-  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
-    return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
-
-  KnownBits Known0 = CurDAG->computeKnownBits(N->getOperand(0), 0);
-  KnownBits Known1 = CurDAG->computeKnownBits(N->getOperand(1), 0);
-  return (~Known0.Zero & ~Known1.Zero) == 0;
-}]>;
-
-// We don't need a regPlusES because external symbols never have constant
-// offsets folded into them, so we can just use add.
-
 // Defines atomic and non-atomic loads, regular and extending.
 multiclass WebAssemblyLoad<WebAssemblyRegClass rc, string Name, int Simd128,
                            list<Predicate> reqs = []> {
@@ -81,30 +61,29 @@
 // Pattern matching

 // Patterns that match the static (offset) and dynamic (address stack
-// operand) operands for loads, based on a combination of target
-// global addresses and constants.
+// operand) operands for loads and stores, based on a combination of
+// target global addresses and constants.
 // For example,
 // (add tga x) -> load tga(x)
 // tga -> load tga(0)
 // (add const x) -> load const(x)
 // const -> load const(0)
 // x -> load 0(x)
-def LoadOps32 : ComplexPattern<i32, 2, "SelectLoadOperands32">;
-def LoadOps64 : ComplexPattern<i64, 2, "SelectLoadOperands64">;
+def AddrOps32 : ComplexPattern<i32, 2, "SelectAddrOperands32">;
+def AddrOps64 : ComplexPattern<i64, 2, "SelectAddrOperands64">;

 multiclass LoadPat<ValueType ty, SDPatternOperator kind, string Name> {
-  def : Pat<(ty (kind (LoadOps32 offset32_op:$offset, I32:$addr))),
+  def : Pat<(ty (kind (AddrOps32 offset32_op:$offset, I32:$addr))),
             (!cast<NI>(Name # "_A32") 0, offset32_op:$offset, I32:$addr)>,
         Requires<[HasAddr32]>;
-  def : Pat<(ty (kind (LoadOps64 offset64_op:$offset, I64:$addr))),
+  def : Pat<(ty (kind (AddrOps64 offset64_op:$offset, I64:$addr))),
             (!cast<NI>(Name # "_A64") 0, offset64_op:$offset, I64:$addr)>,
         Requires<[HasAddr64]>;
-
 }

 defm : LoadPat<i32, load, "LOAD_I32">;
@@ -158,69 +137,25 @@
 defm STORE_F32 : WebAssemblyStore<F32, "f32.store", 0x38>;
 defm STORE_F64 : WebAssemblyStore<F64, "f64.store", 0x39>;

-// Select stores with no constant offset.
-multiclass StorePatNoOffset<ValueType ty, SDPatternOperator node, string inst> {
-  def : Pat<(node ty:$val, I32:$addr),
-            (!cast<NI>(inst # "_A32") 0, 0, I32:$addr, ty:$val)>,
-        Requires<[HasAddr32]>;
-  def : Pat<(node ty:$val, I64:$addr),
-            (!cast<NI>(inst # "_A64") 0, 0, I64:$addr, ty:$val)>,
-        Requires<[HasAddr64]>;
-}
-
-defm : StorePatNoOffset<i32, store, "STORE_I32">;
-defm : StorePatNoOffset<i64, store, "STORE_I64">;
-defm : StorePatNoOffset<f32, store, "STORE_F32">;
-defm : StorePatNoOffset<f64, store, "STORE_F64">;
-
-// Select stores with a constant offset.
-multiclass StorePatImmOff<ValueType ty, SDPatternOperator kind, PatFrag operand,
-                          string inst> {
-  def : Pat<(kind ty:$val, (operand I32:$addr, imm:$off)),
-            (!cast<NI>(inst # "_A32") 0, imm:$off, I32:$addr, ty:$val)>,
-        Requires<[HasAddr32]>;
-  def : Pat<(kind ty:$val, (operand I64:$addr, imm:$off)),
-            (!cast<NI>(inst # "_A64") 0, imm:$off, I64:$addr, ty:$val)>,
-        Requires<[HasAddr64]>;
+multiclass StorePat<ValueType ty, SDPatternOperator kind, string Name> {
+  def : Pat<(kind ty:$val, (AddrOps32 offset32_op:$offset, I32:$addr)),
+            (!cast<NI>(Name # "_A32") 0,
+             offset32_op:$offset,
+             I32:$addr,
+             ty:$val)>,
+        Requires<[HasAddr32]>;
+  def : Pat<(kind ty:$val, (AddrOps64 offset64_op:$offset, I64:$addr)),
+            (!cast<NI>(Name # "_A64") 0,
+             offset64_op:$offset,
+             I64:$addr,
+             ty:$val)>,
+        Requires<[HasAddr64]>;
 }

-defm : StorePatImmOff<i32, store, regPlusImm, "STORE_I32">;
-defm : StorePatImmOff<i64, store, regPlusImm, "STORE_I64">;
-defm : StorePatImmOff<f32, store, regPlusImm, "STORE_F32">;
-defm : StorePatImmOff<f64, store, regPlusImm, "STORE_F64">;
-defm : StorePatImmOff<i32, store, or_is_add, "STORE_I32">;
-defm : StorePatImmOff<i64, store, or_is_add, "STORE_I64">;
-defm : StorePatImmOff<f32, store, or_is_add, "STORE_F32">;
-defm : StorePatImmOff<f64, store, or_is_add, "STORE_F64">;
-
-// Select stores with just a constant offset.
-multiclass StorePatOffsetOnly<ValueType ty, SDPatternOperator kind, string inst> {
-  def : Pat<(kind ty:$val, imm:$off),
-            (!cast<NI>(inst # "_A32") 0, imm:$off, (CONST_I32 0), ty:$val)>,
-        Requires<[HasAddr32]>;
-  def : Pat<(kind ty:$val, imm:$off),
-            (!cast<NI>(inst # "_A64") 0, imm:$off, (CONST_I64 0), ty:$val)>,
-        Requires<[HasAddr64]>;
-}
-defm : StorePatOffsetOnly<i32, store, "STORE_I32">;
-defm : StorePatOffsetOnly<i64, store, "STORE_I64">;
-defm : StorePatOffsetOnly<f32, store, "STORE_F32">;
-defm : StorePatOffsetOnly<f64, store, "STORE_F64">;
-
-multiclass StorePatGlobalAddrOffOnly<ValueType ty, SDPatternOperator kind, string inst> {
-  def : Pat<(kind ty:$val, (WebAssemblyWrapper tglobaladdr:$off)),
-            (!cast<NI>(inst # "_A32") 0, tglobaladdr:$off, (CONST_I32 0),
-             ty:$val)>,
-        Requires<[IsNotPIC, HasAddr32]>;
-  def : Pat<(kind ty:$val, (WebAssemblyWrapper tglobaladdr:$off)),
-            (!cast<NI>(inst # "_A64") 0, tglobaladdr:$off, (CONST_I64 0),
-             ty:$val)>,
-        Requires<[IsNotPIC, HasAddr64]>;
-}
-defm : StorePatGlobalAddrOffOnly<i32, store, "STORE_I32">;
-defm : StorePatGlobalAddrOffOnly<i64, store, "STORE_I64">;
-defm : StorePatGlobalAddrOffOnly<f32, store, "STORE_F32">;
-defm : StorePatGlobalAddrOffOnly<f64, store, "STORE_F64">;
+defm : StorePat<i32, store, "STORE_I32">;
+defm : StorePat<i64, store, "STORE_I64">;
+defm : StorePat<f32, store, "STORE_F32">;
+defm : StorePat<f64, store, "STORE_F64">;

 // Truncating store.
 defm STORE8_I32 : WebAssemblyStore<I32, "i32.store8", 0x3a>;
@@ -229,36 +164,11 @@
 defm STORE16_I64 : WebAssemblyStore<I64, "i64.store16", 0x3d>;
 defm STORE32_I64 : WebAssemblyStore<I64, "i64.store32", 0x3e>;

-// Select truncating stores with no constant offset.
-defm : StorePatNoOffset<i32, truncstorei8, "STORE8_I32">;
-defm : StorePatNoOffset<i32, truncstorei16, "STORE16_I32">;
-defm : StorePatNoOffset<i64, truncstorei8, "STORE8_I64">;
-defm : StorePatNoOffset<i64, truncstorei16, "STORE16_I64">;
-defm : StorePatNoOffset<i64, truncstorei32, "STORE32_I64">;
-
-// Select truncating stores with a constant offset.
-defm : StorePatImmOff<i32, truncstorei8, regPlusImm, "STORE8_I32">;
-defm : StorePatImmOff<i32, truncstorei16, regPlusImm, "STORE16_I32">;
-defm : StorePatImmOff<i64, truncstorei8, regPlusImm, "STORE8_I64">;
-defm : StorePatImmOff<i64, truncstorei16, regPlusImm, "STORE16_I64">;
-defm : StorePatImmOff<i64, truncstorei32, regPlusImm, "STORE32_I64">;
-defm : StorePatImmOff<i32, truncstorei8, or_is_add, "STORE8_I32">;
-defm : StorePatImmOff<i32, truncstorei16, or_is_add, "STORE16_I32">;
-defm : StorePatImmOff<i64, truncstorei8, or_is_add, "STORE8_I64">;
-defm : StorePatImmOff<i64, truncstorei16, or_is_add, "STORE16_I64">;
-defm : StorePatImmOff<i64, truncstorei32, or_is_add, "STORE32_I64">;
-
-// Select truncating stores with just a constant offset.
-defm : StorePatOffsetOnly<i32, truncstorei8, "STORE8_I32">;
-defm : StorePatOffsetOnly<i32, truncstorei16, "STORE16_I32">;
-defm : StorePatOffsetOnly<i64, truncstorei8, "STORE8_I64">;
-defm : StorePatOffsetOnly<i64, truncstorei16, "STORE16_I64">;
-defm : StorePatOffsetOnly<i64, truncstorei32, "STORE32_I64">;
-defm : StorePatGlobalAddrOffOnly<i32, truncstorei8, "STORE8_I32">;
-defm : StorePatGlobalAddrOffOnly<i32, truncstorei16, "STORE16_I32">;
-defm : StorePatGlobalAddrOffOnly<i64, truncstorei8, "STORE8_I64">;
-defm : StorePatGlobalAddrOffOnly<i64, truncstorei16, "STORE16_I64">;
-defm : StorePatGlobalAddrOffOnly<i64, truncstorei32, "STORE32_I64">;
+defm : StorePat<i32, truncstorei8, "STORE8_I32">;
+defm : StorePat<i32, truncstorei16, "STORE16_I32">;
+defm : StorePat<i64, truncstorei8, "STORE8_I64">;
+defm : StorePat<i64, truncstorei16, "STORE16_I64">;
+defm : StorePat<i64, truncstorei32, "STORE32_I64">;

 multiclass MemoryOps<WebAssemblyRegClass rc, string B> {
   // Current memory size.
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -366,11 +366,7 @@
 // Def store patterns from WebAssemblyInstrMemory.td for vector types
 foreach vec = AllVecs in {
-defm : StorePatNoOffset<vec.vt, store, "STORE_V128">;
-defm : StorePatImmOff<vec.vt, store, regPlusImm, "STORE_V128">;
-defm : StorePatImmOff<vec.vt, store, or_is_add, "STORE_V128">;
-defm : StorePatOffsetOnly<vec.vt, store, "STORE_V128">;
-defm : StorePatGlobalAddrOffOnly<vec.vt, store, "STORE_V128">;
+defm : StorePat<vec.vt, store, "STORE_V128">;
 }

 // Store lane
@@ -399,13 +395,16 @@
 defm "" : SIMDStoreLane<I32x4, 0x5a>;
 defm "" : SIMDStoreLane<I64x2, 0x5b>;

-// Select stores with no constant offset.
-multiclass StoreLanePatNoOffset<Vec vec, SDPatternOperator kind> {
-  def : Pat<(kind (i32 I32:$addr), (vec.vt V128:$vec), (i32 vec.lane_idx:$idx)),
-            (!cast<NI>("STORE_LANE_"#vec#"_A32") 0, 0, imm:$idx, $addr, $vec)>,
+multiclass StoreLanePat<Vec vec, SDPatternOperator kind> {
+  def : Pat<(kind (AddrOps32 offset32_op:$offset, I32:$addr),
+                  (vec.vt V128:$vec),
+                  (i32 vec.lane_idx:$idx)),
+            (!cast<NI>("STORE_LANE_"#vec#"_A32") 0, $offset, imm:$idx, $addr, $vec)>,
         Requires<[HasAddr32]>;
-  def : Pat<(kind (i64 I64:$addr), (vec.vt V128:$vec), (i32 vec.lane_idx:$idx)),
-            (!cast<NI>("STORE_LANE_"#vec#"_A64") 0, 0, imm:$idx, $addr, $vec)>,
+  def : Pat<(kind (AddrOps64 offset64_op:$offset, I64:$addr),
+                  (vec.vt V128:$vec),
+                  (i32 vec.lane_idx:$idx)),
+            (!cast<NI>("STORE_LANE_"#vec#"_A64") 0, $offset, imm:$idx, $addr, $vec)>,
         Requires<[HasAddr64]>;
 }
@@ -424,10 +423,10 @@
 // TODO: floating point lanes as well

 let AddedComplexity = 1 in {
-defm : StoreLanePatNoOffset<I8x16, int_wasm_store8_lane>;
-defm : StoreLanePatNoOffset<I16x8, int_wasm_store16_lane>;
-defm : StoreLanePatNoOffset<I32x4, int_wasm_store32_lane>;
-defm : StoreLanePatNoOffset<I64x2, int_wasm_store64_lane>;
+defm : StoreLanePat<I8x16, int_wasm_store8_lane>;
+defm : StoreLanePat<I16x8, int_wasm_store16_lane>;
+defm : StoreLanePat<I32x4, int_wasm_store32_lane>;
+defm : StoreLanePat<I64x2, int_wasm_store64_lane>;
 }

 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/WebAssembly/simd-load-lane-offset.ll b/llvm/test/CodeGen/WebAssembly/simd-load-lane-offset.ll
--- a/llvm/test/CodeGen/WebAssembly/simd-load-lane-offset.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-load-lane-offset.ll
@@ -155,10 +155,8 @@
 ; CHECK:         .functype store_lane_i8_with_folded_offset (v128, i32) -> ()
 ; CHECK-NEXT:  # %bb.0:
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    i32.const 24
-; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    v128.store8_lane 0, 0
+; CHECK-NEXT:    v128.store8_lane 24, 0
 ; CHECK-NEXT:    # fallthrough-return
   %q = ptrtoint i8* %p to i32
   %r = add nuw i32 %q, 24
@@ -173,10 +171,8 @@
 ; CHECK:         .functype store_lane_i8_with_folded_gep_offset (v128, i32) -> ()
 ; CHECK-NEXT:  # %bb.0:
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    i32.const 6
-; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    v128.store8_lane 0, 0
+; CHECK-NEXT:    v128.store8_lane 6, 0
 ; CHECK-NEXT:    # fallthrough-return
   %s = getelementptr inbounds i8, i8* %p, i32 6
   %x = extractelement <16 x i8> %v, i32 0
@@ -238,9 +234,9 @@
 ; CHECK-LABEL: store_lane_i8_to_numeric_address:
 ; CHECK:         .functype store_lane_i8_to_numeric_address (v128) -> ()
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    i32.const 42
+; CHECK-NEXT:    i32.const 0
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    v128.store8_lane 0, 0
+; CHECK-NEXT:    v128.store8_lane 42, 0
 ; CHECK-NEXT:    # fallthrough-return
   %s = inttoptr i32 42 to i8*
   %x = extractelement <16 x i8> %v, i32 0
@@ -252,9 +248,9 @@
 ; CHECK-LABEL: store_lane_i8_from_global_address:
 ; CHECK:         .functype store_lane_i8_from_global_address (v128) -> ()
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    i32.const gv_i8
+; CHECK-NEXT:    i32.const 0
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    v128.store8_lane 0, 0
+; CHECK-NEXT:    v128.store8_lane gv_i8, 0
 ; CHECK-NEXT:    # fallthrough-return
   %x = extractelement <16 x i8> %v, i32 0
   store i8 %x, i8* @gv_i8
@@ -408,10 +404,8 @@
 ; CHECK:         .functype store_lane_i16_with_folded_offset (v128, i32) -> ()
 ; CHECK-NEXT:  # %bb.0:
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    i32.const 24
-; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    v128.store16_lane 0, 0
+; CHECK-NEXT:    v128.store16_lane 24, 0
 ; CHECK-NEXT:    # fallthrough-return
   %q = ptrtoint i16* %p to i32
   %r = add nuw i32 %q, 24
@@ -426,10 +420,8 @@
 ; CHECK:         .functype store_lane_i16_with_folded_gep_offset (v128, i32) -> ()
 ; CHECK-NEXT:  # %bb.0:
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    i32.const 12
-; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    v128.store16_lane 0, 0
+; CHECK-NEXT:    v128.store16_lane 12, 0
 ; CHECK-NEXT:    # fallthrough-return
   %s = getelementptr inbounds i16, i16* %p, i32 6
   %x = extractelement <8 x i16> %v, i32 0
@@ -491,9 +483,9 @@
 ; CHECK-LABEL: store_lane_i16_to_numeric_address:
 ; CHECK:         .functype store_lane_i16_to_numeric_address (v128) -> ()
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    i32.const 42
+; CHECK-NEXT:    i32.const 0
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    v128.store16_lane 0, 0
+; CHECK-NEXT:    v128.store16_lane 42, 0
 ; CHECK-NEXT:    # fallthrough-return
   %s = inttoptr i32 42 to i16*
   %x = extractelement <8 x i16> %v, i32 0
@@ -505,9 +497,9 @@
 ; CHECK-LABEL: store_lane_i16_from_global_address:
 ; CHECK:         .functype store_lane_i16_from_global_address (v128) -> ()
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    i32.const gv_i16
+; CHECK-NEXT:    i32.const 0
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    v128.store16_lane 0, 0
+; CHECK-NEXT:    v128.store16_lane gv_i16, 0
 ; CHECK-NEXT:    # fallthrough-return
   %x = extractelement <8 x i16> %v, i32 0
   store i16 %x, i16* @gv_i16
@@ -661,10 +653,8 @@
 ; CHECK:         .functype store_lane_i32_with_folded_offset (v128, i32) -> ()
 ; CHECK-NEXT:  # %bb.0:
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    i32.const 24
-; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    v128.store32_lane 0, 0
+; CHECK-NEXT:    v128.store32_lane 24, 0
 ; CHECK-NEXT:    # fallthrough-return
   %q = ptrtoint i32* %p to i32
   %r = add nuw i32 %q, 24
@@ -679,10 +669,8 @@
 ; CHECK:         .functype store_lane_i32_with_folded_gep_offset (v128, i32) -> ()
 ; CHECK-NEXT:  # %bb.0:
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    i32.const 24
-; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    v128.store32_lane 0, 0
+; CHECK-NEXT:    v128.store32_lane 24, 0
 ; CHECK-NEXT:    # fallthrough-return
   %s = getelementptr inbounds i32, i32* %p, i32 6
   %x = extractelement <4 x i32> %v, i32 0
@@ -744,9 +732,9 @@
 ; CHECK-LABEL: store_lane_i32_to_numeric_address:
 ; CHECK:         .functype store_lane_i32_to_numeric_address (v128) -> ()
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    i32.const 42
+; CHECK-NEXT:    i32.const 0
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    v128.store32_lane 0, 0
+; CHECK-NEXT:    v128.store32_lane 42, 0
 ; CHECK-NEXT:    # fallthrough-return
   %s = inttoptr i32 42 to i32*
   %x = extractelement <4 x i32> %v, i32 0
@@ -758,9 +746,9 @@
 ; CHECK-LABEL: store_lane_i32_from_global_address:
 ; CHECK:         .functype store_lane_i32_from_global_address (v128) -> ()
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    i32.const gv_i32
+; CHECK-NEXT:    i32.const 0
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    v128.store32_lane 0, 0
+; CHECK-NEXT:    v128.store32_lane gv_i32, 0
 ; CHECK-NEXT:    # fallthrough-return
   %x = extractelement <4 x i32> %v, i32 0
   store i32 %x, i32* @gv_i32
@@ -914,10 +902,8 @@
 ; CHECK:         .functype store_lane_i64_with_folded_offset (v128, i32) -> ()
 ; CHECK-NEXT:  # %bb.0:
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    i32.const 24
-; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    v128.store64_lane 0, 0
+; CHECK-NEXT:    v128.store64_lane 24, 0
 ; CHECK-NEXT:    # fallthrough-return
   %q = ptrtoint i64* %p to i32
   %r = add nuw i32 %q, 24
@@ -932,10 +918,8 @@
 ; CHECK:         .functype store_lane_i64_with_folded_gep_offset (v128, i32) -> ()
 ; CHECK-NEXT:  # %bb.0:
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    i32.const 48
-; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    v128.store64_lane 0, 0
+; CHECK-NEXT:    v128.store64_lane 48, 0
 ; CHECK-NEXT:    # fallthrough-return
   %s = getelementptr inbounds i64, i64* %p, i32 6
   %x = extractelement <2 x i64> %v, i32 0
@@ -997,9 +981,9 @@
 ; CHECK-LABEL: store_lane_i64_to_numeric_address:
 ; CHECK:         .functype store_lane_i64_to_numeric_address (v128) -> ()
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    i32.const 42
+; CHECK-NEXT:    i32.const 0
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    v128.store64_lane 0, 0
+; CHECK-NEXT:    v128.store64_lane 42, 0
 ; CHECK-NEXT:    # fallthrough-return
   %s = inttoptr i32 42 to i64*
   %x = extractelement <2 x i64> %v, i32 0
@@ -1011,9 +995,9 @@
 ; CHECK-LABEL: store_lane_i64_from_global_address:
 ; CHECK:         .functype store_lane_i64_from_global_address (v128) -> ()
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    i32.const gv_i64
+; CHECK-NEXT:    i32.const 0
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    v128.store64_lane 0, 0
+; CHECK-NEXT:    v128.store64_lane gv_i64, 0
 ; CHECK-NEXT:    # fallthrough-return
   %x = extractelement <2 x i64> %v, i32 0
   store i64 %x, i64* @gv_i64
diff --git a/llvm/test/CodeGen/WebAssembly/simd-offset.ll b/llvm/test/CodeGen/WebAssembly/simd-offset.ll
--- a/llvm/test/CodeGen/WebAssembly/simd-offset.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-offset.ll
@@ -949,10 +949,8 @@
 ; CHECK:         .functype store_narrowing_v8i16_with_folded_offset (v128, i32) -> ()
 ; CHECK-NEXT:  # %bb.0:
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    i32.const 16
-; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    v128.store64_lane 0, 0
+; CHECK-NEXT:    v128.store64_lane 16, 0
 ; CHECK-NEXT:    # fallthrough-return
   %q = ptrtoint <8 x i8>* %p to i32
   %r = add nuw i32 %q, 16
@@ -979,10 +977,8 @@
 ; CHECK:         .functype store_narrowing_v8i16_with_folded_gep_offset (v128, i32) -> ()
 ; CHECK-NEXT:  # %bb.0:
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    i32.const 8
-; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    v128.store64_lane 0, 0
+; CHECK-NEXT:    v128.store64_lane 8, 0
 ; CHECK-NEXT:    # fallthrough-return
   %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
   store <8 x i8> %v , <8 x i8>* %s
@@ -1100,9 +1096,9 @@
 ; CHECK-LABEL: store_narrowing_v8i16_to_numeric_address:
 ; CHECK:         .functype store_narrowing_v8i16_to_numeric_address (v128, i32) -> ()
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    i32.const 32
+; CHECK-NEXT:    i32.const 0
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    v128.store64_lane 0, 0
+; CHECK-NEXT:    v128.store64_lane 32, 0
 ; CHECK-NEXT:    # fallthrough-return
   %s = inttoptr i32 32 to <8 x i8>*
   store <8 x i8> %v , <8 x i8>* %s
@@ -1125,9 +1121,9 @@
 ; CHECK-LABEL: store_narrowing_v8i16_to_global_address:
 ; CHECK:         .functype store_narrowing_v8i16_to_global_address (v128) -> ()
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    i32.const gv_v8i8
+; CHECK-NEXT:    i32.const 0
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    v128.store64_lane 0, 0
+; CHECK-NEXT:    v128.store64_lane gv_v8i8, 0
 ; CHECK-NEXT:    # fallthrough-return
   store <8 x i8> %v , <8 x i8>* @gv_v8i8
   ret void
@@ -1417,11 +1413,11 @@
 ; CHECK-LABEL: load_zext_v4i8_to_v4i32_with_folded_gep_offset:
 ; CHECK:         .functype load_zext_v4i8_to_v4i32_with_folded_gep_offset (i32) -> (v128)
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:     v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-; CHECK-NEXT:     local.get 0
-; CHECK-NEXT:     v128.load32_zero 4
-; CHECK-NEXT:     i8x16.shuffle 16, 1, 2, 3, 17, 5, 6, 7, 18, 9, 10, 11, 19, 13, 14, 15
-; CHECK-NEXT:     # fallthrough-return
+; CHECK-NEXT:    v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    v128.load32_zero 4
+; CHECK-NEXT:    i8x16.shuffle 16, 1, 2, 3, 17, 5, 6, 7, 18, 9, 10, 11, 19, 13, 14, 15
+; CHECK-NEXT:    # fallthrough-return
   %s = getelementptr inbounds <4 x i8>, <4 x i8>* %p, i32 1
   %v = load <4 x i8>, <4 x i8>* %s
   %v2 = zext <4 x i8> %v to <4 x i32>
@@ -2032,10 +2028,8 @@
 ; CHECK:         .functype store_narrowing_v4i32_with_folded_offset (v128, i32) -> ()
 ; CHECK-NEXT:  # %bb.0:
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    i32.const 16
-; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    v128.store64_lane 0, 0
+; CHECK-NEXT:    v128.store64_lane 16, 0
 ; CHECK-NEXT:    # fallthrough-return
   %q = ptrtoint <4 x i16>* %p to i32
   %r = add nuw i32 %q, 16
@@ -2062,10 +2056,8 @@
 ; CHECK:         .functype store_narrowing_v4i32_with_folded_gep_offset (v128, i32) -> ()
 ; CHECK-NEXT:  # %bb.0:
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    i32.const 8
-; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    v128.store64_lane 0, 0
+; CHECK-NEXT:    v128.store64_lane 8, 0
 ; CHECK-NEXT:    # fallthrough-return
   %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
   store <4 x i16> %v , <4 x i16>* %s
@@ -2183,9 +2175,9 @@
 ; CHECK-LABEL: store_narrowing_v4i32_to_numeric_address:
 ; CHECK:         .functype store_narrowing_v4i32_to_numeric_address (v128) -> ()
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    i32.const 32
+; CHECK-NEXT:    i32.const 0
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    v128.store64_lane 0, 0
+; CHECK-NEXT:    v128.store64_lane 32, 0
 ; CHECK-NEXT:    # fallthrough-return
   %s = inttoptr i32 32 to <4 x i16>*
   store <4 x i16> %v , <4 x i16>* %s
@@ -2208,9 +2200,9 @@
 ; CHECK-LABEL: store_narrowing_v4i32_to_global_address:
 ; CHECK:         .functype store_narrowing_v4i32_to_global_address (v128) -> ()
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    i32.const gv_v4i16
+; CHECK-NEXT:    i32.const 0
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    v128.store64_lane 0, 0
+; CHECK-NEXT:    v128.store64_lane gv_v4i16, 0
 ; CHECK-NEXT:    # fallthrough-return
   store <4 x i16> %v , <4 x i16>* @gv_v4i16
   ret void
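
Illustration (not part of the patch): why the offset folding done by SelectAddrOperands / regPlusImm requires an `add nuw`. The WebAssembly offset immediate is added to the address as an unsigned value with effectively infinite precision, so a constant may only be folded when the IR add cannot wrap. The two IR functions below are hypothetical, written in the same style as the tests above; the function names and expected instruction sequences are illustrative assumptions, not new test cases from this change.

; With `nuw`, the 24 should fold into the lane store's immediate offset,
; e.g. "v128.store8_lane 24, 0", as in store_lane_i8_with_folded_offset.
define void @fold_ok(<16 x i8> %v, i32 %q) {
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i8*
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, i8* %s
  ret void
}

; Without `nuw`, the i32 add may wrap modulo 2^32, while the wasm offset
; addition does not wrap, so the add should stay explicit
; ("i32.const 24; i32.add") and the store's offset immediate stays 0.
define void @no_fold(<16 x i8> %v, i32 %q) {
  %r = add i32 %q, 24
  %s = inttoptr i32 %r to i8*
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, i8* %s
  ret void
}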