diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -246,6 +246,9 @@
       setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
       setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
     }
+    // And some truncating stores are legal as well
+    setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
+    setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
   }

   // Don't do anything clever with build_pairs
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -885,6 +885,12 @@
 defm "" : SIMDConvert;
 defm "" : SIMDConvert;

+// Lower llvm.wasm.trunc.saturate.* to saturating instructions
+def : Pat<(v4i32 (int_wasm_trunc_saturate_signed (v4f32 V128:$src))),
+          (fp_to_sint_v4i32_v4f32 (v4f32 V128:$src))>;
+def : Pat<(v4i32 (int_wasm_trunc_saturate_unsigned (v4f32 V128:$src))),
+          (fp_to_uint_v4i32_v4f32 (v4f32 V128:$src))>;
+
 // Widening operations
 multiclass SIMDWiden baseInst> {
@@ -921,11 +927,95 @@
 defm "" : SIMDNarrow;
 defm "" : SIMDNarrow;

-// Lower llvm.wasm.trunc.saturate.* to saturating instructions
-def : Pat<(v4i32 (int_wasm_trunc_saturate_signed (v4f32 V128:$src))),
-          (fp_to_sint_v4i32_v4f32 (v4f32 V128:$src))>;
-def : Pat<(v4i32 (int_wasm_trunc_saturate_unsigned (v4f32 V128:$src))),
-          (fp_to_uint_v4i32_v4f32 (v4f32 V128:$src))>;
+// Use narrowing operations for truncating stores. Since the narrowing
+// operations are saturating instead of truncating, we need to mask
+// the stored values first.
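+//
+// For example, i8x16.narrow_i16x8_u saturates, so an i16 lane holding 0x1234
+// would come out as 0xff, whereas a truncating store must produce 0x34.
+// Masking each lane first (0x00ff per i16 lane, 0x0000ffff per i32 lane)
+// keeps every lane within the narrow range, so the saturating narrow then
+// behaves exactly like a truncation.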
+// TODO: Use consts instead of splats
+def store_v8i8_trunc_v8i16 :
+  OutPatFrag<(ops node:$val),
+    (EXTRACT_LANE_v2i64
+      (NARROW_U_v16i8
+        (AND_v4i32 (SPLAT_v4i32 (CONST_I32 0x00ff00ff)), node:$val),
+        node:$val
+      ),
+      0
+    )>;
+
+def store_v4i16_trunc_v4i32 :
+  OutPatFrag<(ops node:$val),
+    (EXTRACT_LANE_v2i64
+      (NARROW_U_v8i16
+        (AND_v4i32 (SPLAT_v4i32 (CONST_I32 0x0000ffff)), node:$val),
+        node:$val
+      ),
+      0
+    )>;
+
+// Store patterns adapted from WebAssemblyInstrMemory.td
+multiclass NarrowingStorePatNoOffset<ValueType ty, PatFrag node,
+                                     OutPatFrag out> {
+  def : Pat<(node ty:$val, I32:$addr),
+            (STORE_I64_A32 0, 0, I32:$addr, (i64 (out ty:$val)))>,
+        Requires<[HasAddr32]>;
+  def : Pat<(node ty:$val, I64:$addr),
+            (STORE_I64_A64 0, 0, I64:$addr, (i64 (out ty:$val)))>,
+        Requires<[HasAddr64]>;
+}
+
+defm : NarrowingStorePatNoOffset<v8i8, truncstorevi8, store_v8i8_trunc_v8i16>;
+defm : NarrowingStorePatNoOffset<v4i16, truncstorevi16, store_v4i16_trunc_v4i32>;
+
+multiclass NarrowingStorePatImmOff<ValueType ty, PatFrag kind, PatFrag operand,
+                                   OutPatFrag out> {
+  def : Pat<(kind ty:$val, (operand I32:$addr, imm:$off)),
+            (STORE_I64_A32 0, imm:$off, I32:$addr, (i64 (out ty:$val)))>,
+        Requires<[HasAddr32]>;
+  def : Pat<(kind ty:$val, (operand I64:$addr, imm:$off)),
+            (STORE_I64_A64 0, imm:$off, I64:$addr, (i64 (out ty:$val)))>,
+        Requires<[HasAddr64]>;
+}
+
+defm : NarrowingStorePatImmOff<v8i8, truncstorevi8, regPlusImm,
+                               store_v8i8_trunc_v8i16>;
+defm : NarrowingStorePatImmOff<v8i8, truncstorevi8, or_is_add,
+                               store_v8i8_trunc_v8i16>;
+defm : NarrowingStorePatImmOff<v4i16, truncstorevi16, regPlusImm,
+                               store_v4i16_trunc_v4i32>;
+defm : NarrowingStorePatImmOff<v4i16, truncstorevi16, or_is_add,
+                               store_v4i16_trunc_v4i32>;
+
+multiclass NarrowingStorePatOffsetOnly<ValueType ty, PatFrag kind,
+                                       OutPatFrag out> {
+  def : Pat<(kind ty:$val, imm:$off),
+            (STORE_I64_A32 0, imm:$off, (CONST_I32 0), (i64 (out ty:$val)))>,
+        Requires<[HasAddr32]>;
+  def : Pat<(kind ty:$val, imm:$off),
+            (STORE_I64_A64 0, imm:$off, (CONST_I64 0), (i64 (out ty:$val)))>,
+        Requires<[HasAddr64]>;
+}
+
+defm : NarrowingStorePatOffsetOnly<v8i8, truncstorevi8, store_v8i8_trunc_v8i16>;
+defm : NarrowingStorePatOffsetOnly<v4i16, truncstorevi16, store_v4i16_trunc_v4i32>;
+
+multiclass NarrowingStorePatGlobalAddrOffOnly<ValueType ty, PatFrag kind,
+                                              OutPatFrag out> {
+  def : Pat<(kind ty:$val, (WebAssemblywrapper tglobaladdr:$off)),
+            (STORE_I64_A32
+              0, tglobaladdr:$off, (CONST_I32 0), (i64 (out ty:$val)))>,
+        Requires<[IsNotPIC, HasAddr32]>;
+  def : Pat<(kind ty:$val, (WebAssemblywrapper tglobaladdr:$off)),
+            (STORE_I64_A64
+              0, tglobaladdr:$off, (CONST_I64 0), (i64 (out ty:$val)))>,
+        Requires<[IsNotPIC, HasAddr64]>;
+}
+
+defm : NarrowingStorePatGlobalAddrOffOnly<v8i8, truncstorevi8,
+                                          store_v8i8_trunc_v8i16>;
+defm : NarrowingStorePatGlobalAddrOffOnly<v4i16, truncstorevi16,
+                                          store_v4i16_trunc_v4i32>;

 // Bitcasts are nops
 // Matching bitcast t1 to t1 causes strange errors, so avoid repeating types
diff --git a/llvm/test/CodeGen/WebAssembly/simd-offset.ll b/llvm/test/CodeGen/WebAssembly/simd-offset.ll
--- a/llvm/test/CodeGen/WebAssembly/simd-offset.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-offset.ll
@@ -918,6 +918,24 @@
   ret void
 }

+define void @store_narrowing_v8i16(<8 x i8> %v, <8 x i8>* %p) {
+; CHECK-LABEL: store_narrowing_v8i16:
+; CHECK: .functype store_narrowing_v8i16 (v128, i32) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16711935
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.narrow_i16x8_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store 0
+; CHECK-NEXT: # fallthrough-return
+  store <8 x i8> %v, <8 x i8>* %p
+  ret void
+}
+
 define void @store_v8i16_with_folded_offset(<8 x i16> %v, <8 x i16>* %p) {
 ; CHECK-LABEL: store_v8i16_with_folded_offset:
 ; CHECK: .functype store_v8i16_with_folded_offset (v128, i32) -> ()
@@ -933,6 +951,27 @@
   ret void
 }

+define void @store_narrowing_v8i16_with_folded_offset(<8 x i8> %v, <8 x i8>* %p) {
+; CHECK-LABEL: store_narrowing_v8i16_with_folded_offset:
+; CHECK: .functype store_narrowing_v8i16_with_folded_offset (v128, i32) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16711935
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.narrow_i16x8_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store 16
+; CHECK-NEXT: # fallthrough-return
+  %q = ptrtoint <8 x i8>* %p to i32
+  %r = add nuw i32 %q, 16
+  %s = inttoptr i32 %r to <8 x i8>*
+  store <8 x i8> %v , <8 x i8>* %s
+  ret void
+}
+
 define void @store_v8i16_with_folded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
 ; CHECK-LABEL: store_v8i16_with_folded_gep_offset:
 ; CHECK: .functype store_v8i16_with_folded_gep_offset (v128, i32) -> ()
@@ -946,6 +985,25 @@
   ret void
 }

+define void @store_narrowing_v8i16_with_folded_gep_offset(<8 x i8> %v, <8 x i8>* %p) {
+; CHECK-LABEL: store_narrowing_v8i16_with_folded_gep_offset:
+; CHECK: .functype store_narrowing_v8i16_with_folded_gep_offset (v128, i32) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16711935
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.narrow_i16x8_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store 8
+; CHECK-NEXT: # fallthrough-return
+  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
+  store <8 x i8> %v , <8 x i8>* %s
+  ret void
+}
+
 define void @store_v8i16_with_unfolded_gep_negative_offset(<8 x i16> %v, <8 x i16>* %p) {
 ; CHECK-LABEL: store_v8i16_with_unfolded_gep_negative_offset:
 ; CHECK: .functype store_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
@@ -961,6 +1019,27 @@
   ret void
 }

+define void @store_narrowing_v8i16_with_unfolded_gep_negative_offset(<8 x i8> %v, <8 x i8>* %p) {
+; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_gep_negative_offset:
+; CHECK: .functype store_narrowing_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const -8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i32.const 16711935
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.narrow_i16x8_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store 0
+; CHECK-NEXT: # fallthrough-return
+  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
+  store <8 x i8> %v , <8 x i8>* %s
+  ret void
+}
+
 define void @store_v8i16_with_unfolded_offset(<8 x i16> %v, <8 x i16>* %p) {
 ; CHECK-LABEL: store_v8i16_with_unfolded_offset:
 ; CHECK: .functype store_v8i16_with_unfolded_offset (v128, i32) -> ()
@@ -978,6 +1057,29 @@
   ret void
 }

+define void @store_narrowing_v8i16_with_unfolded_offset(<8 x i8> %v, <8 x i8>* %p) {
+; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_offset:
+; CHECK: .functype store_narrowing_v8i16_with_unfolded_offset (v128, i32) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i32.const 16711935
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.narrow_i16x8_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store 0
+; CHECK-NEXT: # fallthrough-return
+  %q = ptrtoint <8 x i8>* %p to i32
+  %r = add nsw i32 %q, 16
+  %s = inttoptr i32 %r to <8 x i8>*
+  store <8 x i8> %v , <8 x i8>* %s
+  ret void
+}
+
 define void @store_v8i16_with_unfolded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
 ; CHECK-LABEL: store_v8i16_with_unfolded_gep_offset:
 ; CHECK: .functype store_v8i16_with_unfolded_gep_offset (v128, i32) -> ()
@@ -993,6 +1095,27 @@
   ret void
 }

+define void @store_narrowing_v8i16_with_unfolded_gep_offset(<8 x i8> %v, <8 x i8>* %p) {
+; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_gep_offset:
+; CHECK: .functype store_narrowing_v8i16_with_unfolded_gep_offset (v128, i32) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i32.const 16711935
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.narrow_i16x8_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store 0
+; CHECK-NEXT: # fallthrough-return
+  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
+  store <8 x i8> %v , <8 x i8>* %s
+  ret void
+}
+
 define void @store_v8i16_to_numeric_address(<8 x i16> %v) {
 ; CHECK-LABEL: store_v8i16_to_numeric_address:
 ; CHECK: .functype store_v8i16_to_numeric_address (v128) -> ()
@@ -1006,6 +1129,25 @@
   ret void
 }

+define void @store_narrowing_v8i16_to_numeric_address(<8 x i8> %v, <8 x i8>* %p) {
+; CHECK-LABEL: store_narrowing_v8i16_to_numeric_address:
+; CHECK: .functype store_narrowing_v8i16_to_numeric_address (v128, i32) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i32.const 16711935
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.narrow_i16x8_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store 32
+; CHECK-NEXT: # fallthrough-return
+  %s = inttoptr i32 32 to <8 x i8>*
+  store <8 x i8> %v , <8 x i8>* %s
+  ret void
+}
+
 define void @store_v8i16_to_global_address(<8 x i16> %v) {
 ; CHECK-LABEL: store_v8i16_to_global_address:
 ; CHECK: .functype store_v8i16_to_global_address (v128) -> ()
@@ -1018,6 +1160,24 @@
   ret void
 }

+define void @store_narrowing_v8i16_to_global_address(<8 x i8> %v) {
+; CHECK-LABEL: store_narrowing_v8i16_to_global_address:
+; CHECK: .functype store_narrowing_v8i16_to_global_address (v128) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i32.const 16711935
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.narrow_i16x8_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store gv_v8i8
+; CHECK-NEXT: # fallthrough-return
+  store <8 x i8> %v , <8 x i8>* @gv_v8i8
+  ret void
+}
+
 ; ==============================================================================
 ; 4 x i32
 ; ==============================================================================
@@ -1588,6 +1748,24 @@
   ret void
 }

+define void @store_narrowing_v4i32(<4 x i16> %v, <4 x i16>* %p) {
+; CHECK-LABEL: store_narrowing_v4i32:
+; CHECK: .functype store_narrowing_v4i32 (v128, i32) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store 0
+; CHECK-NEXT: # fallthrough-return
+  store <4 x i16> %v , <4 x i16>* %p
+  ret void
+}
+
 define void @store_v4i32_with_folded_offset(<4 x i32> %v, <4 x i32>* %p) {
 ; CHECK-LABEL: store_v4i32_with_folded_offset:
 ; CHECK: .functype store_v4i32_with_folded_offset (v128, i32) -> ()
@@ -1603,6 +1781,27 @@
   ret void
 }

+define void @store_narrowing_v4i32_with_folded_offset(<4 x i16> %v, <4 x i16>* %p) {
+; CHECK-LABEL: store_narrowing_v4i32_with_folded_offset:
+; CHECK: .functype store_narrowing_v4i32_with_folded_offset (v128, i32) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store 16
+; CHECK-NEXT: # fallthrough-return
+  %q = ptrtoint <4 x i16>* %p to i32
+  %r = add nuw i32 %q, 16
+  %s = inttoptr i32 %r to <4 x i16>*
+  store <4 x i16> %v , <4 x i16>* %s
+  ret void
+}
+
 define void @store_v4i32_with_folded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
 ; CHECK-LABEL: store_v4i32_with_folded_gep_offset:
 ; CHECK: .functype store_v4i32_with_folded_gep_offset (v128, i32) -> ()
@@ -1616,6 +1815,25 @@
   ret void
 }

+define void @store_narrowing_v4i32_with_folded_gep_offset(<4 x i16> %v, <4 x i16>* %p) {
+; CHECK-LABEL: store_narrowing_v4i32_with_folded_gep_offset:
+; CHECK: .functype store_narrowing_v4i32_with_folded_gep_offset (v128, i32) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store 8
+; CHECK-NEXT: # fallthrough-return
+  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
+  store <4 x i16> %v , <4 x i16>* %s
+  ret void
+}
+
 define void @store_v4i32_with_unfolded_gep_negative_offset(<4 x i32> %v, <4 x i32>* %p) {
 ; CHECK-LABEL: store_v4i32_with_unfolded_gep_negative_offset:
 ; CHECK: .functype store_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> ()
@@ -1631,6 +1849,27 @@
   ret void
 }

+define void @store_narrowing_v4i32_with_unfolded_gep_negative_offset(<4 x i16> %v, <4 x i16>* %p) {
+; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_gep_negative_offset:
+; CHECK: .functype store_narrowing_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const -8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store 0
+; CHECK-NEXT: # fallthrough-return
+  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
+  store <4 x i16> %v , <4 x i16>* %s
+  ret void
+}
+
 define void @store_v4i32_with_unfolded_offset(<4 x i32> %v, <4 x i32>* %p) {
 ; CHECK-LABEL: store_v4i32_with_unfolded_offset:
 ; CHECK: .functype store_v4i32_with_unfolded_offset (v128, i32) -> ()
@@ -1648,6 +1887,29 @@
   ret void
 }

+define void @store_narrowing_v4i32_with_unfolded_offset(<4 x i16> %v, <4 x i16>* %p) {
+; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_offset:
+; CHECK: .functype store_narrowing_v4i32_with_unfolded_offset (v128, i32) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store 0
+; CHECK-NEXT: # fallthrough-return
+  %q = ptrtoint <4 x i16>* %p to i32
+  %r = add nsw i32 %q, 16
+  %s = inttoptr i32 %r to <4 x i16>*
+  store <4 x i16> %v , <4 x i16>* %s
+  ret void
+}
+
 define void @store_v4i32_with_unfolded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
 ; CHECK-LABEL: store_v4i32_with_unfolded_gep_offset:
 ; CHECK: .functype store_v4i32_with_unfolded_gep_offset (v128, i32) -> ()
@@ -1663,6 +1925,27 @@
   ret void
 }

+define void @store_narrowing_v4i32_with_unfolded_gep_offset(<4 x i16> %v, <4 x i16>* %p) {
+; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_gep_offset:
+; CHECK: .functype store_narrowing_v4i32_with_unfolded_gep_offset (v128, i32) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store 0
+; CHECK-NEXT: # fallthrough-return
+  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
+  store <4 x i16> %v , <4 x i16>* %s
+  ret void
+}
+
 define void @store_v4i32_to_numeric_address(<4 x i32> %v) {
 ; CHECK-LABEL: store_v4i32_to_numeric_address:
 ; CHECK: .functype store_v4i32_to_numeric_address (v128) -> ()
@@ -1676,6 +1959,25 @@
   ret void
 }

+define void @store_narrowing_v4i32_to_numeric_address(<4 x i16> %v) {
+; CHECK-LABEL: store_narrowing_v4i32_to_numeric_address:
+; CHECK: .functype store_narrowing_v4i32_to_numeric_address (v128) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store 32
+; CHECK-NEXT: # fallthrough-return
+  %s = inttoptr i32 32 to <4 x i16>*
+  store <4 x i16> %v , <4 x i16>* %s
+  ret void
+}
+
 define void @store_v4i32_to_global_address(<4 x i32> %v) {
 ; CHECK-LABEL: store_v4i32_to_global_address:
 ; CHECK: .functype store_v4i32_to_global_address (v128) -> ()
@@ -1688,6 +1990,24 @@
   ret void
 }

+define void @store_narrowing_v4i32_to_global_address(<4 x i16> %v) {
+; CHECK-LABEL: store_narrowing_v4i32_to_global_address:
+; CHECK: .functype store_narrowing_v4i32_to_global_address (v128) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store gv_v4i16
+; CHECK-NEXT: # fallthrough-return
+  store <4 x i16> %v , <4 x i16>* @gv_v4i16
+  ret void
+}
+
 ; ==============================================================================
 ; 2 x i64
 ; ==============================================================================